revlog: simplify entry update logic in `rewrite_sidedata`...
marmoute
r48019:de63be07 default
@@ -1,3264 +1,3267 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FEATURES_BY_VERSION,
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 INDEX_HEADER,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 SUPPORTED_FLAGS,
51 51 )
52 52 from .revlogutils.flagutil import (
53 53 REVIDX_DEFAULT_FLAGS,
54 54 REVIDX_ELLIPSIS,
55 55 REVIDX_EXTSTORED,
56 56 REVIDX_FLAGS_ORDER,
57 57 REVIDX_HASCOPIESINFO,
58 58 REVIDX_ISCENSORED,
59 59 REVIDX_RAWTEXT_CHANGING_FLAGS,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 docket as docketutil,
79 79 flagutil,
80 80 nodemap as nodemaputil,
81 81 revlogv0,
82 82 sidedata as sidedatautil,
83 83 )
84 84 from .utils import (
85 85 storageutil,
86 86 stringutil,
87 87 )
88 88
89 89 # blanket usage of all the names to prevent pyflakes complaints
90 90 # We need these names available in the module for extensions.
91 91
92 92 REVLOGV0
93 93 REVLOGV1
94 94 REVLOGV2
95 95 FLAG_INLINE_DATA
96 96 FLAG_GENERALDELTA
97 97 REVLOG_DEFAULT_FLAGS
98 98 REVLOG_DEFAULT_FORMAT
99 99 REVLOG_DEFAULT_VERSION
100 100 REVLOGV1_FLAGS
101 101 REVLOGV2_FLAGS
102 102 REVIDX_ISCENSORED
103 103 REVIDX_ELLIPSIS
104 104 REVIDX_HASCOPIESINFO
105 105 REVIDX_EXTSTORED
106 106 REVIDX_DEFAULT_FLAGS
107 107 REVIDX_FLAGS_ORDER
108 108 REVIDX_RAWTEXT_CHANGING_FLAGS
109 109
110 110 parsers = policy.importmod('parsers')
111 111 rustancestor = policy.importrust('ancestor')
112 112 rustdagop = policy.importrust('dagop')
113 113 rustrevlog = policy.importrust('revlog')
114 114
115 115 # Aliased for performance.
116 116 _zlibdecompress = zlib.decompress
117 117
118 118 # max size of revlog with inline data
119 119 _maxinline = 131072
120 120 _chunksize = 1048576
121 121
122 122 # Flag processors for REVIDX_ELLIPSIS.
123 123 def ellipsisreadprocessor(rl, text):
124 124 return text, False
125 125
126 126
127 127 def ellipsiswriteprocessor(rl, text):
128 128 return text, False
129 129
130 130
131 131 def ellipsisrawprocessor(rl, text):
132 132 return False
133 133
134 134
135 135 ellipsisprocessor = (
136 136 ellipsisreadprocessor,
137 137 ellipsiswriteprocessor,
138 138 ellipsisrawprocessor,
139 139 )
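# Editor's note (hedged): this 3-tuple follows the (read, write, raw)
# processor shape used by flagutil; it is assigned to
# self._flagprocessors[REVIDX_ELLIPSIS] in _init_opts() below when the
# b'enableellipsis' option is set.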
140 140
141 141
142 142 def offset_type(offset, type):
143 143 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
144 144 raise ValueError(b'unknown revlog index flags')
145 145 return int(int(offset) << 16 | type)
146 146
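# Worked example (editor's sketch, not upstream code): the packed value
# stores a 48-bit byte offset above 16 bits of flags, and the index
# accessors recover the two halves with a shift and a mask:
#
#   packed = offset_type(1024, 0)  # 1024 << 16 == 67108864
#   packed >> 16                   # -> 1024, cf. revlog.start()
#   packed & 0xFFFF                # -> 0, cf. revlog.flags()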
147 147
148 148 def _verify_revision(rl, skipflags, state, node):
149 149 """Verify the integrity of the given revlog ``node`` while providing a hook
150 150 point for extensions to influence the operation."""
151 151 if skipflags:
152 152 state[b'skipread'].add(node)
153 153 else:
154 154 # Side-effect: read content and verify hash.
155 155 rl.revision(node)
156 156
157 157
158 158 # True if a fast implementation for persistent-nodemap is available
159 159 #
160 160 # We also consider the "pure" python implementation to be "fast" because
161 161 # people using pure don't really have performance considerations (and a
162 162 # wheelbarrow of other slowness sources)
163 163 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
164 164 parsers, 'BaseIndexObject'
165 165 )
166 166
167 167
168 168 @attr.s(slots=True, frozen=True)
169 169 class _revisioninfo(object):
170 170 """Information about a revision that allows building its fulltext
171 171 node: expected hash of the revision
172 172 p1, p2: parent revs of the revision
173 173 btext: built text cache consisting of a one-element list
174 174 cachedelta: (baserev, uncompressed_delta) or None
175 175 flags: flags associated to the revision storage
176 176
177 177 One of btext[0] or cachedelta must be set.
178 178 """
179 179
180 180 node = attr.ib()
181 181 p1 = attr.ib()
182 182 p2 = attr.ib()
183 183 btext = attr.ib()
184 184 textlen = attr.ib()
185 185 cachedelta = attr.ib()
186 186 flags = attr.ib()
187 187
188 188
189 189 @interfaceutil.implementer(repository.irevisiondelta)
190 190 @attr.s(slots=True)
191 191 class revlogrevisiondelta(object):
192 192 node = attr.ib()
193 193 p1node = attr.ib()
194 194 p2node = attr.ib()
195 195 basenode = attr.ib()
196 196 flags = attr.ib()
197 197 baserevisionsize = attr.ib()
198 198 revision = attr.ib()
199 199 delta = attr.ib()
200 200 sidedata = attr.ib()
201 201 protocol_flags = attr.ib()
202 202 linknode = attr.ib(default=None)
203 203
204 204
205 205 @interfaceutil.implementer(repository.iverifyproblem)
206 206 @attr.s(frozen=True)
207 207 class revlogproblem(object):
208 208 warning = attr.ib(default=None)
209 209 error = attr.ib(default=None)
210 210 node = attr.ib(default=None)
211 211
212 212
213 213 def parse_index_v1(data, inline):
214 214 # call the C implementation to parse the index data
215 215 index, cache = parsers.parse_index2(data, inline)
216 216 return index, cache
217 217
218 218
219 219 def parse_index_v2(data, inline):
220 220 # call the C implementation to parse the index data
221 221 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
222 222 return index, cache
223 223
224 224
225 225 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
226 226
227 227 def parse_index_v1_nodemap(data, inline):
228 228 index, cache = parsers.parse_index_devel_nodemap(data, inline)
229 229 return index, cache
230 230
231 231
232 232 else:
233 233 parse_index_v1_nodemap = None
234 234
235 235
236 236 def parse_index_v1_mixed(data, inline):
237 237 index, cache = parse_index_v1(data, inline)
238 238 return rustrevlog.MixedIndex(index), cache
239 239
240 240
241 241 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
242 242 # signed integer)
243 243 _maxentrysize = 0x7FFFFFFF
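# (editor's note: 0x7FFFFFFF == 2**31 - 1, the largest value a 4-byte
# signed integer can hold, i.e. about 2 GiB)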
244 244
245 245
246 246 class revlog(object):
247 247 """
248 248 the underlying revision storage object
249 249
250 250 A revlog consists of two parts, an index and the revision data.
251 251
252 252 The index is a file with a fixed record size containing
253 253 information on each revision, including its nodeid (hash), the
254 254 nodeids of its parents, the position and offset of its data within
255 255 the data file, and the revision it's based on. Finally, each entry
256 256 contains a linkrev entry that can serve as a pointer to external
257 257 data.
258 258
259 259 The revision data itself is a linear collection of data chunks.
260 260 Each chunk represents a revision and is usually represented as a
261 261 delta against the previous chunk. To bound lookup time, runs of
262 262 deltas are limited to about 2 times the length of the original
263 263 version data. This makes retrieval of a version proportional to
264 264 its size, or O(1) relative to the number of revisions.
265 265
266 266 Both pieces of the revlog are written to in an append-only
267 267 fashion, which means we never need to rewrite a file to insert or
268 268 remove data, and can use some simple techniques to avoid the need
269 269 for locking while reading.
270 270
271 271 If checkambig, indexfile is opened with checkambig=True at
272 272 writing, to avoid file stat ambiguity.
273 273
274 274 If mmaplargeindex is True, and an mmapindexthreshold is set, the
275 275 index will be mmapped rather than read if it is larger than the
276 276 configured threshold.
277 277
278 278 If censorable is True, the revlog can have censored revisions.
279 279
280 280 If `upperboundcomp` is not None, this is the expected maximal gain from
281 281 compression for the data content.
282 282
283 283 `concurrencychecker` is an optional function that receives 3 arguments: a
284 284 file handle, a filename, and an expected position. It should check whether
285 285 the current position in the file handle is valid, and log/warn/fail (by
286 286 raising).
287 287 """
288 288
289 289 _flagserrorclass = error.RevlogError
290 290
291 291 def __init__(
292 292 self,
293 293 opener,
294 294 target,
295 295 radix,
296 296 postfix=None, # only exists for `tmpcensored` now
297 297 checkambig=False,
298 298 mmaplargeindex=False,
299 299 censorable=False,
300 300 upperboundcomp=None,
301 301 persistentnodemap=False,
302 302 concurrencychecker=None,
303 303 trypending=False,
304 304 ):
305 305 """
306 306 create a revlog object
307 307
308 308 opener is a function that abstracts the file opening operation
309 309 and can be used to implement COW semantics or the like.
310 310
311 311 ``target``: a (KIND, ID) tuple that identifies the content stored in
312 312 this revlog. It helps the rest of the code to understand what the revlog
313 313 is about without having to resort to heuristics and index filename
314 314 analysis. Note that this must reliably be set by normal code, but
315 315 that test, debug, or performance measurement code might not set this to
316 316 an accurate value.
317 317 """
318 318 self.upperboundcomp = upperboundcomp
319 319
320 320 self.radix = radix
321 321
322 322 self._docket_file = None
323 323 self._indexfile = None
324 324 self._datafile = None
325 325 self._nodemap_file = None
326 326 self.postfix = postfix
327 327 self._trypending = trypending
328 328 self.opener = opener
329 329 if persistentnodemap:
330 330 self._nodemap_file = nodemaputil.get_nodemap_file(self)
331 331
332 332 assert target[0] in ALL_KINDS
333 333 assert len(target) == 2
334 334 self.target = target
335 335 # When True, indexfile is opened with checkambig=True at writing, to
336 336 # avoid file stat ambiguity.
337 337 self._checkambig = checkambig
338 338 self._mmaplargeindex = mmaplargeindex
339 339 self._censorable = censorable
340 340 # 3-tuple of (node, rev, text) for a raw revision.
341 341 self._revisioncache = None
342 342 # Maps rev to chain base rev.
343 343 self._chainbasecache = util.lrucachedict(100)
344 344 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
345 345 self._chunkcache = (0, b'')
346 346 # How much data to read and cache into the raw revlog data cache.
347 347 self._chunkcachesize = 65536
348 348 self._maxchainlen = None
349 349 self._deltabothparents = True
350 350 self.index = None
351 351 self._docket = None
352 352 self._nodemap_docket = None
353 353 # Mapping of partial identifiers to full nodes.
354 354 self._pcache = {}
355 355 # Mapping of revision integer to full node.
356 356 self._compengine = b'zlib'
357 357 self._compengineopts = {}
358 358 self._maxdeltachainspan = -1
359 359 self._withsparseread = False
360 360 self._sparserevlog = False
361 361 self.hassidedata = False
362 362 self._srdensitythreshold = 0.50
363 363 self._srmingapsize = 262144
364 364
365 365 # Make copy of flag processors so each revlog instance can support
366 366 # custom flags.
367 367 self._flagprocessors = dict(flagutil.flagprocessors)
368 368
369 369 # 2-tuple of file handles being used for active writing.
370 370 self._writinghandles = None
371 371 # prevent nesting of addgroup
372 372 self._adding_group = None
373 373
374 374 self._loadindex()
375 375
376 376 self._concurrencychecker = concurrencychecker
377 377
378 378 def _init_opts(self):
379 379 """process options (from above/config) to set up the associated default revlog mode
380 380
381 381 These values might be affected when actually reading on disk information.
382 382
383 383 The relevant values are returned for use in _loadindex().
384 384
385 385 * newversionflags:
386 386 version header to use if we need to create a new revlog
387 387
388 388 * mmapindexthreshold:
389 389 minimal index size at which to start using mmap
390 390
391 391 * force_nodemap:
392 392 force the usage of a "development" version of the nodemap code
393 393 """
394 394 mmapindexthreshold = None
395 395 opts = self.opener.options
396 396
397 397 if b'revlogv2' in opts:
398 398 new_header = REVLOGV2 | FLAG_INLINE_DATA
399 399 elif b'revlogv1' in opts:
400 400 new_header = REVLOGV1 | FLAG_INLINE_DATA
401 401 if b'generaldelta' in opts:
402 402 new_header |= FLAG_GENERALDELTA
403 403 elif b'revlogv0' in self.opener.options:
404 404 new_header = REVLOGV0
405 405 else:
406 406 new_header = REVLOG_DEFAULT_VERSION
407 407
408 408 if b'chunkcachesize' in opts:
409 409 self._chunkcachesize = opts[b'chunkcachesize']
410 410 if b'maxchainlen' in opts:
411 411 self._maxchainlen = opts[b'maxchainlen']
412 412 if b'deltabothparents' in opts:
413 413 self._deltabothparents = opts[b'deltabothparents']
414 414 self._lazydelta = bool(opts.get(b'lazydelta', True))
415 415 self._lazydeltabase = False
416 416 if self._lazydelta:
417 417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
418 418 if b'compengine' in opts:
419 419 self._compengine = opts[b'compengine']
420 420 if b'zlib.level' in opts:
421 421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
422 422 if b'zstd.level' in opts:
423 423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
424 424 if b'maxdeltachainspan' in opts:
425 425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
426 426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
427 427 mmapindexthreshold = opts[b'mmapindexthreshold']
428 428 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
429 429 withsparseread = bool(opts.get(b'with-sparse-read', False))
430 430 # sparse-revlog forces sparse-read
431 431 self._withsparseread = self._sparserevlog or withsparseread
432 432 if b'sparse-read-density-threshold' in opts:
433 433 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
434 434 if b'sparse-read-min-gap-size' in opts:
435 435 self._srmingapsize = opts[b'sparse-read-min-gap-size']
436 436 if opts.get(b'enableellipsis'):
437 437 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 438
439 439 # revlog v0 doesn't have flag processors
440 440 for flag, processor in pycompat.iteritems(
441 441 opts.get(b'flagprocessors', {})
442 442 ):
443 443 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
444 444
445 445 if self._chunkcachesize <= 0:
446 446 raise error.RevlogError(
447 447 _(b'revlog chunk cache size %r is not greater than 0')
448 448 % self._chunkcachesize
449 449 )
450 450 elif self._chunkcachesize & (self._chunkcachesize - 1):
451 451 raise error.RevlogError(
452 452 _(b'revlog chunk cache size %r is not a power of 2')
453 453 % self._chunkcachesize
454 454 )
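# Editor's illustration (not upstream code): for positive x,
# `x & (x - 1) == 0` iff x is a power of two, since a power of two has a
# single bit set and subtracting 1 flips every bit below it:
#   65536 & 65535 == 0x10000 & 0x0FFFF == 0  (accepted)
#   65537 & 65536 == 0x10001 & 0x10000 != 0  (rejected)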
455 455 force_nodemap = opts.get(b'devel-force-nodemap', False)
456 456 return new_header, mmapindexthreshold, force_nodemap
457 457
458 458 def _get_data(self, filepath, mmap_threshold, size=None):
459 459 """return a file content with or without mmap
460 460
461 461 If the file is missing return the empty string"""
462 462 try:
463 463 with self.opener(filepath) as fp:
464 464 if mmap_threshold is not None:
465 465 file_size = self.opener.fstat(fp).st_size
466 466 if file_size >= mmap_threshold:
467 467 if size is not None:
468 468 # avoid potential mmap crash
469 469 size = min(file_size, size)
470 470 # TODO: should .close() to release resources without
471 471 # relying on Python GC
472 472 if size is None:
473 473 return util.buffer(util.mmapread(fp))
474 474 else:
475 475 return util.buffer(util.mmapread(fp, size))
476 476 if size is None:
477 477 return fp.read()
478 478 else:
479 479 return fp.read(size)
480 480 except IOError as inst:
481 481 if inst.errno != errno.ENOENT:
482 482 raise
483 483 return b''
484 484
485 485 def _loadindex(self):
486 486
487 487 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
488 488
489 489 if self.postfix is not None:
490 490 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
491 491 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
492 492 entry_point = b'%s.i.a' % self.radix
493 493 else:
494 494 entry_point = b'%s.i' % self.radix
495 495
496 496 entry_data = b''
497 497 self._initempty = True
498 498 entry_data = self._get_data(entry_point, mmapindexthreshold)
499 499 if len(entry_data) > 0:
500 500 header = INDEX_HEADER.unpack(entry_data[:4])[0]
501 501 self._initempty = False
502 502 else:
503 503 header = new_header
504 504
505 505 self._format_flags = header & ~0xFFFF
506 506 self._format_version = header & 0xFFFF
507 507
508 508 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
509 509 if supported_flags is None:
510 510 msg = _(b'unknown version (%d) in revlog %s')
511 511 msg %= (self._format_version, self.display_id)
512 512 raise error.RevlogError(msg)
513 513 elif self._format_flags & ~supported_flags:
514 514 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
515 515 display_flag = self._format_flags >> 16
516 516 msg %= (display_flag, self._format_version, self.display_id)
517 517 raise error.RevlogError(msg)
518 518
519 519 features = FEATURES_BY_VERSION[self._format_version]
520 520 self._inline = features[b'inline'](self._format_flags)
521 521 self._generaldelta = features[b'generaldelta'](self._format_flags)
522 522 self.hassidedata = features[b'sidedata']
523 523
524 524 if not features[b'docket']:
525 525 self._indexfile = entry_point
526 526 index_data = entry_data
527 527 else:
528 528 self._docket_file = entry_point
529 529 if self._initempty:
530 530 self._docket = docketutil.default_docket(self, header)
531 531 else:
532 532 self._docket = docketutil.parse_docket(
533 533 self, entry_data, use_pending=self._trypending
534 534 )
535 535 self._indexfile = self._docket.index_filepath()
536 536 index_data = b''
537 537 index_size = self._docket.index_end
538 538 if index_size > 0:
539 539 index_data = self._get_data(
540 540 self._indexfile, mmapindexthreshold, size=index_size
541 541 )
542 542 if len(index_data) < index_size:
543 543 msg = _(b'too few index data for %s: got %d, expected %d')
544 544 msg %= (self.display_id, len(index_data), index_size)
545 545 raise error.RevlogError(msg)
546 546
547 547 self._inline = False
548 548 # generaldelta implied by version 2 revlogs.
549 549 self._generaldelta = True
550 550 # the logic for persistent nodemap will be dealt with within the
551 551 # main docket, so disable it for now.
552 552 self._nodemap_file = None
553 553
554 554 if self.postfix is None:
555 555 self._datafile = b'%s.d' % self.radix
556 556 else:
557 557 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
558 558
559 559 self.nodeconstants = sha1nodeconstants
560 560 self.nullid = self.nodeconstants.nullid
561 561
562 562 # sparse-revlog can't be on without general-delta (issue6056)
563 563 if not self._generaldelta:
564 564 self._sparserevlog = False
565 565
566 566 self._storedeltachains = True
567 567
568 568 devel_nodemap = (
569 569 self._nodemap_file
570 570 and force_nodemap
571 571 and parse_index_v1_nodemap is not None
572 572 )
573 573
574 574 use_rust_index = False
575 575 if rustrevlog is not None:
576 576 if self._nodemap_file is not None:
577 577 use_rust_index = True
578 578 else:
579 579 use_rust_index = self.opener.options.get(b'rust.index')
580 580
581 581 self._parse_index = parse_index_v1
582 582 if self._format_version == REVLOGV0:
583 583 self._parse_index = revlogv0.parse_index_v0
584 584 elif self._format_version == REVLOGV2:
585 585 self._parse_index = parse_index_v2
586 586 elif devel_nodemap:
587 587 self._parse_index = parse_index_v1_nodemap
588 588 elif use_rust_index:
589 589 self._parse_index = parse_index_v1_mixed
590 590 try:
591 591 d = self._parse_index(index_data, self._inline)
592 592 index, _chunkcache = d
593 593 use_nodemap = (
594 594 not self._inline
595 595 and self._nodemap_file is not None
596 596 and util.safehasattr(index, 'update_nodemap_data')
597 597 )
598 598 if use_nodemap:
599 599 nodemap_data = nodemaputil.persisted_data(self)
600 600 if nodemap_data is not None:
601 601 docket = nodemap_data[0]
602 602 if (
603 603 len(d[0]) > docket.tip_rev
604 604 and d[0][docket.tip_rev][7] == docket.tip_node
605 605 ):
606 606 # no changelog tampering
607 607 self._nodemap_docket = docket
608 608 index.update_nodemap_data(*nodemap_data)
609 609 except (ValueError, IndexError):
610 610 raise error.RevlogError(
611 611 _(b"index %s is corrupted") % self.display_id
612 612 )
613 613 self.index, self._chunkcache = d
614 614 if not self._chunkcache:
615 615 self._chunkclear()
616 616 # revnum -> (chain-length, sum-delta-length)
617 617 self._chaininfocache = util.lrucachedict(500)
618 618 # revlog header -> revlog compressor
619 619 self._decompressors = {}
620 620
621 621 @util.propertycache
622 622 def revlog_kind(self):
623 623 return self.target[0]
624 624
625 625 @util.propertycache
626 626 def display_id(self):
627 627 """The public-facing "ID" of the revlog that we use in messages"""
628 628 # Maybe we should build a user facing representation of
629 629 # revlog.target instead of using `self.radix`
630 630 return self.radix
631 631
632 632 @util.propertycache
633 633 def _compressor(self):
634 634 engine = util.compengines[self._compengine]
635 635 return engine.revlogcompressor(self._compengineopts)
636 636
637 637 def _indexfp(self):
638 638 """file object for the revlog's index file"""
639 639 return self.opener(self._indexfile, mode=b"r")
640 640
641 641 def __index_write_fp(self):
642 642 # You should not use this directly; use `_writing` instead
643 643 try:
644 644 f = self.opener(
645 645 self._indexfile, mode=b"r+", checkambig=self._checkambig
646 646 )
647 647 if self._docket is None:
648 648 f.seek(0, os.SEEK_END)
649 649 else:
650 650 f.seek(self._docket.index_end, os.SEEK_SET)
651 651 return f
652 652 except IOError as inst:
653 653 if inst.errno != errno.ENOENT:
654 654 raise
655 655 return self.opener(
656 656 self._indexfile, mode=b"w+", checkambig=self._checkambig
657 657 )
658 658
659 659 def __index_new_fp(self):
660 660 # You should not use this unless you are upgrading from inline revlog
661 661 return self.opener(
662 662 self._indexfile,
663 663 mode=b"w",
664 664 checkambig=self._checkambig,
665 665 atomictemp=True,
666 666 )
667 667
668 668 def _datafp(self, mode=b'r'):
669 669 """file object for the revlog's data file"""
670 670 return self.opener(self._datafile, mode=mode)
671 671
672 672 @contextlib.contextmanager
673 673 def _datareadfp(self, existingfp=None):
674 674 """file object suitable to read data"""
675 675 # Use explicit file handle, if given.
676 676 if existingfp is not None:
677 677 yield existingfp
678 678
679 679 # Use a file handle being actively used for writes, if available.
680 680 # There is some danger to doing this because reads will seek the
681 681 # file. However, _writeentry() performs a SEEK_END before all writes,
682 682 # so we should be safe.
683 683 elif self._writinghandles:
684 684 if self._inline:
685 685 yield self._writinghandles[0]
686 686 else:
687 687 yield self._writinghandles[1]
688 688
689 689 # Otherwise open a new file handle.
690 690 else:
691 691 if self._inline:
692 692 func = self._indexfp
693 693 else:
694 694 func = self._datafp
695 695 with func() as fp:
696 696 yield fp
697 697
698 698 def tiprev(self):
699 699 return len(self.index) - 1
700 700
701 701 def tip(self):
702 702 return self.node(self.tiprev())
703 703
704 704 def __contains__(self, rev):
705 705 return 0 <= rev < len(self)
706 706
707 707 def __len__(self):
708 708 return len(self.index)
709 709
710 710 def __iter__(self):
711 711 return iter(pycompat.xrange(len(self)))
712 712
713 713 def revs(self, start=0, stop=None):
714 714 """iterate over all revs in this revlog (from start to stop)"""
715 715 return storageutil.iterrevs(len(self), start=start, stop=stop)
716 716
717 717 @property
718 718 def nodemap(self):
719 719 msg = (
720 720 b"revlog.nodemap is deprecated, "
721 721 b"use revlog.index.[has_node|rev|get_rev]"
722 722 )
723 723 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
724 724 return self.index.nodemap
725 725
726 726 @property
727 727 def _nodecache(self):
728 728 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
729 729 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
730 730 return self.index.nodemap
731 731
732 732 def hasnode(self, node):
733 733 try:
734 734 self.rev(node)
735 735 return True
736 736 except KeyError:
737 737 return False
738 738
739 739 def candelta(self, baserev, rev):
740 740 """whether two revisions (baserev, rev) can be delta-ed or not"""
741 741 # Disable delta if either rev requires a content-changing flag
742 742 # processor (ex. LFS). This is because such flag processor can alter
743 743 # the rawtext content that the delta will be based on, and two clients
744 744 # could have a same revlog node with different flags (i.e. different
745 745 # rawtext contents) and the delta could be incompatible.
746 746 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
747 747 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
748 748 ):
749 749 return False
750 750 return True
751 751
752 752 def update_caches(self, transaction):
753 753 if self._nodemap_file is not None:
754 754 if transaction is None:
755 755 nodemaputil.update_persistent_nodemap(self)
756 756 else:
757 757 nodemaputil.setup_persistent_nodemap(transaction, self)
758 758
759 759 def clearcaches(self):
760 760 self._revisioncache = None
761 761 self._chainbasecache.clear()
762 762 self._chunkcache = (0, b'')
763 763 self._pcache = {}
764 764 self._nodemap_docket = None
765 765 self.index.clearcaches()
766 766 # The python code is the one responsible for validating the docket, so we
767 767 # end up having to refresh it here.
768 768 use_nodemap = (
769 769 not self._inline
770 770 and self._nodemap_file is not None
771 771 and util.safehasattr(self.index, 'update_nodemap_data')
772 772 )
773 773 if use_nodemap:
774 774 nodemap_data = nodemaputil.persisted_data(self)
775 775 if nodemap_data is not None:
776 776 self._nodemap_docket = nodemap_data[0]
777 777 self.index.update_nodemap_data(*nodemap_data)
778 778
779 779 def rev(self, node):
780 780 try:
781 781 return self.index.rev(node)
782 782 except TypeError:
783 783 raise
784 784 except error.RevlogError:
785 785 # parsers.c radix tree lookup failed
786 786 if (
787 787 node == self.nodeconstants.wdirid
788 788 or node in self.nodeconstants.wdirfilenodeids
789 789 ):
790 790 raise error.WdirUnsupported
791 791 raise error.LookupError(node, self.display_id, _(b'no node'))
792 792
793 793 # Accessors for index entries.
794 794
795 795 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
796 796 # are flags.
797 797 def start(self, rev):
798 798 return int(self.index[rev][0] >> 16)
799 799
800 800 def flags(self, rev):
801 801 return self.index[rev][0] & 0xFFFF
802 802
803 803 def length(self, rev):
804 804 return self.index[rev][1]
805 805
806 806 def sidedata_length(self, rev):
807 807 if not self.hassidedata:
808 808 return 0
809 809 return self.index[rev][9]
810 810
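# Editor's summary, inferred from the accessors in this file (hedged):
# an index entry tuple is addressed as
#   e[0] offset << 16 | flags    e[1] compressed length
#   e[2] uncompressed length     e[3] delta-base rev
#   e[4] linkrev                 e[5], e[6] parent revs
#   e[7] node id                 e[9] sidedata length (when hassidedata)
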
811 811 def rawsize(self, rev):
812 812 """return the length of the uncompressed text for a given revision"""
813 813 l = self.index[rev][2]
814 814 if l >= 0:
815 815 return l
816 816
817 817 t = self.rawdata(rev)
818 818 return len(t)
819 819
820 820 def size(self, rev):
821 821 """length of non-raw text (processed by a "read" flag processor)"""
822 822 # fast path: if no "read" flag processor could change the content,
823 823 # size is rawsize. note: ELLIPSIS is known to not change the content.
824 824 flags = self.flags(rev)
825 825 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
826 826 return self.rawsize(rev)
827 827
828 828 return len(self.revision(rev, raw=False))
829 829
830 830 def chainbase(self, rev):
831 831 base = self._chainbasecache.get(rev)
832 832 if base is not None:
833 833 return base
834 834
835 835 index = self.index
836 836 iterrev = rev
837 837 base = index[iterrev][3]
838 838 while base != iterrev:
839 839 iterrev = base
840 840 base = index[iterrev][3]
841 841
842 842 self._chainbasecache[rev] = base
843 843 return base
844 844
845 845 def linkrev(self, rev):
846 846 return self.index[rev][4]
847 847
848 848 def parentrevs(self, rev):
849 849 try:
850 850 entry = self.index[rev]
851 851 except IndexError:
852 852 if rev == wdirrev:
853 853 raise error.WdirUnsupported
854 854 raise
855 855 if entry[5] == nullrev:
856 856 return entry[6], entry[5]
857 857 else:
858 858 return entry[5], entry[6]
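# For example (editor's illustration): an entry stored with p1 == nullrev
# and p2 == 5 is reported as (5, nullrev), so callers can rely on a null
# parent never preceding a real one.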
859 859
860 860 # fast parentrevs(rev) where rev isn't filtered
861 861 _uncheckedparentrevs = parentrevs
862 862
863 863 def node(self, rev):
864 864 try:
865 865 return self.index[rev][7]
866 866 except IndexError:
867 867 if rev == wdirrev:
868 868 raise error.WdirUnsupported
869 869 raise
870 870
871 871 # Derived from index values.
872 872
873 873 def end(self, rev):
874 874 return self.start(rev) + self.length(rev)
875 875
876 876 def parents(self, node):
877 877 i = self.index
878 878 d = i[self.rev(node)]
879 879 # inline node() to avoid function call overhead
880 880 if d[5] == nullrev:
881 881 return i[d[6]][7], i[d[5]][7]
882 882 else:
883 883 return i[d[5]][7], i[d[6]][7]
884 884
885 885 def chainlen(self, rev):
886 886 return self._chaininfo(rev)[0]
887 887
888 888 def _chaininfo(self, rev):
889 889 chaininfocache = self._chaininfocache
890 890 if rev in chaininfocache:
891 891 return chaininfocache[rev]
892 892 index = self.index
893 893 generaldelta = self._generaldelta
894 894 iterrev = rev
895 895 e = index[iterrev]
896 896 clen = 0
897 897 compresseddeltalen = 0
898 898 while iterrev != e[3]:
899 899 clen += 1
900 900 compresseddeltalen += e[1]
901 901 if generaldelta:
902 902 iterrev = e[3]
903 903 else:
904 904 iterrev -= 1
905 905 if iterrev in chaininfocache:
906 906 t = chaininfocache[iterrev]
907 907 clen += t[0]
908 908 compresseddeltalen += t[1]
909 909 break
910 910 e = index[iterrev]
911 911 else:
912 912 # Add text length of base since decompressing that also takes
913 913 # work. For cache hits the length is already included.
914 914 compresseddeltalen += e[1]
915 915 r = (clen, compresseddeltalen)
916 916 chaininfocache[rev] = r
917 917 return r
918 918
919 919 def _deltachain(self, rev, stoprev=None):
920 920 """Obtain the delta chain for a revision.
921 921
922 922 ``stoprev`` specifies a revision to stop at. If not specified, we
923 923 stop at the base of the chain.
924 924
925 925 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
926 926 revs in ascending order and ``stopped`` is a bool indicating whether
927 927 ``stoprev`` was hit.
928 928 """
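# Hedged example (editor's sketch): with generaldelta, if rev 6 deltas
# against rev 5, rev 5 against rev 2, and rev 2 is a full snapshot (its
# base is itself), then:
#   _deltachain(6)            -> ([2, 5, 6], False)
#   _deltachain(6, stoprev=5) -> ([6], True)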
929 929 # Try C implementation.
930 930 try:
931 931 return self.index.deltachain(rev, stoprev, self._generaldelta)
932 932 except AttributeError:
933 933 pass
934 934
935 935 chain = []
936 936
937 937 # Alias to prevent attribute lookup in tight loop.
938 938 index = self.index
939 939 generaldelta = self._generaldelta
940 940
941 941 iterrev = rev
942 942 e = index[iterrev]
943 943 while iterrev != e[3] and iterrev != stoprev:
944 944 chain.append(iterrev)
945 945 if generaldelta:
946 946 iterrev = e[3]
947 947 else:
948 948 iterrev -= 1
949 949 e = index[iterrev]
950 950
951 951 if iterrev == stoprev:
952 952 stopped = True
953 953 else:
954 954 chain.append(iterrev)
955 955 stopped = False
956 956
957 957 chain.reverse()
958 958 return chain, stopped
959 959
960 960 def ancestors(self, revs, stoprev=0, inclusive=False):
961 961 """Generate the ancestors of 'revs' in reverse revision order.
962 962 Does not generate revs lower than stoprev.
963 963
964 964 See the documentation for ancestor.lazyancestors for more details."""
965 965
966 966 # first, make sure start revisions aren't filtered
967 967 revs = list(revs)
968 968 checkrev = self.node
969 969 for r in revs:
970 970 checkrev(r)
971 971 # and we're sure ancestors aren't filtered as well
972 972
973 973 if rustancestor is not None:
974 974 lazyancestors = rustancestor.LazyAncestors
975 975 arg = self.index
976 976 else:
977 977 lazyancestors = ancestor.lazyancestors
978 978 arg = self._uncheckedparentrevs
979 979 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
980 980
981 981 def descendants(self, revs):
982 982 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
983 983
984 984 def findcommonmissing(self, common=None, heads=None):
985 985 """Return a tuple of the ancestors of common and the ancestors of heads
986 986 that are not ancestors of common. In revset terminology, we return the
987 987 tuple:
988 988
989 989 ::common, (::heads) - (::common)
990 990
991 991 The list is sorted by revision number, meaning it is
992 992 topologically sorted.
993 993
994 994 'heads' and 'common' are both lists of node IDs. If heads is
995 995 not supplied, uses all of the revlog's heads. If common is not
996 996 supplied, uses nullid."""
997 997 if common is None:
998 998 common = [self.nullid]
999 999 if heads is None:
1000 1000 heads = self.heads()
1001 1001
1002 1002 common = [self.rev(n) for n in common]
1003 1003 heads = [self.rev(n) for n in heads]
1004 1004
1005 1005 # we want the ancestors, but inclusive
1006 1006 class lazyset(object):
1007 1007 def __init__(self, lazyvalues):
1008 1008 self.addedvalues = set()
1009 1009 self.lazyvalues = lazyvalues
1010 1010
1011 1011 def __contains__(self, value):
1012 1012 return value in self.addedvalues or value in self.lazyvalues
1013 1013
1014 1014 def __iter__(self):
1015 1015 added = self.addedvalues
1016 1016 for r in added:
1017 1017 yield r
1018 1018 for r in self.lazyvalues:
1019 1019 if r not in added:
1020 1020 yield r
1021 1021
1022 1022 def add(self, value):
1023 1023 self.addedvalues.add(value)
1024 1024
1025 1025 def update(self, values):
1026 1026 self.addedvalues.update(values)
1027 1027
1028 1028 has = lazyset(self.ancestors(common))
1029 1029 has.add(nullrev)
1030 1030 has.update(common)
1031 1031
1032 1032 # take all ancestors from heads that aren't in has
1033 1033 missing = set()
1034 1034 visit = collections.deque(r for r in heads if r not in has)
1035 1035 while visit:
1036 1036 r = visit.popleft()
1037 1037 if r in missing:
1038 1038 continue
1039 1039 else:
1040 1040 missing.add(r)
1041 1041 for p in self.parentrevs(r):
1042 1042 if p not in has:
1043 1043 visit.append(p)
1044 1044 missing = list(missing)
1045 1045 missing.sort()
1046 1046 return has, [self.node(miss) for miss in missing]
1047 1047
1048 1048 def incrementalmissingrevs(self, common=None):
1049 1049 """Return an object that can be used to incrementally compute the
1050 1050 revision numbers of the ancestors of arbitrary sets that are not
1051 1051 ancestors of common. This is an ancestor.incrementalmissingancestors
1052 1052 object.
1053 1053
1054 1054 'common' is a list of revision numbers. If common is not supplied, uses
1055 1055 nullrev.
1056 1056 """
1057 1057 if common is None:
1058 1058 common = [nullrev]
1059 1059
1060 1060 if rustancestor is not None:
1061 1061 return rustancestor.MissingAncestors(self.index, common)
1062 1062 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1063 1063
1064 1064 def findmissingrevs(self, common=None, heads=None):
1065 1065 """Return the revision numbers of the ancestors of heads that
1066 1066 are not ancestors of common.
1067 1067
1068 1068 More specifically, return a list of revision numbers corresponding to
1069 1069 nodes N such that every N satisfies the following constraints:
1070 1070
1071 1071 1. N is an ancestor of some node in 'heads'
1072 1072 2. N is not an ancestor of any node in 'common'
1073 1073
1074 1074 The list is sorted by revision number, meaning it is
1075 1075 topologically sorted.
1076 1076
1077 1077 'heads' and 'common' are both lists of revision numbers. If heads is
1078 1078 not supplied, uses all of the revlog's heads. If common is not
1079 1079 supplied, uses nullid."""
1080 1080 if common is None:
1081 1081 common = [nullrev]
1082 1082 if heads is None:
1083 1083 heads = self.headrevs()
1084 1084
1085 1085 inc = self.incrementalmissingrevs(common=common)
1086 1086 return inc.missingancestors(heads)
1087 1087
1088 1088 def findmissing(self, common=None, heads=None):
1089 1089 """Return the ancestors of heads that are not ancestors of common.
1090 1090
1091 1091 More specifically, return a list of nodes N such that every N
1092 1092 satisfies the following constraints:
1093 1093
1094 1094 1. N is an ancestor of some node in 'heads'
1095 1095 2. N is not an ancestor of any node in 'common'
1096 1096
1097 1097 The list is sorted by revision number, meaning it is
1098 1098 topologically sorted.
1099 1099
1100 1100 'heads' and 'common' are both lists of node IDs. If heads is
1101 1101 not supplied, uses all of the revlog's heads. If common is not
1102 1102 supplied, uses nullid."""
1103 1103 if common is None:
1104 1104 common = [self.nullid]
1105 1105 if heads is None:
1106 1106 heads = self.heads()
1107 1107
1108 1108 common = [self.rev(n) for n in common]
1109 1109 heads = [self.rev(n) for n in heads]
1110 1110
1111 1111 inc = self.incrementalmissingrevs(common=common)
1112 1112 return [self.node(r) for r in inc.missingancestors(heads)]
1113 1113
1114 1114 def nodesbetween(self, roots=None, heads=None):
1115 1115 """Return a topological path from 'roots' to 'heads'.
1116 1116
1117 1117 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1118 1118 topologically sorted list of all nodes N that satisfy both of
1119 1119 these constraints:
1120 1120
1121 1121 1. N is a descendant of some node in 'roots'
1122 1122 2. N is an ancestor of some node in 'heads'
1123 1123
1124 1124 Every node is considered to be both a descendant and an ancestor
1125 1125 of itself, so every reachable node in 'roots' and 'heads' will be
1126 1126 included in 'nodes'.
1127 1127
1128 1128 'outroots' is the list of reachable nodes in 'roots', i.e., the
1129 1129 subset of 'roots' that is returned in 'nodes'. Likewise,
1130 1130 'outheads' is the subset of 'heads' that is also in 'nodes'.
1131 1131
1132 1132 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1133 1133 unspecified, uses nullid as the only root. If 'heads' is
1134 1134 unspecified, uses list of all of the revlog's heads."""
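# Hedged illustration (editor's sketch over an imagined tiny DAG):
#
#   0 -- 1 -- 2
#         \
#          3
#
# nodesbetween(roots=[node(1)], heads=[node(3)]) would return
# ([node(1), node(3)], [node(1)], [node(3)]); rev 2 is excluded because
# it is not an ancestor of any requested head.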
1135 1135 nonodes = ([], [], [])
1136 1136 if roots is not None:
1137 1137 roots = list(roots)
1138 1138 if not roots:
1139 1139 return nonodes
1140 1140 lowestrev = min([self.rev(n) for n in roots])
1141 1141 else:
1142 1142 roots = [self.nullid] # Everybody's a descendant of nullid
1143 1143 lowestrev = nullrev
1144 1144 if (lowestrev == nullrev) and (heads is None):
1145 1145 # We want _all_ the nodes!
1146 1146 return (
1147 1147 [self.node(r) for r in self],
1148 1148 [self.nullid],
1149 1149 list(self.heads()),
1150 1150 )
1151 1151 if heads is None:
1152 1152 # All nodes are ancestors, so the latest ancestor is the last
1153 1153 # node.
1154 1154 highestrev = len(self) - 1
1155 1155 # Set ancestors to None to signal that every node is an ancestor.
1156 1156 ancestors = None
1157 1157 # Set heads to an empty dictionary for later discovery of heads
1158 1158 heads = {}
1159 1159 else:
1160 1160 heads = list(heads)
1161 1161 if not heads:
1162 1162 return nonodes
1163 1163 ancestors = set()
1164 1164 # Turn heads into a dictionary so we can remove 'fake' heads.
1165 1165 # Also, later we will be using it to filter out the heads we can't
1166 1166 # find from roots.
1167 1167 heads = dict.fromkeys(heads, False)
1168 1168 # Start at the top and keep marking parents until we're done.
1169 1169 nodestotag = set(heads)
1170 1170 # Remember where the top was so we can use it as a limit later.
1171 1171 highestrev = max([self.rev(n) for n in nodestotag])
1172 1172 while nodestotag:
1173 1173 # grab a node to tag
1174 1174 n = nodestotag.pop()
1175 1175 # Never tag nullid
1176 1176 if n == self.nullid:
1177 1177 continue
1178 1178 # A node's revision number represents its place in a
1179 1179 # topologically sorted list of nodes.
1180 1180 r = self.rev(n)
1181 1181 if r >= lowestrev:
1182 1182 if n not in ancestors:
1183 1183 # If we are possibly a descendant of one of the roots
1184 1184 # and we haven't already been marked as an ancestor
1185 1185 ancestors.add(n) # Mark as ancestor
1186 1186 # Add non-nullid parents to list of nodes to tag.
1187 1187 nodestotag.update(
1188 1188 [p for p in self.parents(n) if p != self.nullid]
1189 1189 )
1190 1190 elif n in heads: # We've seen it before, is it a fake head?
1191 1191 # So it is, real heads should not be the ancestors of
1192 1192 # any other heads.
1193 1193 heads.pop(n)
1194 1194 if not ancestors:
1195 1195 return nonodes
1196 1196 # Now that we have our set of ancestors, we want to remove any
1197 1197 # roots that are not ancestors.
1198 1198
1199 1199 # If one of the roots was nullid, everything is included anyway.
1200 1200 if lowestrev > nullrev:
1201 1201 # But, since we weren't, let's recompute the lowest rev to not
1202 1202 # include roots that aren't ancestors.
1203 1203
1204 1204 # Filter out roots that aren't ancestors of heads
1205 1205 roots = [root for root in roots if root in ancestors]
1206 1206 # Recompute the lowest revision
1207 1207 if roots:
1208 1208 lowestrev = min([self.rev(root) for root in roots])
1209 1209 else:
1210 1210 # No more roots? Return empty list
1211 1211 return nonodes
1212 1212 else:
1213 1213 # We are descending from nullid, and don't need to care about
1214 1214 # any other roots.
1215 1215 lowestrev = nullrev
1216 1216 roots = [self.nullid]
1217 1217 # Transform our roots list into a set.
1218 1218 descendants = set(roots)
1219 1219 # Also, keep the original roots so we can filter out roots that aren't
1220 1220 # 'real' roots (i.e. are descended from other roots).
1221 1221 roots = descendants.copy()
1222 1222 # Our topologically sorted list of output nodes.
1223 1223 orderedout = []
1224 1224 # Don't start at nullid since we don't want nullid in our output list,
1225 1225 # and if nullid shows up in descendants, empty parents will look like
1226 1226 # they're descendants.
1227 1227 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1228 1228 n = self.node(r)
1229 1229 isdescendant = False
1230 1230 if lowestrev == nullrev: # Everybody is a descendant of nullid
1231 1231 isdescendant = True
1232 1232 elif n in descendants:
1233 1233 # n is already a descendant
1234 1234 isdescendant = True
1235 1235 # This check only needs to be done here because all the roots
1236 1236 # will start being marked as descendants before the loop.
1237 1237 if n in roots:
1238 1238 # If n was a root, check if it's a 'real' root.
1239 1239 p = tuple(self.parents(n))
1240 1240 # If any of its parents are descendants, it's not a root.
1241 1241 if (p[0] in descendants) or (p[1] in descendants):
1242 1242 roots.remove(n)
1243 1243 else:
1244 1244 p = tuple(self.parents(n))
1245 1245 # A node is a descendant if either of its parents are
1246 1246 # descendants. (We seeded the descendants set with the roots
1247 1247 # up there, remember?)
1248 1248 if (p[0] in descendants) or (p[1] in descendants):
1249 1249 descendants.add(n)
1250 1250 isdescendant = True
1251 1251 if isdescendant and ((ancestors is None) or (n in ancestors)):
1252 1252 # Only include nodes that are both descendants and ancestors.
1253 1253 orderedout.append(n)
1254 1254 if (ancestors is not None) and (n in heads):
1255 1255 # We're trying to figure out which heads are reachable
1256 1256 # from roots.
1257 1257 # Mark this head as having been reached
1258 1258 heads[n] = True
1259 1259 elif ancestors is None:
1260 1260 # Otherwise, we're trying to discover the heads.
1261 1261 # Assume this is a head because if it isn't, the next step
1262 1262 # will eventually remove it.
1263 1263 heads[n] = True
1264 1264 # But, obviously its parents aren't.
1265 1265 for p in self.parents(n):
1266 1266 heads.pop(p, None)
1267 1267 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1268 1268 roots = list(roots)
1269 1269 assert orderedout
1270 1270 assert roots
1271 1271 assert heads
1272 1272 return (orderedout, roots, heads)
1273 1273
1274 1274 def headrevs(self, revs=None):
1275 1275 if revs is None:
1276 1276 try:
1277 1277 return self.index.headrevs()
1278 1278 except AttributeError:
1279 1279 return self._headrevs()
1280 1280 if rustdagop is not None:
1281 1281 return rustdagop.headrevs(self.index, revs)
1282 1282 return dagop.headrevs(revs, self._uncheckedparentrevs)
1283 1283
1284 1284 def computephases(self, roots):
1285 1285 return self.index.computephasesmapsets(roots)
1286 1286
1287 1287 def _headrevs(self):
1288 1288 count = len(self)
1289 1289 if not count:
1290 1290 return [nullrev]
1291 1291 # we won't iterate over filtered revs, so nobody is a head at the start
1292 1292 ishead = [0] * (count + 1)
1293 1293 index = self.index
1294 1294 for r in self:
1295 1295 ishead[r] = 1 # I may be a head
1296 1296 e = index[r]
1297 1297 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1298 1298 return [r for r, val in enumerate(ishead) if val]
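# For instance (editor's note): in a linear history 0 <- 1 <- 2 every rev
# is first marked as a head, then unmarked once seen as a parent, leaving
# only rev 2.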
1299 1299
1300 1300 def heads(self, start=None, stop=None):
1301 1301 """return the list of all nodes that have no children
1302 1302
1303 1303 if start is specified, only heads that are descendants of
1304 1304 start will be returned
1305 1305 if stop is specified, it will consider all the revs from stop
1306 1306 as if they had no children
1307 1307 """
1308 1308 if start is None and stop is None:
1309 1309 if not len(self):
1310 1310 return [self.nullid]
1311 1311 return [self.node(r) for r in self.headrevs()]
1312 1312
1313 1313 if start is None:
1314 1314 start = nullrev
1315 1315 else:
1316 1316 start = self.rev(start)
1317 1317
1318 1318 stoprevs = {self.rev(n) for n in stop or []}
1319 1319
1320 1320 revs = dagop.headrevssubset(
1321 1321 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1322 1322 )
1323 1323
1324 1324 return [self.node(rev) for rev in revs]
1325 1325
1326 1326 def children(self, node):
1327 1327 """find the children of a given node"""
1328 1328 c = []
1329 1329 p = self.rev(node)
1330 1330 for r in self.revs(start=p + 1):
1331 1331 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1332 1332 if prevs:
1333 1333 for pr in prevs:
1334 1334 if pr == p:
1335 1335 c.append(self.node(r))
1336 1336 elif p == nullrev:
1337 1337 c.append(self.node(r))
1338 1338 return c
1339 1339
1340 1340 def commonancestorsheads(self, a, b):
1341 1341 """calculate all the heads of the common ancestors of nodes a and b"""
1342 1342 a, b = self.rev(a), self.rev(b)
1343 1343 ancs = self._commonancestorsheads(a, b)
1344 1344 return pycompat.maplist(self.node, ancs)
1345 1345
1346 1346 def _commonancestorsheads(self, *revs):
1347 1347 """calculate all the heads of the common ancestors of revs"""
1348 1348 try:
1349 1349 ancs = self.index.commonancestorsheads(*revs)
1350 1350 except (AttributeError, OverflowError): # C implementation failed
1351 1351 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1352 1352 return ancs
1353 1353
1354 1354 def isancestor(self, a, b):
1355 1355 """return True if node a is an ancestor of node b
1356 1356
1357 1357 A revision is considered an ancestor of itself."""
1358 1358 a, b = self.rev(a), self.rev(b)
1359 1359 return self.isancestorrev(a, b)
1360 1360
1361 1361 def isancestorrev(self, a, b):
1362 1362 """return True if revision a is an ancestor of revision b
1363 1363
1364 1364 A revision is considered an ancestor of itself.
1365 1365
1366 1366 The implementation of this is trivial but the use of
1367 1367 reachableroots is not."""
1368 1368 if a == nullrev:
1369 1369 return True
1370 1370 elif a == b:
1371 1371 return True
1372 1372 elif a > b:
1373 1373 return False
1374 1374 return bool(self.reachableroots(a, [b], [a], includepath=False))
1375 1375
1376 1376 def reachableroots(self, minroot, heads, roots, includepath=False):
1377 1377 """return (heads(::(<roots> and <roots>::<heads>)))
1378 1378
1379 1379 If includepath is True, return (<roots>::<heads>)."""
1380 1380 try:
1381 1381 return self.index.reachableroots2(
1382 1382 minroot, heads, roots, includepath
1383 1383 )
1384 1384 except AttributeError:
1385 1385 return dagop._reachablerootspure(
1386 1386 self.parentrevs, minroot, roots, heads, includepath
1387 1387 )
1388 1388
1389 1389 def ancestor(self, a, b):
1390 1390 """calculate the "best" common ancestor of nodes a and b"""
1391 1391
1392 1392 a, b = self.rev(a), self.rev(b)
1393 1393 try:
1394 1394 ancs = self.index.ancestors(a, b)
1395 1395 except (AttributeError, OverflowError):
1396 1396 ancs = ancestor.ancestors(self.parentrevs, a, b)
1397 1397 if ancs:
1398 1398 # choose a consistent winner when there's a tie
1399 1399 return min(map(self.node, ancs))
1400 1400 return self.nullid
1401 1401
1402 1402 def _match(self, id):
1403 1403 if isinstance(id, int):
1404 1404 # rev
1405 1405 return self.node(id)
1406 1406 if len(id) == self.nodeconstants.nodelen:
1407 1407 # possibly a binary node
1408 1408 # odds of a binary node being all hex in ASCII are 1 in 10**25
1409 1409 try:
1410 1410 node = id
1411 1411 self.rev(node) # quick search the index
1412 1412 return node
1413 1413 except error.LookupError:
1414 1414 pass # may be partial hex id
1415 1415 try:
1416 1416 # str(rev)
1417 1417 rev = int(id)
1418 1418 if b"%d" % rev != id:
1419 1419 raise ValueError
1420 1420 if rev < 0:
1421 1421 rev = len(self) + rev
1422 1422 if rev < 0 or rev >= len(self):
1423 1423 raise ValueError
1424 1424 return self.node(rev)
1425 1425 except (ValueError, OverflowError):
1426 1426 pass
1427 1427 if len(id) == 2 * self.nodeconstants.nodelen:
1428 1428 try:
1429 1429 # a full hex nodeid?
1430 1430 node = bin(id)
1431 1431 self.rev(node)
1432 1432 return node
1433 1433 except (TypeError, error.LookupError):
1434 1434 pass
1435 1435
1436 1436 def _partialmatch(self, id):
1437 1437 # we don't care about wdirfilenodeids as they should always be full hashes
1438 1438 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1439 1439 try:
1440 1440 partial = self.index.partialmatch(id)
1441 1441 if partial and self.hasnode(partial):
1442 1442 if maybewdir:
1443 1443 # single 'ff...' match in radix tree, ambiguous with wdir
1444 1444 raise error.RevlogError
1445 1445 return partial
1446 1446 if maybewdir:
1447 1447 # no 'ff...' match in radix tree, wdir identified
1448 1448 raise error.WdirUnsupported
1449 1449 return None
1450 1450 except error.RevlogError:
1451 1451 # parsers.c radix tree lookup gave multiple matches
1452 1452 # fast path: for unfiltered changelog, radix tree is accurate
1453 1453 if not getattr(self, 'filteredrevs', None):
1454 1454 raise error.AmbiguousPrefixLookupError(
1455 1455 id, self.display_id, _(b'ambiguous identifier')
1456 1456 )
1457 1457 # fall through to slow path that filters hidden revisions
1458 1458 except (AttributeError, ValueError):
1459 1459 # we are pure python, or key was too short to search radix tree
1460 1460 pass
1461 1461
1462 1462 if id in self._pcache:
1463 1463 return self._pcache[id]
1464 1464
1465 1465 if len(id) <= 40:
1466 1466 try:
1467 1467 # hex(node)[:...]
1468 1468 l = len(id) // 2 # grab an even number of digits
1469 1469 prefix = bin(id[: l * 2])
1470 1470 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1471 1471 nl = [
1472 1472 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1473 1473 ]
1474 1474 if self.nodeconstants.nullhex.startswith(id):
1475 1475 nl.append(self.nullid)
1476 1476 if len(nl) > 0:
1477 1477 if len(nl) == 1 and not maybewdir:
1478 1478 self._pcache[id] = nl[0]
1479 1479 return nl[0]
1480 1480 raise error.AmbiguousPrefixLookupError(
1481 1481 id, self.display_id, _(b'ambiguous identifier')
1482 1482 )
1483 1483 if maybewdir:
1484 1484 raise error.WdirUnsupported
1485 1485 return None
1486 1486 except TypeError:
1487 1487 pass
1488 1488
1489 1489 def lookup(self, id):
1490 1490 """locate a node based on:
1491 1491 - revision number or str(revision number)
1492 1492 - nodeid or subset of hex nodeid
1493 1493 """
1494 1494 n = self._match(id)
1495 1495 if n is not None:
1496 1496 return n
1497 1497 n = self._partialmatch(id)
1498 1498 if n:
1499 1499 return n
1500 1500
1501 1501 raise error.LookupError(id, self.display_id, _(b'no match found'))
1502 1502
1503 1503 def shortest(self, node, minlength=1):
1504 1504 """Find the shortest unambiguous prefix that matches node."""
1505 1505
1506 1506 def isvalid(prefix):
1507 1507 try:
1508 1508 matchednode = self._partialmatch(prefix)
1509 1509 except error.AmbiguousPrefixLookupError:
1510 1510 return False
1511 1511 except error.WdirUnsupported:
1512 1512 # single 'ff...' match
1513 1513 return True
1514 1514 if matchednode is None:
1515 1515 raise error.LookupError(node, self.display_id, _(b'no node'))
1516 1516 return True
1517 1517
1518 1518 def maybewdir(prefix):
1519 1519 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1520 1520
1521 1521 hexnode = hex(node)
1522 1522
1523 1523 def disambiguate(hexnode, minlength):
1524 1524 """Disambiguate against wdirid."""
1525 1525 for length in range(minlength, len(hexnode) + 1):
1526 1526 prefix = hexnode[:length]
1527 1527 if not maybewdir(prefix):
1528 1528 return prefix
1529 1529
1530 1530 if not getattr(self, 'filteredrevs', None):
1531 1531 try:
1532 1532 length = max(self.index.shortest(node), minlength)
1533 1533 return disambiguate(hexnode, length)
1534 1534 except error.RevlogError:
1535 1535 if node != self.nodeconstants.wdirid:
1536 1536 raise error.LookupError(
1537 1537 node, self.display_id, _(b'no node')
1538 1538 )
1539 1539 except AttributeError:
1540 1540 # Fall through to pure code
1541 1541 pass
1542 1542
1543 1543 if node == self.nodeconstants.wdirid:
1544 1544 for length in range(minlength, len(hexnode) + 1):
1545 1545 prefix = hexnode[:length]
1546 1546 if isvalid(prefix):
1547 1547 return prefix
1548 1548
1549 1549 for length in range(minlength, len(hexnode) + 1):
1550 1550 prefix = hexnode[:length]
1551 1551 if isvalid(prefix):
1552 1552 return disambiguate(hexnode, length)
1553 1553
1554 1554 def cmp(self, node, text):
1555 1555 """compare text with a given file revision
1556 1556
1557 1557 returns True if text is different from what is stored.
1558 1558 """
1559 1559 p1, p2 = self.parents(node)
1560 1560 return storageutil.hashrevisionsha1(text, p1, p2) != node
1561 1561
1562 1562 def _cachesegment(self, offset, data):
1563 1563 """Add a segment to the revlog cache.
1564 1564
1565 1565 Accepts an absolute offset and the data that is at that location.
1566 1566 """
1567 1567 o, d = self._chunkcache
1568 1568 # try to add to existing cache
1569 1569 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1570 1570 self._chunkcache = o, d + data
1571 1571 else:
1572 1572 self._chunkcache = offset, data
1573 1573
1574 1574 def _readsegment(self, offset, length, df=None):
1575 1575 """Load a segment of raw data from the revlog.
1576 1576
1577 1577 Accepts an absolute offset, length to read, and an optional existing
1578 1578 file handle to read from.
1579 1579
1580 1580 If an existing file handle is passed, it will be seeked and the
1581 1581 original seek position will NOT be restored.
1582 1582
1583 1583 Returns a str or buffer of raw byte data.
1584 1584
1585 1585 Raises if the requested number of bytes could not be read.
1586 1586 """
1587 1587 # Cache data both forward and backward around the requested
1588 1588 # data, in a fixed size window. This helps speed up operations
1589 1589 # involving reading the revlog backwards.
1590 1590 cachesize = self._chunkcachesize
1591 1591 realoffset = offset & ~(cachesize - 1)
1592 1592 reallength = (
1593 1593 (offset + length + cachesize) & ~(cachesize - 1)
1594 1594 ) - realoffset
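# Worked numbers (editor's illustration): with the default 64 KiB
# cachesize, a request for offset=70000, length=1000 is widened to
# realoffset == 65536 and reallength == 65536, i.e. a single aligned
# 64 KiB window is read and cached around the requested range.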
1595 1595 with self._datareadfp(df) as df:
1596 1596 df.seek(realoffset)
1597 1597 d = df.read(reallength)
1598 1598
1599 1599 self._cachesegment(realoffset, d)
1600 1600 if offset != realoffset or reallength != length:
1601 1601 startoffset = offset - realoffset
1602 1602 if len(d) - startoffset < length:
1603 1603 raise error.RevlogError(
1604 1604 _(
1605 1605 b'partial read of revlog %s; expected %d bytes from '
1606 1606 b'offset %d, got %d'
1607 1607 )
1608 1608 % (
1609 1609 self._indexfile if self._inline else self._datafile,
1610 1610 length,
1611 1611 offset,
1612 1612 len(d) - startoffset,
1613 1613 )
1614 1614 )
1615 1615
1616 1616 return util.buffer(d, startoffset, length)
1617 1617
1618 1618 if len(d) < length:
1619 1619 raise error.RevlogError(
1620 1620 _(
1621 1621 b'partial read of revlog %s; expected %d bytes from offset '
1622 1622 b'%d, got %d'
1623 1623 )
1624 1624 % (
1625 1625 self._indexfile if self._inline else self._datafile,
1626 1626 length,
1627 1627 offset,
1628 1628 len(d),
1629 1629 )
1630 1630 )
1631 1631
1632 1632 return d
1633 1633
1634 1634 def _getsegment(self, offset, length, df=None):
1635 1635 """Obtain a segment of raw data from the revlog.
1636 1636
1637 1637 Accepts an absolute offset, length of bytes to obtain, and an
1638 1638 optional file handle to the already-opened revlog. If the file
1639 1639 handle is used, its original seek position will not be preserved.
1640 1640
1641 1641 Requests for data may be returned from a cache.
1642 1642
1643 1643 Returns a str or a buffer instance of raw byte data.
1644 1644 """
1645 1645 o, d = self._chunkcache
1646 1646 l = len(d)
1647 1647
1648 1648 # is it in the cache?
1649 1649 cachestart = offset - o
1650 1650 cacheend = cachestart + length
1651 1651 if cachestart >= 0 and cacheend <= l:
1652 1652 if cachestart == 0 and cacheend == l:
1653 1653 return d # avoid a copy
1654 1654 return util.buffer(d, cachestart, cacheend - cachestart)
1655 1655
1656 1656 return self._readsegment(offset, length, df=df)
1657 1657
1658 1658 def _getsegmentforrevs(self, startrev, endrev, df=None):
1659 1659 """Obtain a segment of raw data corresponding to a range of revisions.
1660 1660
1661 1661 Accepts the start and end revisions and an optional already-open
1662 1662 file handle to be used for reading. If the file handle is used, its
1663 1663 seek position will not be preserved.
1664 1664
1665 1665 Requests for data may be satisfied by a cache.
1666 1666
1667 1667 Returns a 2-tuple of (offset, data) for the requested range of
1668 1668 revisions. Offset is the integer offset from the beginning of the
1669 1669 revlog and data is a str or buffer of the raw byte data.
1670 1670
1671 1671 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1672 1672 to determine where each revision's data begins and ends.
1673 1673 """
1674 1674 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1675 1675 # (functions are expensive).
1676 1676 index = self.index
1677 1677 istart = index[startrev]
1678 1678 start = int(istart[0] >> 16)
1679 1679 if startrev == endrev:
1680 1680 end = start + istart[1]
1681 1681 else:
1682 1682 iend = index[endrev]
1683 1683 end = int(iend[0] >> 16) + iend[1]
1684 1684
1685 1685 if self._inline:
1686 1686 start += (startrev + 1) * self.index.entry_size
1687 1687 end += (endrev + 1) * self.index.entry_size
1688 1688 length = end - start
1689 1689
1690 1690 return start, self._getsegment(start, length, df=df)
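# Inline-offset sketch (numbers assumed): with 64-byte index entries, data
# for rev 2 nominally starting at offset 100 actually lives at
# 100 + (2 + 1) * 64 = 292 in the combined .i file, which is what the
# `(rev + 1) * entry_size` adjustment above accounts for.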
1691 1691
1692 1692 def _chunk(self, rev, df=None):
1693 1693 """Obtain a single decompressed chunk for a revision.
1694 1694
1695 1695 Accepts an integer revision and an optional already-open file handle
1696 1696 to be used for reading. If used, the seek position of the file will not
1697 1697 be preserved.
1698 1698
1699 1699 Returns a str holding uncompressed data for the requested revision.
1700 1700 """
1701 1701 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1702 1702
1703 1703 def _chunks(self, revs, df=None, targetsize=None):
1704 1704 """Obtain decompressed chunks for the specified revisions.
1705 1705
1706 1706 Accepts an iterable of numeric revisions that are assumed to be in
1707 1707 ascending order. Also accepts an optional already-open file handle
1708 1708 to be used for reading. If used, the seek position of the file will
1709 1709 not be preserved.
1710 1710
1711 1711 This function is similar to calling ``self._chunk()`` multiple times,
1712 1712 but is faster.
1713 1713
1714 1714 Returns a list with decompressed data for each requested revision.
1715 1715 """
1716 1716 if not revs:
1717 1717 return []
1718 1718 start = self.start
1719 1719 length = self.length
1720 1720 inline = self._inline
1721 1721 iosize = self.index.entry_size
1722 1722 buffer = util.buffer
1723 1723
1724 1724 l = []
1725 1725 ladd = l.append
1726 1726
1727 1727 if not self._withsparseread:
1728 1728 slicedchunks = (revs,)
1729 1729 else:
1730 1730 slicedchunks = deltautil.slicechunk(
1731 1731 self, revs, targetsize=targetsize
1732 1732 )
1733 1733
1734 1734 for revschunk in slicedchunks:
1735 1735 firstrev = revschunk[0]
1736 1736 # Skip trailing revisions with empty diff
1737 1737 for lastrev in revschunk[::-1]:
1738 1738 if length(lastrev) != 0:
1739 1739 break
1740 1740
1741 1741 try:
1742 1742 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1743 1743 except OverflowError:
1744 1744 # issue4215 - we can't cache a run of chunks greater than
1745 1745 # 2G on Windows
1746 1746 return [self._chunk(rev, df=df) for rev in revschunk]
1747 1747
1748 1748 decomp = self.decompress
1749 1749 for rev in revschunk:
1750 1750 chunkstart = start(rev)
1751 1751 if inline:
1752 1752 chunkstart += (rev + 1) * iosize
1753 1753 chunklength = length(rev)
1754 1754 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1755 1755
1756 1756 return l
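# Sparse-read sketch (revs hypothetical): [2, 3, 7, 8] may be sliced by
# deltautil.slicechunk into [2, 3] and [7, 8]; each slice is fetched as a
# single contiguous segment, then split per revision via start()/length().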
1757 1757
1758 1758 def _chunkclear(self):
1759 1759 """Clear the raw chunk cache."""
1760 1760 self._chunkcache = (0, b'')
1761 1761
1762 1762 def deltaparent(self, rev):
1763 1763 """return deltaparent of the given revision"""
1764 1764 base = self.index[rev][3]
1765 1765 if base == rev:
1766 1766 return nullrev
1767 1767 elif self._generaldelta:
1768 1768 return base
1769 1769 else:
1770 1770 return rev - 1
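# Summary of the three branches above:
#
#     deltaparent(r)  # -> nullrev      if index[r][3] == r (full revision)
#                     # -> index[r][3]  with generaldelta
#                     # -> r - 1        otherwise (legacy chain ordering)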
1771 1771
1772 1772 def issnapshot(self, rev):
1773 1773 """tells whether rev is a snapshot"""
1774 1774 if not self._sparserevlog:
1775 1775 return self.deltaparent(rev) == nullrev
1776 1776 elif util.safehasattr(self.index, b'issnapshot'):
1777 1777 # directly assign the method to cache the testing and access
1778 1778 self.issnapshot = self.index.issnapshot
1779 1779 return self.issnapshot(rev)
1780 1780 if rev == nullrev:
1781 1781 return True
1782 1782 entry = self.index[rev]
1783 1783 base = entry[3]
1784 1784 if base == rev:
1785 1785 return True
1786 1786 if base == nullrev:
1787 1787 return True
1788 1788 p1 = entry[5]
1789 1789 p2 = entry[6]
1790 1790 if base == p1 or base == p2:
1791 1791 return False
1792 1792 return self.issnapshot(base)
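# Recursion trace for a hypothetical chain 12 -> 8 -> 5 -> nullrev where no
# base is a parent of its child:
#
#     issnapshot(5)   # base == nullrev     -> True
#     issnapshot(8)   # base 5 not p1/p2    -> issnapshot(5) -> True
#     issnapshot(12)  # base 8 not p1/p2    -> issnapshot(8) -> True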
1793 1793
1794 1794 def snapshotdepth(self, rev):
1795 1795 """number of snapshot in the chain before this one"""
1796 1796 if not self.issnapshot(rev):
1797 1797 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1798 1798 return len(self._deltachain(rev)[0]) - 1
1799 1799
1800 1800 def revdiff(self, rev1, rev2):
1801 1801 """return or calculate a delta between two revisions
1802 1802
1803 1803 The delta calculated is in binary form and is intended to be written to
1804 1804 revlog data directly. So this function needs raw revision data.
1805 1805 """
1806 1806 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1807 1807 return bytes(self._chunk(rev2))
1808 1808
1809 1809 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1810 1810
1811 1811 def _processflags(self, text, flags, operation, raw=False):
1812 1812 """deprecated entry point to access flag processors"""
1813 1813 msg = b'_processflag(...) use the specialized variant'
1814 1814 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 1815 if raw:
1816 1816 return text, flagutil.processflagsraw(self, text, flags)
1817 1817 elif operation == b'read':
1818 1818 return flagutil.processflagsread(self, text, flags)
1819 1819 else: # write operation
1820 1820 return flagutil.processflagswrite(self, text, flags)
1821 1821
1822 1822 def revision(self, nodeorrev, _df=None, raw=False):
1823 1823 """return an uncompressed revision of a given node or revision
1824 1824 number.
1825 1825
1826 1826 _df - an existing file handle to read from. (internal-only)
1827 1827 raw - an optional argument specifying if the revision data is to be
1828 1828 treated as raw data when applying flag transforms. 'raw' should be set
1829 1829 to True when generating changegroups or in debug commands.
1830 1830 """
1831 1831 if raw:
1832 1832 msg = (
1833 1833 b'revlog.revision(..., raw=True) is deprecated, '
1834 1834 b'use revlog.rawdata(...)'
1835 1835 )
1836 1836 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1837 1837 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1838 1838
1839 1839 def sidedata(self, nodeorrev, _df=None):
1840 1840 """a map of extra data related to the changeset but not part of the hash
1841 1841
1842 1842 This function currently returns a dictionary. However, a more
1843 1843 advanced mapping object will likely be used in the future for more
1844 1844 efficient/lazy code.
1845 1845 """
1846 1846 return self._revisiondata(nodeorrev, _df)[1]
1847 1847
1848 1848 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1849 1849 # deal with <nodeorrev> argument type
1850 1850 if isinstance(nodeorrev, int):
1851 1851 rev = nodeorrev
1852 1852 node = self.node(rev)
1853 1853 else:
1854 1854 node = nodeorrev
1855 1855 rev = None
1856 1856
1857 1857 # fast path the special `nullid` rev
1858 1858 if node == self.nullid:
1859 1859 return b"", {}
1860 1860
1861 1861 # ``rawtext`` is the text as stored inside the revlog. Might be the
1862 1862 # revision or might need to be processed to retrieve the revision.
1863 1863 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1864 1864
1865 1865 if self.hassidedata:
1866 1866 if rev is None:
1867 1867 rev = self.rev(node)
1868 1868 sidedata = self._sidedata(rev)
1869 1869 else:
1870 1870 sidedata = {}
1871 1871
1872 1872 if raw and validated:
1873 1873 # if we don't want to process the raw text and that raw
1874 1874 # text is cached, we can exit early.
1875 1875 return rawtext, sidedata
1876 1876 if rev is None:
1877 1877 rev = self.rev(node)
1878 1878 # the revlog's flags for this revision
1879 1879 # (they usually alter its state or content)
1880 1880 flags = self.flags(rev)
1881 1881
1882 1882 if validated and flags == REVIDX_DEFAULT_FLAGS:
1883 1883 # no extra flags set, no flag processor runs, text = rawtext
1884 1884 return rawtext, sidedata
1885 1885
1886 1886 if raw:
1887 1887 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1888 1888 text = rawtext
1889 1889 else:
1890 1890 r = flagutil.processflagsread(self, rawtext, flags)
1891 1891 text, validatehash = r
1892 1892 if validatehash:
1893 1893 self.checkhash(text, node, rev=rev)
1894 1894 if not validated:
1895 1895 self._revisioncache = (node, rev, rawtext)
1896 1896
1897 1897 return text, sidedata
1898 1898
1899 1899 def _rawtext(self, node, rev, _df=None):
1900 1900 """return the possibly unvalidated rawtext for a revision
1901 1901
1902 1902 returns (rev, rawtext, validated)
1903 1903 """
1904 1904
1905 1905 # revision in the cache (could be useful to apply delta)
1906 1906 cachedrev = None
1907 1907 # An intermediate text to apply deltas to
1908 1908 basetext = None
1909 1909
1910 1910 # Check if we have the entry in cache
1911 1911 # The cache entry looks like (node, rev, rawtext)
1912 1912 if self._revisioncache:
1913 1913 if self._revisioncache[0] == node:
1914 1914 return (rev, self._revisioncache[2], True)
1915 1915 cachedrev = self._revisioncache[1]
1916 1916
1917 1917 if rev is None:
1918 1918 rev = self.rev(node)
1919 1919
1920 1920 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1921 1921 if stopped:
1922 1922 basetext = self._revisioncache[2]
1923 1923
1924 1924 # drop cache to save memory, the caller is expected to
1925 1925 # update self._revisioncache after validating the text
1926 1926 self._revisioncache = None
1927 1927
1928 1928 targetsize = None
1929 1929 rawsize = self.index[rev][2]
1930 1930 if 0 <= rawsize:
1931 1931 targetsize = 4 * rawsize
1932 1932
1933 1933 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1934 1934 if basetext is None:
1935 1935 basetext = bytes(bins[0])
1936 1936 bins = bins[1:]
1937 1937
1938 1938 rawtext = mdiff.patches(basetext, bins)
1939 1939 del basetext # let us have a chance to free memory early
1940 1940 return (rev, rawtext, False)
1941 1941
1942 1942 def _sidedata(self, rev):
1943 1943 """Return the sidedata for a given revision number."""
1944 1944 index_entry = self.index[rev]
1945 1945 sidedata_offset = index_entry[8]
1946 1946 sidedata_size = index_entry[9]
1947 1947
1948 1948 if self._inline:
1949 1949 sidedata_offset += self.index.entry_size * (1 + rev)
1950 1950 if sidedata_size == 0:
1951 1951 return {}
1952 1952
1953 1953 segment = self._getsegment(sidedata_offset, sidedata_size)
1954 1954 sidedata = sidedatautil.deserialize_sidedata(segment)
1955 1955 return sidedata
1956 1956
1957 1957 def rawdata(self, nodeorrev, _df=None):
1958 1958 """return an uncompressed raw data of a given node or revision number.
1959 1959
1960 1960 _df - an existing file handle to read from. (internal-only)
1961 1961 """
1962 1962 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1963 1963
1964 1964 def hash(self, text, p1, p2):
1965 1965 """Compute a node hash.
1966 1966
1967 1967 Available as a function so that subclasses can replace the hash
1968 1968 as needed.
1969 1969 """
1970 1970 return storageutil.hashrevisionsha1(text, p1, p2)
1971 1971
1972 1972 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1973 1973 """Check node hash integrity.
1974 1974
1975 1975 Available as a function so that subclasses can extend hash mismatch
1976 1976 behaviors as needed.
1977 1977 """
1978 1978 try:
1979 1979 if p1 is None and p2 is None:
1980 1980 p1, p2 = self.parents(node)
1981 1981 if node != self.hash(text, p1, p2):
1982 1982 # Clear the revision cache on hash failure. The revision cache
1983 1983 # only stores the raw revision and clearing the cache does have
1984 1984 # the side-effect that we won't have a cache hit when the raw
1985 1985 # revision data is accessed. But this case should be rare and
1986 1986 # it is extra work to teach the cache about the hash
1987 1987 # verification state.
1988 1988 if self._revisioncache and self._revisioncache[0] == node:
1989 1989 self._revisioncache = None
1990 1990
1991 1991 revornode = rev
1992 1992 if revornode is None:
1993 1993 revornode = templatefilters.short(hex(node))
1994 1994 raise error.RevlogError(
1995 1995 _(b"integrity check failed on %s:%s")
1996 1996 % (self.display_id, pycompat.bytestr(revornode))
1997 1997 )
1998 1998 except error.RevlogError:
1999 1999 if self._censorable and storageutil.iscensoredtext(text):
2000 2000 raise error.CensoredNodeError(self.display_id, node, text)
2001 2001 raise
2002 2002
2003 2003 def _enforceinlinesize(self, tr):
2004 2004 """Check if the revlog is too big for inline and convert if so.
2005 2005
2006 2006 This should be called after revisions are added to the revlog. If the
2007 2007 revlog has grown too large to be an inline revlog, it will convert it
2008 2008 to use multiple index and data files.
2009 2009 """
2010 2010 tiprev = len(self) - 1
2011 2011 total_size = self.start(tiprev) + self.length(tiprev)
2012 2012 if not self._inline or total_size < _maxinline:
2013 2013 return
2014 2014
2015 2015 troffset = tr.findoffset(self._indexfile)
2016 2016 if troffset is None:
2017 2017 raise error.RevlogError(
2018 2018 _(b"%s not found in the transaction") % self._indexfile
2019 2019 )
2020 2020 trindex = 0
2021 2021 tr.add(self._datafile, 0)
2022 2022
2023 2023 existing_handles = False
2024 2024 if self._writinghandles is not None:
2025 2025 existing_handles = True
2026 2026 fp = self._writinghandles[0]
2027 2027 fp.flush()
2028 2028 fp.close()
2029 2029 # We can't use the cached file handle after close(). So prevent
2030 2030 # its usage.
2031 2031 self._writinghandles = None
2032 2032
2033 2033 new_dfh = self._datafp(b'w+')
2034 2034 new_dfh.truncate(0) # drop any potentially existing data
2035 2035 try:
2036 2036 with self._indexfp() as read_ifh:
2037 2037 for r in self:
2038 2038 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2039 2039 if troffset <= self.start(r):
2040 2040 trindex = r
2041 2041 new_dfh.flush()
2042 2042
2043 2043 with self.__index_new_fp() as fp:
2044 2044 self._format_flags &= ~FLAG_INLINE_DATA
2045 2045 self._inline = False
2046 2046 for i in self:
2047 2047 e = self.index.entry_binary(i)
2048 2048 if i == 0 and self._docket is None:
2049 2049 header = self._format_flags | self._format_version
2050 2050 header = self.index.pack_header(header)
2051 2051 e = header + e
2052 2052 fp.write(e)
2053 2053 if self._docket is not None:
2054 2054 self._docket.index_end = fp.tell()
2055 2055 # the temp file replaces the real index when we exit the context
2056 2056 # manager
2057 2057
2058 2058 tr.replace(self._indexfile, trindex * self.index.entry_size)
2059 2059 nodemaputil.setup_persistent_nodemap(tr, self)
2060 2060 self._chunkclear()
2061 2061
2062 2062 if existing_handles:
2063 2063 # switched from inline to conventional; reopen the index
2064 2064 ifh = self.__index_write_fp()
2065 2065 self._writinghandles = (ifh, new_dfh)
2066 2066 new_dfh = None
2067 2067 finally:
2068 2068 if new_dfh is not None:
2069 2069 new_dfh.close()
2070 2070
2071 2071 def _nodeduplicatecallback(self, transaction, node):
2072 2072 """called when trying to add a node already stored."""
2073 2073
2074 2074 @contextlib.contextmanager
2075 2075 def _writing(self, transaction):
2076 2076 if self._trypending:
2077 2077 msg = b'try to write in a `trypending` revlog: %s'
2078 2078 msg %= self.display_id
2079 2079 raise error.ProgrammingError(msg)
2080 2080 if self._writinghandles is not None:
2081 2081 yield
2082 2082 else:
2083 2083 r = len(self)
2084 2084 dsize = 0
2085 2085 if r:
2086 2086 dsize = self.end(r - 1)
2087 2087 dfh = None
2088 2088 if not self._inline:
2089 2089 try:
2090 2090 dfh = self._datafp(b"r+")
2091 2091 if self._docket is None:
2092 2092 dfh.seek(0, os.SEEK_END)
2093 2093 else:
2094 2094 dfh.seek(self._docket.data_end, os.SEEK_SET)
2095 2095 except IOError as inst:
2096 2096 if inst.errno != errno.ENOENT:
2097 2097 raise
2098 2098 dfh = self._datafp(b"w+")
2099 2099 transaction.add(self._datafile, dsize)
2100 2100 try:
2101 2101 isize = r * self.index.entry_size
2102 2102 ifh = self.__index_write_fp()
2103 2103 if self._inline:
2104 2104 transaction.add(self._indexfile, dsize + isize)
2105 2105 else:
2106 2106 transaction.add(self._indexfile, isize)
2107 2107 try:
2108 2108 self._writinghandles = (ifh, dfh)
2109 2109 try:
2110 2110 yield
2111 2111 if self._docket is not None:
2112 2112 self._write_docket(transaction)
2113 2113 finally:
2114 2114 self._writinghandles = None
2115 2115 finally:
2116 2116 ifh.close()
2117 2117 finally:
2118 2118 if dfh is not None:
2119 2119 dfh.close()
2120 2120
2121 2121 def _write_docket(self, transaction):
2122 2122 """write the current docket on disk
2123 2123
2124 2124 Exists as a method to help the changelog implement transaction logic
2125 2125
2126 2126 We could also imagine using the same transaction logic for all revlogs
2127 2127 since dockets are cheap."""
2128 2128 self._docket.write(transaction)
2129 2129
2130 2130 def addrevision(
2131 2131 self,
2132 2132 text,
2133 2133 transaction,
2134 2134 link,
2135 2135 p1,
2136 2136 p2,
2137 2137 cachedelta=None,
2138 2138 node=None,
2139 2139 flags=REVIDX_DEFAULT_FLAGS,
2140 2140 deltacomputer=None,
2141 2141 sidedata=None,
2142 2142 ):
2143 2143 """add a revision to the log
2144 2144
2145 2145 text - the revision data to add
2146 2146 transaction - the transaction object used for rollback
2147 2147 link - the linkrev data to add
2148 2148 p1, p2 - the parent nodeids of the revision
2149 2149 cachedelta - an optional precomputed delta
2150 2150 node - nodeid of revision; typically node is not specified, and it is
2151 2151 computed by default as hash(text, p1, p2), however subclasses might
2152 2152 use different hashing method (and override checkhash() in such case)
2153 2153 flags - the known flags to set on the revision
2154 2154 deltacomputer - an optional deltacomputer instance shared between
2155 2155 multiple calls
2156 2156 """
2157 2157 if link == nullrev:
2158 2158 raise error.RevlogError(
2159 2159 _(b"attempted to add linkrev -1 to %s") % self.display_id
2160 2160 )
2161 2161
2162 2162 if sidedata is None:
2163 2163 sidedata = {}
2164 2164 elif sidedata and not self.hassidedata:
2165 2165 raise error.ProgrammingError(
2166 2166 _(b"trying to add sidedata to a revlog who don't support them")
2167 2167 )
2168 2168
2169 2169 if flags:
2170 2170 node = node or self.hash(text, p1, p2)
2171 2171
2172 2172 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2173 2173
2174 2174 # If the flag processor modifies the revision data, ignore any provided
2175 2175 # cachedelta.
2176 2176 if rawtext != text:
2177 2177 cachedelta = None
2178 2178
2179 2179 if len(rawtext) > _maxentrysize:
2180 2180 raise error.RevlogError(
2181 2181 _(
2182 2182 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2183 2183 )
2184 2184 % (self.display_id, len(rawtext))
2185 2185 )
2186 2186
2187 2187 node = node or self.hash(rawtext, p1, p2)
2188 2188 rev = self.index.get_rev(node)
2189 2189 if rev is not None:
2190 2190 return rev
2191 2191
2192 2192 if validatehash:
2193 2193 self.checkhash(rawtext, node, p1=p1, p2=p2)
2194 2194
2195 2195 return self.addrawrevision(
2196 2196 rawtext,
2197 2197 transaction,
2198 2198 link,
2199 2199 p1,
2200 2200 p2,
2201 2201 node,
2202 2202 flags,
2203 2203 cachedelta=cachedelta,
2204 2204 deltacomputer=deltacomputer,
2205 2205 sidedata=sidedata,
2206 2206 )
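# Call-shape sketch (caller-side, names assumed); the node defaults to
# hash(text, p1, p2) when not supplied:
#
#     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)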
2207 2207
2208 2208 def addrawrevision(
2209 2209 self,
2210 2210 rawtext,
2211 2211 transaction,
2212 2212 link,
2213 2213 p1,
2214 2214 p2,
2215 2215 node,
2216 2216 flags,
2217 2217 cachedelta=None,
2218 2218 deltacomputer=None,
2219 2219 sidedata=None,
2220 2220 ):
2221 2221 """add a raw revision with known flags, node and parents
2222 2222 useful when reusing a revision not stored in this revlog (e.g. received
2223 2223 over the wire, or read from an external bundle).
2224 2224 """
2225 2225 with self._writing(transaction):
2226 2226 return self._addrevision(
2227 2227 node,
2228 2228 rawtext,
2229 2229 transaction,
2230 2230 link,
2231 2231 p1,
2232 2232 p2,
2233 2233 flags,
2234 2234 cachedelta,
2235 2235 deltacomputer=deltacomputer,
2236 2236 sidedata=sidedata,
2237 2237 )
2238 2238
2239 2239 def compress(self, data):
2240 2240 """Generate a possibly-compressed representation of data."""
2241 2241 if not data:
2242 2242 return b'', data
2243 2243
2244 2244 compressed = self._compressor.compress(data)
2245 2245
2246 2246 if compressed:
2247 2247 # The revlog compressor added the header in the returned data.
2248 2248 return b'', compressed
2249 2249
2250 2250 if data[0:1] == b'\0':
2251 2251 return b'', data
2252 2252 return b'u', data
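# Header-byte convention implemented above (inputs assumed):
#
#     compress(b'')            # -> (b'', b'')   empty stays empty
#     compress(b'\0<binary>')  # -> (b'', data)  b'\0' already marks raw
#     compress(b'short text')  # -> (b'u', data) compressor declined; b'u'
#
# when compression pays off, the engine's own header (e.g. b'x' for zlib)
# is embedded in the payload and routed by decompress() below.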
2253 2253
2254 2254 def decompress(self, data):
2255 2255 """Decompress a revlog chunk.
2256 2256
2257 2257 The chunk is expected to begin with a header identifying the
2258 2258 format type so it can be routed to an appropriate decompressor.
2259 2259 """
2260 2260 if not data:
2261 2261 return data
2262 2262
2263 2263 # Revlogs are read much more frequently than they are written and many
2264 2264 # chunks only take microseconds to decompress, so performance is
2265 2265 # important here.
2266 2266 #
2267 2267 # We can make a few assumptions about revlogs:
2268 2268 #
2269 2269 # 1) the majority of chunks will be compressed (as opposed to inline
2270 2270 # raw data).
2271 2271 # 2) decompressing *any* data will likely be at least 10x slower than
2272 2272 # returning raw inline data.
2273 2273 # 3) we want to prioritize common and officially supported compression
2274 2274 # engines
2275 2275 #
2276 2276 # It follows that we want to optimize for "decompress compressed data
2277 2277 # when encoded with common and officially supported compression engines"
2278 2278 # case over "raw data" and "data encoded by less common or non-official
2279 2279 # compression engines." That is why we have the inline lookup first
2280 2280 # followed by the compengines lookup.
2281 2281 #
2282 2282 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2283 2283 # compressed chunks. And this matters for changelog and manifest reads.
2284 2284 t = data[0:1]
2285 2285
2286 2286 if t == b'x':
2287 2287 try:
2288 2288 return _zlibdecompress(data)
2289 2289 except zlib.error as e:
2290 2290 raise error.RevlogError(
2291 2291 _(b'revlog decompress error: %s')
2292 2292 % stringutil.forcebytestr(e)
2293 2293 )
2294 2294 # '\0' is more common than 'u' so it goes first.
2295 2295 elif t == b'\0':
2296 2296 return data
2297 2297 elif t == b'u':
2298 2298 return util.buffer(data, 1)
2299 2299
2300 2300 try:
2301 2301 compressor = self._decompressors[t]
2302 2302 except KeyError:
2303 2303 try:
2304 2304 engine = util.compengines.forrevlogheader(t)
2305 2305 compressor = engine.revlogcompressor(self._compengineopts)
2306 2306 self._decompressors[t] = compressor
2307 2307 except KeyError:
2308 2308 raise error.RevlogError(
2309 2309 _(b'unknown compression type %s') % binascii.hexlify(t)
2310 2310 )
2311 2311
2312 2312 return compressor.decompress(data)
2313 2313
2314 2314 def _addrevision(
2315 2315 self,
2316 2316 node,
2317 2317 rawtext,
2318 2318 transaction,
2319 2319 link,
2320 2320 p1,
2321 2321 p2,
2322 2322 flags,
2323 2323 cachedelta,
2324 2324 alwayscache=False,
2325 2325 deltacomputer=None,
2326 2326 sidedata=None,
2327 2327 ):
2328 2328 """internal function to add revisions to the log
2329 2329
2330 2330 see addrevision for argument descriptions.
2331 2331
2332 2332 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2333 2333
2334 2334 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2335 2335 be used.
2336 2336
2337 2337 invariants:
2338 2338 - rawtext is optional (can be None); if not set, cachedelta must be set.
2339 2339 if both are set, they must correspond to each other.
2340 2340 """
2341 2341 if node == self.nullid:
2342 2342 raise error.RevlogError(
2343 2343 _(b"%s: attempt to add null revision") % self.display_id
2344 2344 )
2345 2345 if (
2346 2346 node == self.nodeconstants.wdirid
2347 2347 or node in self.nodeconstants.wdirfilenodeids
2348 2348 ):
2349 2349 raise error.RevlogError(
2350 2350 _(b"%s: attempt to add wdir revision") % self.display_id
2351 2351 )
2352 2352 if self._writinghandles is None:
2353 2353 msg = b'adding revision outside `revlog._writing` context'
2354 2354 raise error.ProgrammingError(msg)
2355 2355
2356 2356 if self._inline:
2357 2357 fh = self._writinghandles[0]
2358 2358 else:
2359 2359 fh = self._writinghandles[1]
2360 2360
2361 2361 btext = [rawtext]
2362 2362
2363 2363 curr = len(self)
2364 2364 prev = curr - 1
2365 2365
2366 2366 offset = self._get_data_offset(prev)
2367 2367
2368 2368 if self._concurrencychecker:
2369 2369 ifh, dfh = self._writinghandles
2370 2370 if self._inline:
2371 2371 # offset is "as if" it were in the .d file, so we need to add on
2372 2372 # the size of the entry metadata.
2373 2373 self._concurrencychecker(
2374 2374 ifh, self._indexfile, offset + curr * self.index.entry_size
2375 2375 )
2376 2376 else:
2377 2377 # Entries in the .i are a consistent size.
2378 2378 self._concurrencychecker(
2379 2379 ifh, self._indexfile, curr * self.index.entry_size
2380 2380 )
2381 2381 self._concurrencychecker(dfh, self._datafile, offset)
2382 2382
2383 2383 p1r, p2r = self.rev(p1), self.rev(p2)
2384 2384
2385 2385 # full versions are inserted when the needed deltas
2386 2386 # become comparable to the uncompressed text
2387 2387 if rawtext is None:
2388 2388 # need rawtext size, before changed by flag processors, which is
2389 2389 # the non-raw size. use revlog explicitly to avoid filelog's extra
2390 2390 # logic that might remove metadata size.
2391 2391 textlen = mdiff.patchedsize(
2392 2392 revlog.size(self, cachedelta[0]), cachedelta[1]
2393 2393 )
2394 2394 else:
2395 2395 textlen = len(rawtext)
2396 2396
2397 2397 if deltacomputer is None:
2398 2398 deltacomputer = deltautil.deltacomputer(self)
2399 2399
2400 2400 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2401 2401
2402 2402 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2403 2403
2404 2404 if sidedata and self.hassidedata:
2405 2405 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2406 2406 sidedata_offset = offset + deltainfo.deltalen
2407 2407 else:
2408 2408 serialized_sidedata = b""
2409 2409 # Don't store the offset if the sidedata is empty; that way
2410 2410 # we can easily detect empty sidedata, and it will be no different
2411 2411 # from sidedata we add manually.
2412 2412 sidedata_offset = 0
2413 2413
2414 2414 e = (
2415 2415 offset_type(offset, flags),
2416 2416 deltainfo.deltalen,
2417 2417 textlen,
2418 2418 deltainfo.base,
2419 2419 link,
2420 2420 p1r,
2421 2421 p2r,
2422 2422 node,
2423 2423 sidedata_offset,
2424 2424 len(serialized_sidedata),
2425 2425 )
2426 2426
2427 2427 self.index.append(e)
2428 2428 entry = self.index.entry_binary(curr)
2429 2429 if curr == 0 and self._docket is None:
2430 2430 header = self._format_flags | self._format_version
2431 2431 header = self.index.pack_header(header)
2432 2432 entry = header + entry
2433 2433 self._writeentry(
2434 2434 transaction,
2435 2435 entry,
2436 2436 deltainfo.data,
2437 2437 link,
2438 2438 offset,
2439 2439 serialized_sidedata,
2440 2440 )
2441 2441
2442 2442 rawtext = btext[0]
2443 2443
2444 2444 if alwayscache and rawtext is None:
2445 2445 rawtext = deltacomputer.buildtext(revinfo, fh)
2446 2446
2447 2447 if type(rawtext) == bytes: # only accept immutable objects
2448 2448 self._revisioncache = (node, curr, rawtext)
2449 2449 self._chainbasecache[curr] = deltainfo.chainbase
2450 2450 return curr
2451 2451
2452 2452 def _get_data_offset(self, prev):
2453 2453 """Returns the current offset in the (in-transaction) data file.
2454 2454 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2455 2455 file to store that information: since sidedata can be rewritten to the
2456 2456 end of the data file within a transaction, you can have cases where, for
2457 2457 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2458 2458 to `n - 1`'s sidedata being written after `n`'s data.
2459 2459
2460 2460 TODO cache this in a docket file before getting out of experimental."""
2461 2461 if self._docket is None:
2462 2462 return self.end(prev)
2463 2463 else:
2464 2464 return self._docket.data_end
2465 2465
2466 2466 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2467 2467 # Files opened in a+ mode have inconsistent behavior on various
2468 2468 # platforms. Windows requires that a file positioning call be made
2469 2469 # when the file handle transitions between reads and writes. See
2470 2470 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2471 2471 # platforms, Python or the platform itself can be buggy. Some versions
2472 2472 # of Solaris have been observed to not append at the end of the file
2473 2473 # if the file was seeked to before the end. See issue4943 for more.
2474 2474 #
2475 2475 # We work around this issue by inserting a seek() before writing.
2476 2476 # Note: This is likely not necessary on Python 3. However, because
2477 2477 # the file handle is reused for reads and may be seeked there, we need
2478 2478 # to be careful before changing this.
2479 2479 if self._writinghandles is None:
2480 2480 msg = b'adding revision outside `revlog._writing` context'
2481 2481 raise error.ProgrammingError(msg)
2482 2482 ifh, dfh = self._writinghandles
2483 2483 if self._docket is None:
2484 2484 ifh.seek(0, os.SEEK_END)
2485 2485 else:
2486 2486 ifh.seek(self._docket.index_end, os.SEEK_SET)
2487 2487 if dfh:
2488 2488 if self._docket is None:
2489 2489 dfh.seek(0, os.SEEK_END)
2490 2490 else:
2491 2491 dfh.seek(self._docket.data_end, os.SEEK_SET)
2492 2492
2493 2493 curr = len(self) - 1
2494 2494 if not self._inline:
2495 2495 transaction.add(self._datafile, offset)
2496 2496 transaction.add(self._indexfile, curr * len(entry))
2497 2497 if data[0]:
2498 2498 dfh.write(data[0])
2499 2499 dfh.write(data[1])
2500 2500 if sidedata:
2501 2501 dfh.write(sidedata)
2502 2502 ifh.write(entry)
2503 2503 else:
2504 2504 offset += curr * self.index.entry_size
2505 2505 transaction.add(self._indexfile, offset)
2506 2506 ifh.write(entry)
2507 2507 ifh.write(data[0])
2508 2508 ifh.write(data[1])
2509 2509 if sidedata:
2510 2510 ifh.write(sidedata)
2511 2511 self._enforceinlinesize(transaction)
2512 2512 if self._docket is not None:
2513 2513 self._docket.index_end = self._writinghandles[0].tell()
2514 2514 self._docket.data_end = self._writinghandles[1].tell()
2515 2515
2516 2516 nodemaputil.setup_persistent_nodemap(transaction, self)
2517 2517
2518 2518 def addgroup(
2519 2519 self,
2520 2520 deltas,
2521 2521 linkmapper,
2522 2522 transaction,
2523 2523 alwayscache=False,
2524 2524 addrevisioncb=None,
2525 2525 duplicaterevisioncb=None,
2526 2526 ):
2527 2527 """
2528 2528 add a delta group
2529 2529
2530 2530 given a set of deltas, add them to the revision log. the
2531 2531 first delta is against its parent, which should be in our
2532 2532 log, the rest are against the previous delta.
2533 2533
2534 2534 If ``addrevisioncb`` is defined, it will be called with arguments of
2535 2535 this revlog and the revision that was added.
2536 2536 """
2537 2537
2538 2538 if self._adding_group:
2539 2539 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2540 2540
2541 2541 self._adding_group = True
2542 2542 empty = True
2543 2543 try:
2544 2544 with self._writing(transaction):
2545 2545 deltacomputer = deltautil.deltacomputer(self)
2546 2546 # loop through our set of deltas
2547 2547 for data in deltas:
2548 2548 (
2549 2549 node,
2550 2550 p1,
2551 2551 p2,
2552 2552 linknode,
2553 2553 deltabase,
2554 2554 delta,
2555 2555 flags,
2556 2556 sidedata,
2557 2557 ) = data
2558 2558 link = linkmapper(linknode)
2559 2559 flags = flags or REVIDX_DEFAULT_FLAGS
2560 2560
2561 2561 rev = self.index.get_rev(node)
2562 2562 if rev is not None:
2563 2563 # this can happen if two branches make the same change
2564 2564 self._nodeduplicatecallback(transaction, rev)
2565 2565 if duplicaterevisioncb:
2566 2566 duplicaterevisioncb(self, rev)
2567 2567 empty = False
2568 2568 continue
2569 2569
2570 2570 for p in (p1, p2):
2571 2571 if not self.index.has_node(p):
2572 2572 raise error.LookupError(
2573 2573 p, self.radix, _(b'unknown parent')
2574 2574 )
2575 2575
2576 2576 if not self.index.has_node(deltabase):
2577 2577 raise error.LookupError(
2578 2578 deltabase, self.display_id, _(b'unknown delta base')
2579 2579 )
2580 2580
2581 2581 baserev = self.rev(deltabase)
2582 2582
2583 2583 if baserev != nullrev and self.iscensored(baserev):
2584 2584 # if base is censored, delta must be full replacement in a
2585 2585 # single patch operation
2586 2586 hlen = struct.calcsize(b">lll")
2587 2587 oldlen = self.rawsize(baserev)
2588 2588 newlen = len(delta) - hlen
2589 2589 if delta[:hlen] != mdiff.replacediffheader(
2590 2590 oldlen, newlen
2591 2591 ):
2592 2592 raise error.CensoredBaseError(
2593 2593 self.display_id, self.node(baserev)
2594 2594 )
2595 2595
2596 2596 if not flags and self._peek_iscensored(baserev, delta):
2597 2597 flags |= REVIDX_ISCENSORED
2598 2598
2599 2599 # We assume consumers of addrevisioncb will want to retrieve
2600 2600 # the added revision, which will require a call to
2601 2601 # revision(). revision() will fast path if there is a cache
2602 2602 # hit. So, we tell _addrevision() to always cache in this case.
2603 2603 # We're only using addgroup() in the context of changegroup
2604 2604 # generation so the revision data can always be handled as raw
2605 2605 # by the flagprocessor.
2606 2606 rev = self._addrevision(
2607 2607 node,
2608 2608 None,
2609 2609 transaction,
2610 2610 link,
2611 2611 p1,
2612 2612 p2,
2613 2613 flags,
2614 2614 (baserev, delta),
2615 2615 alwayscache=alwayscache,
2616 2616 deltacomputer=deltacomputer,
2617 2617 sidedata=sidedata,
2618 2618 )
2619 2619
2620 2620 if addrevisioncb:
2621 2621 addrevisioncb(self, rev)
2622 2622 empty = False
2623 2623 finally:
2624 2624 self._adding_group = False
2625 2625 return not empty
2626 2626
2627 2627 def iscensored(self, rev):
2628 2628 """Check if a file revision is censored."""
2629 2629 if not self._censorable:
2630 2630 return False
2631 2631
2632 2632 return self.flags(rev) & REVIDX_ISCENSORED
2633 2633
2634 2634 def _peek_iscensored(self, baserev, delta):
2635 2635 """Quickly check if a delta produces a censored revision."""
2636 2636 if not self._censorable:
2637 2637 return False
2638 2638
2639 2639 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2640 2640
2641 2641 def getstrippoint(self, minlink):
2642 2642 """find the minimum rev that must be stripped to strip the linkrev
2643 2643
2644 2644 Returns a tuple containing the minimum rev and a set of all revs that
2645 2645 have linkrevs that will be broken by this strip.
2646 2646 """
2647 2647 return storageutil.resolvestripinfo(
2648 2648 minlink,
2649 2649 len(self) - 1,
2650 2650 self.headrevs(),
2651 2651 self.linkrev,
2652 2652 self.parentrevs,
2653 2653 )
2654 2654
2655 2655 def strip(self, minlink, transaction):
2656 2656 """truncate the revlog on the first revision with a linkrev >= minlink
2657 2657
2658 2658 This function is called when we're stripping revision minlink and
2659 2659 its descendants from the repository.
2660 2660
2661 2661 We have to remove all revisions with linkrev >= minlink, because
2662 2662 the equivalent changelog revisions will be renumbered after the
2663 2663 strip.
2664 2664
2665 2665 So we truncate the revlog on the first of these revisions, and
2666 2666 trust that the caller has saved the revisions that shouldn't be
2667 2667 removed and that it'll re-add them after this truncation.
2668 2668 """
2669 2669 if len(self) == 0:
2670 2670 return
2671 2671
2672 2672 rev, _ = self.getstrippoint(minlink)
2673 2673 if rev == len(self):
2674 2674 return
2675 2675
2676 2676 # first truncate the files on disk
2677 2677 data_end = self.start(rev)
2678 2678 if not self._inline:
2679 2679 transaction.add(self._datafile, data_end)
2680 2680 end = rev * self.index.entry_size
2681 2681 else:
2682 2682 end = data_end + (rev * self.index.entry_size)
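# Truncation sketch (numbers assumed): stripping at rev 10 with 64-byte
# entries and start(10) == 5000 truncates the index to 10 * 64 = 640 bytes
# when split, or to 5000 + 640 = 5640 bytes when the data is inline.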
2683 2683
2684 2684 transaction.add(self._indexfile, end)
2685 2685 if self._docket is not None:
2686 2686 # XXX we could leverage the docket while stripping. However it is
2687 2687 # not powerful enough at the time of this comment
2688 2688 self._docket.index_end = end
2689 2689 self._docket.data_end = data_end
2690 2690 self._docket.write(transaction, stripping=True)
2691 2691
2692 2692 # then reset internal state in memory to forget those revisions
2693 2693 self._revisioncache = None
2694 2694 self._chaininfocache = util.lrucachedict(500)
2695 2695 self._chunkclear()
2696 2696
2697 2697 del self.index[rev:-1]
2698 2698
2699 2699 def checksize(self):
2700 2700 """Check size of index and data files
2701 2701
2702 2702 return a (dd, di) tuple.
2703 2703 - dd: extra bytes for the "data" file
2704 2704 - di: extra bytes for the "index" file
2705 2705
2706 2706 A healthy revlog will return (0, 0).
2707 2707 """
2708 2708 expected = 0
2709 2709 if len(self):
2710 2710 expected = max(0, self.end(len(self) - 1))
2711 2711
2712 2712 try:
2713 2713 with self._datafp() as f:
2714 2714 f.seek(0, io.SEEK_END)
2715 2715 actual = f.tell()
2716 2716 dd = actual - expected
2717 2717 except IOError as inst:
2718 2718 if inst.errno != errno.ENOENT:
2719 2719 raise
2720 2720 dd = 0
2721 2721
2722 2722 try:
2723 2723 f = self.opener(self._indexfile)
2724 2724 f.seek(0, io.SEEK_END)
2725 2725 actual = f.tell()
2726 2726 f.close()
2727 2727 s = self.index.entry_size
2728 2728 i = max(0, actual // s)
2729 2729 di = actual - (i * s)
2730 2730 if self._inline:
2731 2731 databytes = 0
2732 2732 for r in self:
2733 2733 databytes += max(0, self.length(r))
2734 2734 dd = 0
2735 2735 di = actual - len(self) * s - databytes
2736 2736 except IOError as inst:
2737 2737 if inst.errno != errno.ENOENT:
2738 2738 raise
2739 2739 di = 0
2740 2740
2741 2741 return (dd, di)
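# Reading the result (numbers assumed): a crash that left 120 stray data
# bytes and half of a 64-byte index entry behind would report
#
#     dd, di = rl.checksize()  # -> (120, 32)
#
# while a healthy revlog reports (0, 0) as documented above.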
2742 2742
2743 2743 def files(self):
2744 2744 res = [self._indexfile]
2745 2745 if not self._inline:
2746 2746 res.append(self._datafile)
2747 2747 return res
2748 2748
2749 2749 def emitrevisions(
2750 2750 self,
2751 2751 nodes,
2752 2752 nodesorder=None,
2753 2753 revisiondata=False,
2754 2754 assumehaveparentrevisions=False,
2755 2755 deltamode=repository.CG_DELTAMODE_STD,
2756 2756 sidedata_helpers=None,
2757 2757 ):
2758 2758 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2759 2759 raise error.ProgrammingError(
2760 2760 b'unhandled value for nodesorder: %s' % nodesorder
2761 2761 )
2762 2762
2763 2763 if nodesorder is None and not self._generaldelta:
2764 2764 nodesorder = b'storage'
2765 2765
2766 2766 if (
2767 2767 not self._storedeltachains
2768 2768 and deltamode != repository.CG_DELTAMODE_PREV
2769 2769 ):
2770 2770 deltamode = repository.CG_DELTAMODE_FULL
2771 2771
2772 2772 return storageutil.emitrevisions(
2773 2773 self,
2774 2774 nodes,
2775 2775 nodesorder,
2776 2776 revlogrevisiondelta,
2777 2777 deltaparentfn=self.deltaparent,
2778 2778 candeltafn=self.candelta,
2779 2779 rawsizefn=self.rawsize,
2780 2780 revdifffn=self.revdiff,
2781 2781 flagsfn=self.flags,
2782 2782 deltamode=deltamode,
2783 2783 revisiondata=revisiondata,
2784 2784 assumehaveparentrevisions=assumehaveparentrevisions,
2785 2785 sidedata_helpers=sidedata_helpers,
2786 2786 )
2787 2787
2788 2788 DELTAREUSEALWAYS = b'always'
2789 2789 DELTAREUSESAMEREVS = b'samerevs'
2790 2790 DELTAREUSENEVER = b'never'
2791 2791
2792 2792 DELTAREUSEFULLADD = b'fulladd'
2793 2793
2794 2794 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2795 2795
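# Hypothetical invocation (transaction API assumed), recomputing every
# delta in the destination:
#
#     with repo.transaction(b'clone') as tr:
#         src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)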
2796 2796 def clone(
2797 2797 self,
2798 2798 tr,
2799 2799 destrevlog,
2800 2800 addrevisioncb=None,
2801 2801 deltareuse=DELTAREUSESAMEREVS,
2802 2802 forcedeltabothparents=None,
2803 2803 sidedata_helpers=None,
2804 2804 ):
2805 2805 """Copy this revlog to another, possibly with format changes.
2806 2806
2807 2807 The destination revlog will contain the same revisions and nodes.
2808 2808 However, it may not be bit-for-bit identical due to e.g. delta encoding
2809 2809 differences.
2810 2810
2811 2811 The ``deltareuse`` argument controls how deltas from the existing revlog
2812 2812 are preserved in the destination revlog. The argument can have the
2813 2813 following values:
2814 2814
2815 2815 DELTAREUSEALWAYS
2816 2816 Deltas will always be reused (if possible), even if the destination
2817 2817 revlog would not select the same revisions for the delta. This is the
2818 2818 fastest mode of operation.
2819 2819 DELTAREUSESAMEREVS
2820 2820 Deltas will be reused if the destination revlog would pick the same
2821 2821 revisions for the delta. This mode strikes a balance between speed
2822 2822 and optimization.
2823 2823 DELTAREUSENEVER
2824 2824 Deltas will never be reused. This is the slowest mode of execution.
2825 2825 This mode can be used to recompute deltas (e.g. if the diff/delta
2826 2826 algorithm changes).
2827 2827 DELTAREUSEFULLADD
2828 2828 Revisions will be re-added as if they were new content. This is
2829 2829 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2830 2830 e.g. large file detection and handling.
2831 2831
2832 2832 Delta computation can be slow, so the choice of delta reuse policy can
2833 2833 significantly affect run time.
2834 2834
2835 2835 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2836 2836 two extremes. Deltas will be reused if they are appropriate. But if the
2837 2837 delta could choose a better revision, it will do so. This means if you
2838 2838 are converting a non-generaldelta revlog to a generaldelta revlog,
2839 2839 deltas will be recomputed if the delta's parent isn't a parent of the
2840 2840 revision.
2841 2841
2842 2842 In addition to the delta policy, the ``forcedeltabothparents``
2843 2843 argument controls whether to force computing deltas against both parents
2844 2844 for merges. If unset, the destination revlog's current setting is used.
2845 2845
2846 2846 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2847 2847 `sidedata_helpers`.
2848 2848 """
2849 2849 if deltareuse not in self.DELTAREUSEALL:
2850 2850 raise ValueError(
2851 2851 _(b'value for deltareuse invalid: %s') % deltareuse
2852 2852 )
2853 2853
2854 2854 if len(destrevlog):
2855 2855 raise ValueError(_(b'destination revlog is not empty'))
2856 2856
2857 2857 if getattr(self, 'filteredrevs', None):
2858 2858 raise ValueError(_(b'source revlog has filtered revisions'))
2859 2859 if getattr(destrevlog, 'filteredrevs', None):
2860 2860 raise ValueError(_(b'destination revlog has filtered revisions'))
2861 2861
2862 2862 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2863 2863 # if possible.
2864 2864 oldlazydelta = destrevlog._lazydelta
2865 2865 oldlazydeltabase = destrevlog._lazydeltabase
2866 2866 oldamd = destrevlog._deltabothparents
2867 2867
2868 2868 try:
2869 2869 if deltareuse == self.DELTAREUSEALWAYS:
2870 2870 destrevlog._lazydeltabase = True
2871 2871 destrevlog._lazydelta = True
2872 2872 elif deltareuse == self.DELTAREUSESAMEREVS:
2873 2873 destrevlog._lazydeltabase = False
2874 2874 destrevlog._lazydelta = True
2875 2875 elif deltareuse == self.DELTAREUSENEVER:
2876 2876 destrevlog._lazydeltabase = False
2877 2877 destrevlog._lazydelta = False
2878 2878
2879 2879 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2880 2880
2881 2881 self._clone(
2882 2882 tr,
2883 2883 destrevlog,
2884 2884 addrevisioncb,
2885 2885 deltareuse,
2886 2886 forcedeltabothparents,
2887 2887 sidedata_helpers,
2888 2888 )
2889 2889
2890 2890 finally:
2891 2891 destrevlog._lazydelta = oldlazydelta
2892 2892 destrevlog._lazydeltabase = oldlazydeltabase
2893 2893 destrevlog._deltabothparents = oldamd
2894 2894
2895 2895 def _clone(
2896 2896 self,
2897 2897 tr,
2898 2898 destrevlog,
2899 2899 addrevisioncb,
2900 2900 deltareuse,
2901 2901 forcedeltabothparents,
2902 2902 sidedata_helpers,
2903 2903 ):
2904 2904 """perform the core duty of `revlog.clone` after parameter processing"""
2905 2905 deltacomputer = deltautil.deltacomputer(destrevlog)
2906 2906 index = self.index
2907 2907 for rev in self:
2908 2908 entry = index[rev]
2909 2909
2910 2910 # Some classes override linkrev to take filtered revs into
2911 2911 # account. Use raw entry from index.
2912 2912 flags = entry[0] & 0xFFFF
2913 2913 linkrev = entry[4]
2914 2914 p1 = index[entry[5]][7]
2915 2915 p2 = index[entry[6]][7]
2916 2916 node = entry[7]
2917 2917
2918 2918 # (Possibly) reuse the delta from the revlog if allowed and
2919 2919 # the revlog chunk is a delta.
2920 2920 cachedelta = None
2921 2921 rawtext = None
2922 2922 if deltareuse == self.DELTAREUSEFULLADD:
2923 2923 text, sidedata = self._revisiondata(rev)
2924 2924
2925 2925 if sidedata_helpers is not None:
2926 2926 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2927 2927 self, sidedata_helpers, sidedata, rev
2928 2928 )
2929 2929 flags = flags | new_flags[0] & ~new_flags[1]
2930 2930
2931 2931 destrevlog.addrevision(
2932 2932 text,
2933 2933 tr,
2934 2934 linkrev,
2935 2935 p1,
2936 2936 p2,
2937 2937 cachedelta=cachedelta,
2938 2938 node=node,
2939 2939 flags=flags,
2940 2940 deltacomputer=deltacomputer,
2941 2941 sidedata=sidedata,
2942 2942 )
2943 2943 else:
2944 2944 if destrevlog._lazydelta:
2945 2945 dp = self.deltaparent(rev)
2946 2946 if dp != nullrev:
2947 2947 cachedelta = (dp, bytes(self._chunk(rev)))
2948 2948
2949 2949 sidedata = None
2950 2950 if not cachedelta:
2951 2951 rawtext, sidedata = self._revisiondata(rev)
2952 2952 if sidedata is None:
2953 2953 sidedata = self.sidedata(rev)
2954 2954
2955 2955 if sidedata_helpers is not None:
2956 2956 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2957 2957 self, sidedata_helpers, sidedata, rev
2958 2958 )
2959 2959 flags = flags | new_flags[0] & ~new_flags[1]
2960 2960
2961 2961 with destrevlog._writing(tr):
2962 2962 destrevlog._addrevision(
2963 2963 node,
2964 2964 rawtext,
2965 2965 tr,
2966 2966 linkrev,
2967 2967 p1,
2968 2968 p2,
2969 2969 flags,
2970 2970 cachedelta,
2971 2971 deltacomputer=deltacomputer,
2972 2972 sidedata=sidedata,
2973 2973 )
2974 2974
2975 2975 if addrevisioncb:
2976 2976 addrevisioncb(self, rev, node)
2977 2977
2978 2978 def censorrevision(self, tr, censornode, tombstone=b''):
2979 2979 if self._format_version == REVLOGV0:
2980 2980 raise error.RevlogError(
2981 2981 _(b'cannot censor with version %d revlogs')
2982 2982 % self._format_version
2983 2983 )
2984 2984
2985 2985 censorrev = self.rev(censornode)
2986 2986 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2987 2987
2988 2988 if len(tombstone) > self.rawsize(censorrev):
2989 2989 raise error.Abort(
2990 2990 _(b'censor tombstone must be no longer than censored data')
2991 2991 )
2992 2992
2993 2993 # Rewriting the revlog in place is hard. Our strategy for censoring is
2994 2994 # to create a new revlog, copy all revisions to it, then replace the
2995 2995 # revlogs on transaction close.
2996 2996 #
2997 2997 # This is a bit dangerous. We could easily have a mismatch of state.
2998 2998 newrl = revlog(
2999 2999 self.opener,
3000 3000 target=self.target,
3001 3001 radix=self.radix,
3002 3002 postfix=b'tmpcensored',
3003 3003 censorable=True,
3004 3004 )
3005 3005 newrl._format_version = self._format_version
3006 3006 newrl._format_flags = self._format_flags
3007 3007 newrl._generaldelta = self._generaldelta
3008 3008 newrl._parse_index = self._parse_index
3009 3009
3010 3010 for rev in self.revs():
3011 3011 node = self.node(rev)
3012 3012 p1, p2 = self.parents(node)
3013 3013
3014 3014 if rev == censorrev:
3015 3015 newrl.addrawrevision(
3016 3016 tombstone,
3017 3017 tr,
3018 3018 self.linkrev(censorrev),
3019 3019 p1,
3020 3020 p2,
3021 3021 censornode,
3022 3022 REVIDX_ISCENSORED,
3023 3023 )
3024 3024
3025 3025 if newrl.deltaparent(rev) != nullrev:
3026 3026 raise error.Abort(
3027 3027 _(
3028 3028 b'censored revision stored as delta; '
3029 3029 b'cannot censor'
3030 3030 ),
3031 3031 hint=_(
3032 3032 b'censoring of revlogs is not '
3033 3033 b'fully implemented; please report '
3034 3034 b'this bug'
3035 3035 ),
3036 3036 )
3037 3037 continue
3038 3038
3039 3039 if self.iscensored(rev):
3040 3040 if self.deltaparent(rev) != nullrev:
3041 3041 raise error.Abort(
3042 3042 _(
3043 3043 b'cannot censor due to censored '
3044 3044 b'revision having delta stored'
3045 3045 )
3046 3046 )
3047 3047 rawtext = self._chunk(rev)
3048 3048 else:
3049 3049 rawtext = self.rawdata(rev)
3050 3050
3051 3051 newrl.addrawrevision(
3052 3052 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3053 3053 )
3054 3054
3055 3055 tr.addbackup(self._indexfile, location=b'store')
3056 3056 if not self._inline:
3057 3057 tr.addbackup(self._datafile, location=b'store')
3058 3058
3059 3059 self.opener.rename(newrl._indexfile, self._indexfile)
3060 3060 if not self._inline:
3061 3061 self.opener.rename(newrl._datafile, self._datafile)
3062 3062
3063 3063 self.clearcaches()
3064 3064 self._loadindex()
3065 3065
3066 3066 def verifyintegrity(self, state):
3067 3067 """Verifies the integrity of the revlog.
3068 3068
3069 3069 Yields ``revlogproblem`` instances describing problems that are
3070 3070 found.
3071 3071 """
3072 3072 dd, di = self.checksize()
3073 3073 if dd:
3074 3074 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3075 3075 if di:
3076 3076 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3077 3077
3078 3078 version = self._format_version
3079 3079
3080 3080 # The verifier tells us what version revlog we should be.
3081 3081 if version != state[b'expectedversion']:
3082 3082 yield revlogproblem(
3083 3083 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3084 3084 % (self.display_id, version, state[b'expectedversion'])
3085 3085 )
3086 3086
3087 3087 state[b'skipread'] = set()
3088 3088 state[b'safe_renamed'] = set()
3089 3089
3090 3090 for rev in self:
3091 3091 node = self.node(rev)
3092 3092
3093 3093 # Verify contents. 4 cases to care about:
3094 3094 #
3095 3095 # common: the most common case
3096 3096 # rename: with a rename
3097 3097 # meta: file content starts with b'\1\n', the metadata
3098 3098 # header defined in filelog.py, but without a rename
3099 3099 # ext: content stored externally
3100 3100 #
3101 3101 # More formally, their differences are shown below:
3102 3102 #
3103 3103 # | common | rename | meta | ext
3104 3104 # -------------------------------------------------------
3105 3105 # flags() | 0 | 0 | 0 | not 0
3106 3106 # renamed() | False | True | False | ?
3107 3107 # rawtext[0:2]=='\1\n'| False | True | True | ?
3108 3108 #
3109 3109 # "rawtext" means the raw text stored in revlog data, which
3110 3110 # could be retrieved by "rawdata(rev)". "text"
3111 3111 # mentioned below is "revision(rev)".
3112 3112 #
3113 3113 # There are 3 different lengths stored physically:
3114 3114 # 1. L1: rawsize, stored in revlog index
3115 3115 # 2. L2: len(rawtext), stored in revlog data
3116 3116 # 3. L3: len(text), stored in revlog data if flags==0, or
3117 3117 # possibly somewhere else if flags!=0
3118 3118 #
3119 3119 # L1 should be equal to L2. L3 could be different from them.
3120 3120 # "text" may or may not affect commit hash depending on flag
3121 3121 # processors (see flagutil.addflagprocessor).
3122 3122 #
3123 3123 # | common | rename | meta | ext
3124 3124 # -------------------------------------------------
3125 3125 # rawsize() | L1 | L1 | L1 | L1
3126 3126 # size() | L1 | L2-LM | L1(*) | L1 (?)
3127 3127 # len(rawtext) | L2 | L2 | L2 | L2
3128 3128 # len(text) | L2 | L2 | L2 | L3
3129 3129 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3130 3130 #
3131 3131 # LM: length of metadata, depending on rawtext
3132 3132 # (*): not ideal, see comment in filelog.size
3133 3133 # (?): could be "- len(meta)" if the resolved content has
3134 3134 # rename metadata
3135 3135 #
3136 3136 # Checks needed to be done:
3137 3137 # 1. length check: L1 == L2, in all cases.
3138 3138 # 2. hash check: depending on flag processor, we may need to
3139 3139 # use either "text" (external), or "rawtext" (in revlog).
3140 3140
3141 3141 try:
3142 3142 skipflags = state.get(b'skipflags', 0)
3143 3143 if skipflags:
3144 3144 skipflags &= self.flags(rev)
3145 3145
3146 3146 _verify_revision(self, skipflags, state, node)
3147 3147
3148 3148 l1 = self.rawsize(rev)
3149 3149 l2 = len(self.rawdata(node))
3150 3150
3151 3151 if l1 != l2:
3152 3152 yield revlogproblem(
3153 3153 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3154 3154 node=node,
3155 3155 )
3156 3156
3157 3157 except error.CensoredNodeError:
3158 3158 if state[b'erroroncensored']:
3159 3159 yield revlogproblem(
3160 3160 error=_(b'censored file data'), node=node
3161 3161 )
3162 3162 state[b'skipread'].add(node)
3163 3163 except Exception as e:
3164 3164 yield revlogproblem(
3165 3165 error=_(b'unpacking %s: %s')
3166 3166 % (short(node), stringutil.forcebytestr(e)),
3167 3167 node=node,
3168 3168 )
3169 3169 state[b'skipread'].add(node)
3170 3170
3171 3171 def storageinfo(
3172 3172 self,
3173 3173 exclusivefiles=False,
3174 3174 sharedfiles=False,
3175 3175 revisionscount=False,
3176 3176 trackedsize=False,
3177 3177 storedsize=False,
3178 3178 ):
3179 3179 d = {}
3180 3180
3181 3181 if exclusivefiles:
3182 3182 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3183 3183 if not self._inline:
3184 3184 d[b'exclusivefiles'].append((self.opener, self._datafile))
3185 3185
3186 3186 if sharedfiles:
3187 3187 d[b'sharedfiles'] = []
3188 3188
3189 3189 if revisionscount:
3190 3190 d[b'revisionscount'] = len(self)
3191 3191
3192 3192 if trackedsize:
3193 3193 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3194 3194
3195 3195 if storedsize:
3196 3196 d[b'storedsize'] = sum(
3197 3197 self.opener.stat(path).st_size for path in self.files()
3198 3198 )
3199 3199
3200 3200 return d
3201 3201
3202 3202 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3203 3203 if not self.hassidedata:
3204 3204 return
3205 3205 # revlog formats with sidedata support do not support inline
3206 3206 assert not self._inline
3207 3207 if not helpers[1] and not helpers[2]:
3208 3208 # Nothing to generate or remove
3209 3209 return
3210 3210
3211 3211 new_entries = []
3212 3212 # append the new sidedata
3213 3213 with self._writing(transaction):
3214 3214 ifh, dfh = self._writinghandles
3215 3215 if self._docket is not None:
3216 3216 dfh.seek(self._docket.data_end, os.SEEK_SET)
3217 3217 else:
3218 3218 dfh.seek(0, os.SEEK_END)
3219 3219
3220 3220 current_offset = dfh.tell()
3221 3221 for rev in range(startrev, endrev + 1):
3222 3222 entry = self.index[rev]
3223 3223 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3224 3224 store=self,
3225 3225 sidedata_helpers=helpers,
3226 3226 sidedata={},
3227 3227 rev=rev,
3228 3228 )
3229 3229
3230 3230 serialized_sidedata = sidedatautil.serialize_sidedata(
3231 3231 new_sidedata
3232 3232 )
3233 3233 if entry[8] != 0 or entry[9] != 0:
3234 3234 # rewriting entries that already have sidedata is not
3235 3235 # supported yet, because it introduces garbage data in the
3236 3236 # revlog.
3237 3237 msg = b"rewriting existing sidedata is not supported yet"
3238 3238 raise error.Abort(msg)
3239 3239
3240 3240 # Apply (potential) flags to add and to remove after running
3241 3241 # the sidedata helpers
3242 3242 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3243 entry = (new_offset_flags,) + entry[1:8]
3244 entry += (current_offset, len(serialized_sidedata))
3243 entry_update = (
3244 current_offset,
3245 len(serialized_sidedata),
3246 new_offset_flags,
3247 )
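# i.e. the queued update is exactly the (offset, size, flags) triple
# that index.replace_sidedata_info() consumes below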
3245 3248
3246 3249 # the sidedata computation might have moved the file cursors around
3247 3250 dfh.seek(current_offset, os.SEEK_SET)
3248 3251 dfh.write(serialized_sidedata)
3249 new_entries.append(entry)
3252 new_entries.append(entry_update)
3250 3253 current_offset += len(serialized_sidedata)
3251 3254 if self._docket is not None:
3252 3255 self._docket.data_end = dfh.tell()
3253 3256
3254 3257 # rewrite the new index entries
3255 3258 ifh.seek(startrev * self.index.entry_size)
3256 3259 for i, e in enumerate(new_entries):
3257 3260 rev = startrev + i
3258 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3261 self.index.replace_sidedata_info(rev, *e)
3259 3262 packed = self.index.entry_binary(rev)
3260 3263 if rev == 0 and self._docket is None:
3261 3264 header = self._format_flags | self._format_version
3262 3265 header = self.index.pack_header(header)
3263 3266 packed = header + packed
3264 3267 ifh.write(packed)