revlog: refactor logic to compute nodemap file in separate function...
Pulkit Goyal
r47196:835aafb2 default
@@ -1,3090 +1,3085
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86
 87 87 # blanked usage of all the names to prevent pyflakes constraints
 88 88 # We need these names available in the module for extensions.
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_SIDEDATA
102 102 REVIDX_HASCOPIESINFO
103 103 REVIDX_EXTSTORED
104 104 REVIDX_DEFAULT_FLAGS
105 105 REVIDX_FLAGS_ORDER
106 106 REVIDX_RAWTEXT_CHANGING_FLAGS
107 107
108 108 parsers = policy.importmod('parsers')
109 109 rustancestor = policy.importrust('ancestor')
110 110 rustdagop = policy.importrust('dagop')
111 111 rustrevlog = policy.importrust('revlog')
112 112
113 113 # Aliased for performance.
114 114 _zlibdecompress = zlib.decompress
115 115
116 116 # max size of revlog with inline data
117 117 _maxinline = 131072
118 118 _chunksize = 1048576
119 119
120 120 # Flag processors for REVIDX_ELLIPSIS.
121 121 def ellipsisreadprocessor(rl, text):
122 122 return text, False, {}
123 123
124 124
125 125 def ellipsiswriteprocessor(rl, text, sidedata):
126 126 return text, False
127 127
128 128
129 129 def ellipsisrawprocessor(rl, text):
130 130 return False
131 131
132 132
133 133 ellipsisprocessor = (
134 134 ellipsisreadprocessor,
135 135 ellipsiswriteprocessor,
136 136 ellipsisrawprocessor,
137 137 )
138 138
139 139
140 140 def getoffset(q):
141 141 return int(q >> 16)
142 142
143 143
144 144 def gettype(q):
145 145 return int(q & 0xFFFF)
146 146
147 147
148 148 def offset_type(offset, type):
149 149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 150 raise ValueError(b'unknown revlog index flags')
151 151 return int(int(offset) << 16 | type)
152 152
153 153
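The three helpers above implement the packing used by the first field of every index entry: the byte offset lives above bit 16 and the 16 revision flag bits live below it. A minimal round-trip sketch, assuming these helpers are imported from this module:

```python
# Illustrative only: round-trip of the offset/flags packing defined above.
packed = offset_type(4096, 0)    # byte offset 4096, no flag bits set
assert getoffset(packed) == 4096
assert gettype(packed) == 0
# Flags outside flagutil.REVIDX_KNOWN_FLAGS make offset_type raise ValueError.
```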
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
 167 167 # people using pure don't really have performance considerations (and a
 168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @attr.s(slots=True, frozen=True)
175 175 class _revisioninfo(object):
176 176 """Information about a revision that allows building its fulltext
177 177 node: expected hash of the revision
178 178 p1, p2: parent revs of the revision
179 179 btext: built text cache consisting of a one-element list
180 180 cachedelta: (baserev, uncompressed_delta) or None
181 181 flags: flags associated to the revision storage
182 182
183 183 One of btext[0] or cachedelta must be set.
184 184 """
185 185
186 186 node = attr.ib()
187 187 p1 = attr.ib()
188 188 p2 = attr.ib()
189 189 btext = attr.ib()
190 190 textlen = attr.ib()
191 191 cachedelta = attr.ib()
192 192 flags = attr.ib()
193 193
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208
209 209 @interfaceutil.implementer(repository.iverifyproblem)
210 210 @attr.s(frozen=True)
211 211 class revlogproblem(object):
212 212 warning = attr.ib(default=None)
213 213 error = attr.ib(default=None)
214 214 node = attr.ib(default=None)
215 215
216 216
217 217 # index v0:
218 218 # 4 bytes: offset
219 219 # 4 bytes: compressed length
220 220 # 4 bytes: base rev
221 221 # 4 bytes: link rev
222 222 # 20 bytes: parent 1 nodeid
223 223 # 20 bytes: parent 2 nodeid
224 224 # 20 bytes: nodeid
225 225 indexformatv0 = struct.Struct(b">4l20s20s20s")
226 226 indexformatv0_pack = indexformatv0.pack
227 227 indexformatv0_unpack = indexformatv0.unpack
228 228
229 229
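The comment above fixes the 76-byte v0 record layout. A self-contained sketch of decoding one raw record with the same struct (the helper name and dict keys are illustrative, not part of this module):

```python
import struct

indexformatv0 = struct.Struct(b">4l20s20s20s")  # 76-byte v0 index record

def parse_v0_entry(record):
    """Sketch: decode one raw v0 index record into named fields."""
    offset, clen, baserev, linkrev, p1, p2, node = indexformatv0.unpack(record)
    return {
        b'offset': offset,
        b'compressed_length': clen,
        b'base_rev': baserev,
        b'link_rev': linkrev,
        b'p1_node': p1,   # 20-byte binary nodeid of parent 1
        b'p2_node': p2,   # 20-byte binary nodeid of parent 2
        b'node': node,    # 20-byte binary nodeid of this revision
    }
```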
230 230 class revlogoldindex(list):
231 231 @property
232 232 def nodemap(self):
233 233 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
234 234 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
235 235 return self._nodemap
236 236
237 237 @util.propertycache
238 238 def _nodemap(self):
239 239 nodemap = nodemaputil.NodeMap({nullid: nullrev})
240 240 for r in range(0, len(self)):
241 241 n = self[r][7]
242 242 nodemap[n] = r
243 243 return nodemap
244 244
245 245 def has_node(self, node):
 246 246 """return True if the node exists in the index"""
247 247 return node in self._nodemap
248 248
249 249 def rev(self, node):
250 250 """return a revision for a node
251 251
252 252 If the node is unknown, raise a RevlogError"""
253 253 return self._nodemap[node]
254 254
255 255 def get_rev(self, node):
256 256 """return a revision for a node
257 257
258 258 If the node is unknown, return None"""
259 259 return self._nodemap.get(node)
260 260
261 261 def append(self, tup):
262 262 self._nodemap[tup[7]] = len(self)
263 263 super(revlogoldindex, self).append(tup)
264 264
265 265 def __delitem__(self, i):
266 266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 268 for r in pycompat.xrange(i.start, len(self)):
269 269 del self._nodemap[self[r][7]]
270 270 super(revlogoldindex, self).__delitem__(i)
271 271
272 272 def clearcaches(self):
273 273 self.__dict__.pop('_nodemap', None)
274 274
275 275 def __getitem__(self, i):
276 276 if i == -1:
277 277 return (0, 0, 0, -1, -1, -1, -1, nullid)
278 278 return list.__getitem__(self, i)
279 279
280 280
281 281 class revlogoldio(object):
282 282 def __init__(self):
283 283 self.size = indexformatv0.size
284 284
285 285 def parseindex(self, data, inline):
286 286 s = self.size
287 287 index = []
288 288 nodemap = nodemaputil.NodeMap({nullid: nullrev})
289 289 n = off = 0
290 290 l = len(data)
291 291 while off + s <= l:
292 292 cur = data[off : off + s]
293 293 off += s
294 294 e = indexformatv0_unpack(cur)
295 295 # transform to revlogv1 format
296 296 e2 = (
297 297 offset_type(e[0], 0),
298 298 e[1],
299 299 -1,
300 300 e[2],
301 301 e[3],
302 302 nodemap.get(e[4], nullrev),
303 303 nodemap.get(e[5], nullrev),
304 304 e[6],
305 305 )
306 306 index.append(e2)
307 307 nodemap[e[6]] = n
308 308 n += 1
309 309
310 310 index = revlogoldindex(index)
311 311 return index, None
312 312
313 313 def packentry(self, entry, node, version, rev):
314 314 if gettype(entry[0]):
315 315 raise error.RevlogError(
316 316 _(b'index entry flags need revlog version 1')
317 317 )
318 318 e2 = (
319 319 getoffset(entry[0]),
320 320 entry[1],
321 321 entry[3],
322 322 entry[4],
323 323 node(entry[5]),
324 324 node(entry[6]),
325 325 entry[7],
326 326 )
327 327 return indexformatv0_pack(*e2)
328 328
329 329
330 330 # index ng:
331 331 # 6 bytes: offset
332 332 # 2 bytes: flags
333 333 # 4 bytes: compressed length
334 334 # 4 bytes: uncompressed length
335 335 # 4 bytes: base rev
336 336 # 4 bytes: link rev
337 337 # 4 bytes: parent 1 rev
338 338 # 4 bytes: parent 2 rev
339 339 # 32 bytes: nodeid
340 340 indexformatng = struct.Struct(b">Qiiiiii20s12x")
341 341 indexformatng_pack = indexformatng.pack
342 342 versionformat = struct.Struct(b">I")
343 343 versionformat_pack = versionformat.pack
344 344 versionformat_unpack = versionformat.unpack
345 345
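As with v0 above, the record layout is fully described by the format string; the struct reserves 32 bytes for the nodeid (20 significant bytes plus 12 bytes of padding). A sketch of decoding one raw v1 ("ng") record, with names chosen for illustration:

```python
import struct

indexformatng = struct.Struct(b">Qiiiiii20s12x")  # 64-byte v1 ("ng") record

def parse_ng_entry(record):
    """Sketch: decode one raw v1 index record.

    The first field packs the 6-byte offset and 2-byte flags, matching
    offset_type()/getoffset()/gettype() defined earlier in this file.
    """
    (offset_flags, clen, ulen, baserev,
     linkrev, p1rev, p2rev, node) = indexformatng.unpack(record)
    return {
        b'offset': offset_flags >> 16,
        b'flags': offset_flags & 0xFFFF,
        b'compressed_length': clen,
        b'uncompressed_length': ulen,
        b'base_rev': baserev,
        b'link_rev': linkrev,
        b'p1_rev': p1rev,
        b'p2_rev': p2rev,
        b'node': node,  # 20-byte binary nodeid (12 trailing pad bytes dropped)
    }
```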
346 346 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
347 347 # signed integer)
348 348 _maxentrysize = 0x7FFFFFFF
349 349
350 350
351 351 class revlogio(object):
352 352 def __init__(self):
353 353 self.size = indexformatng.size
354 354
355 355 def parseindex(self, data, inline):
356 356 # call the C implementation to parse the index data
357 357 index, cache = parsers.parse_index2(data, inline)
358 358 return index, cache
359 359
360 360 def packentry(self, entry, node, version, rev):
361 361 p = indexformatng_pack(*entry)
362 362 if rev == 0:
363 363 p = versionformat_pack(version) + p[4:]
364 364 return p
365 365
366 366
367 367 NodemapRevlogIO = None
368 368
369 369 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370 370
371 371 class NodemapRevlogIO(revlogio):
 372 372 """A debug oriented IO class that returns a PersistentNodeMapIndexObject
373 373
374 374 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
375 375 """
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 379 return index, cache
380 380
381 381
382 382 class rustrevlogio(revlogio):
383 383 def parseindex(self, data, inline):
384 384 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 385 return rustrevlog.MixedIndex(index), cache
386 386
387 387
388 388 class revlog(object):
389 389 """
390 390 the underlying revision storage object
391 391
392 392 A revlog consists of two parts, an index and the revision data.
393 393
394 394 The index is a file with a fixed record size containing
395 395 information on each revision, including its nodeid (hash), the
396 396 nodeids of its parents, the position and offset of its data within
397 397 the data file, and the revision it's based on. Finally, each entry
398 398 contains a linkrev entry that can serve as a pointer to external
399 399 data.
400 400
401 401 The revision data itself is a linear collection of data chunks.
402 402 Each chunk represents a revision and is usually represented as a
403 403 delta against the previous chunk. To bound lookup time, runs of
404 404 deltas are limited to about 2 times the length of the original
405 405 version data. This makes retrieval of a version proportional to
406 406 its size, or O(1) relative to the number of revisions.
407 407
408 408 Both pieces of the revlog are written to in an append-only
409 409 fashion, which means we never need to rewrite a file to insert or
410 410 remove data, and can use some simple techniques to avoid the need
411 411 for locking while reading.
412 412
413 413 If checkambig, indexfile is opened with checkambig=True at
414 414 writing, to avoid file stat ambiguity.
415 415
416 416 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 417 index will be mmapped rather than read if it is larger than the
418 418 configured threshold.
419 419
420 420 If censorable is True, the revlog can have censored revisions.
421 421
422 422 If `upperboundcomp` is not None, this is the expected maximal gain from
423 423 compression for the data content.
424 424 """
425 425
426 426 _flagserrorclass = error.RevlogError
427 427
428 428 def __init__(
429 429 self,
430 430 opener,
431 431 indexfile,
432 432 datafile=None,
433 433 checkambig=False,
434 434 mmaplargeindex=False,
435 435 censorable=False,
436 436 upperboundcomp=None,
437 437 persistentnodemap=False,
438 438 ):
439 439 """
440 440 create a revlog object
441 441
442 442 opener is a function that abstracts the file opening operation
443 443 and can be used to implement COW semantics or the like.
444 444
445 445 """
446 446 self.upperboundcomp = upperboundcomp
447 447 self.indexfile = indexfile
448 448 self.datafile = datafile or (indexfile[:-2] + b".d")
449 449 self.nodemap_file = None
450 450 if persistentnodemap:
451 if indexfile.endswith(b'.a'):
452 pending_path = indexfile[:-4] + b".n.a"
453 if opener.exists(pending_path):
454 self.nodemap_file = pending_path
455 else:
456 self.nodemap_file = indexfile[:-4] + b".n"
457 else:
458 self.nodemap_file = indexfile[:-2] + b".n"
451 self.nodemap_file = nodemaputil.get_nodemap_file(
452 opener, self.indexfile
453 )
459 454
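The hunk above is the core of this changeset: the inline computation of the persistent-nodemap file name moves into nodemaputil.get_nodemap_file(). The body of that helper lives in revlogutils/nodemap.py and is not shown in this diff; reconstructed from the removed lines, it presumably looks roughly like:

```python
def get_nodemap_file(opener, indexfile):
    """Sketch reconstructed from the removed lines; not the verbatim helper."""
    if indexfile.endswith(b'.a'):
        # pending (transaction) index: prefer a pending nodemap if one exists
        pending_path = indexfile[:-4] + b".n.a"
        if opener.exists(pending_path):
            return pending_path
        return indexfile[:-4] + b".n"
    else:
        return indexfile[:-2] + b".n"
```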
460 455 self.opener = opener
461 456 # When True, indexfile is opened with checkambig=True at writing, to
462 457 # avoid file stat ambiguity.
463 458 self._checkambig = checkambig
464 459 self._mmaplargeindex = mmaplargeindex
465 460 self._censorable = censorable
466 461 # 3-tuple of (node, rev, text) for a raw revision.
467 462 self._revisioncache = None
468 463 # Maps rev to chain base rev.
469 464 self._chainbasecache = util.lrucachedict(100)
470 465 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
471 466 self._chunkcache = (0, b'')
472 467 # How much data to read and cache into the raw revlog data cache.
473 468 self._chunkcachesize = 65536
474 469 self._maxchainlen = None
475 470 self._deltabothparents = True
476 471 self.index = None
477 472 self._nodemap_docket = None
478 473 # Mapping of partial identifiers to full nodes.
479 474 self._pcache = {}
480 475 # Mapping of revision integer to full node.
481 476 self._compengine = b'zlib'
482 477 self._compengineopts = {}
483 478 self._maxdeltachainspan = -1
484 479 self._withsparseread = False
485 480 self._sparserevlog = False
486 481 self._srdensitythreshold = 0.50
487 482 self._srmingapsize = 262144
488 483
489 484 # Make copy of flag processors so each revlog instance can support
490 485 # custom flags.
491 486 self._flagprocessors = dict(flagutil.flagprocessors)
492 487
493 488 # 2-tuple of file handles being used for active writing.
494 489 self._writinghandles = None
495 490
496 491 self._loadindex()
497 492
498 493 def _loadindex(self):
499 494 mmapindexthreshold = None
500 495 opts = self.opener.options
501 496
502 497 if b'revlogv2' in opts:
503 498 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
504 499 elif b'revlogv1' in opts:
505 500 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
506 501 if b'generaldelta' in opts:
507 502 newversionflags |= FLAG_GENERALDELTA
508 503 elif b'revlogv0' in self.opener.options:
509 504 newversionflags = REVLOGV0
510 505 else:
511 506 newversionflags = REVLOG_DEFAULT_VERSION
512 507
513 508 if b'chunkcachesize' in opts:
514 509 self._chunkcachesize = opts[b'chunkcachesize']
515 510 if b'maxchainlen' in opts:
516 511 self._maxchainlen = opts[b'maxchainlen']
517 512 if b'deltabothparents' in opts:
518 513 self._deltabothparents = opts[b'deltabothparents']
519 514 self._lazydelta = bool(opts.get(b'lazydelta', True))
520 515 self._lazydeltabase = False
521 516 if self._lazydelta:
522 517 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
523 518 if b'compengine' in opts:
524 519 self._compengine = opts[b'compengine']
525 520 if b'zlib.level' in opts:
526 521 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
527 522 if b'zstd.level' in opts:
528 523 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
529 524 if b'maxdeltachainspan' in opts:
530 525 self._maxdeltachainspan = opts[b'maxdeltachainspan']
531 526 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
532 527 mmapindexthreshold = opts[b'mmapindexthreshold']
533 528 self.hassidedata = bool(opts.get(b'side-data', False))
534 529 if self.hassidedata:
535 530 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
536 531 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
537 532 withsparseread = bool(opts.get(b'with-sparse-read', False))
538 533 # sparse-revlog forces sparse-read
539 534 self._withsparseread = self._sparserevlog or withsparseread
540 535 if b'sparse-read-density-threshold' in opts:
541 536 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
542 537 if b'sparse-read-min-gap-size' in opts:
543 538 self._srmingapsize = opts[b'sparse-read-min-gap-size']
544 539 if opts.get(b'enableellipsis'):
545 540 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
546 541
547 542 # revlog v0 doesn't have flag processors
548 543 for flag, processor in pycompat.iteritems(
549 544 opts.get(b'flagprocessors', {})
550 545 ):
551 546 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
552 547
553 548 if self._chunkcachesize <= 0:
554 549 raise error.RevlogError(
555 550 _(b'revlog chunk cache size %r is not greater than 0')
556 551 % self._chunkcachesize
557 552 )
558 553 elif self._chunkcachesize & (self._chunkcachesize - 1):
559 554 raise error.RevlogError(
560 555 _(b'revlog chunk cache size %r is not a power of 2')
561 556 % self._chunkcachesize
562 557 )
563 558
564 559 indexdata = b''
565 560 self._initempty = True
566 561 try:
567 562 with self._indexfp() as f:
568 563 if (
569 564 mmapindexthreshold is not None
570 565 and self.opener.fstat(f).st_size >= mmapindexthreshold
571 566 ):
572 567 # TODO: should .close() to release resources without
573 568 # relying on Python GC
574 569 indexdata = util.buffer(util.mmapread(f))
575 570 else:
576 571 indexdata = f.read()
577 572 if len(indexdata) > 0:
578 573 versionflags = versionformat_unpack(indexdata[:4])[0]
579 574 self._initempty = False
580 575 else:
581 576 versionflags = newversionflags
582 577 except IOError as inst:
583 578 if inst.errno != errno.ENOENT:
584 579 raise
585 580
586 581 versionflags = newversionflags
587 582
588 583 self.version = versionflags
589 584
590 585 flags = versionflags & ~0xFFFF
591 586 fmt = versionflags & 0xFFFF
592 587
593 588 if fmt == REVLOGV0:
594 589 if flags:
595 590 raise error.RevlogError(
596 591 _(b'unknown flags (%#04x) in version %d revlog %s')
597 592 % (flags >> 16, fmt, self.indexfile)
598 593 )
599 594
600 595 self._inline = False
601 596 self._generaldelta = False
602 597
603 598 elif fmt == REVLOGV1:
604 599 if flags & ~REVLOGV1_FLAGS:
605 600 raise error.RevlogError(
606 601 _(b'unknown flags (%#04x) in version %d revlog %s')
607 602 % (flags >> 16, fmt, self.indexfile)
608 603 )
609 604
610 605 self._inline = versionflags & FLAG_INLINE_DATA
611 606 self._generaldelta = versionflags & FLAG_GENERALDELTA
612 607
613 608 elif fmt == REVLOGV2:
614 609 if flags & ~REVLOGV2_FLAGS:
615 610 raise error.RevlogError(
616 611 _(b'unknown flags (%#04x) in version %d revlog %s')
617 612 % (flags >> 16, fmt, self.indexfile)
618 613 )
619 614
620 615 self._inline = versionflags & FLAG_INLINE_DATA
621 616 # generaldelta implied by version 2 revlogs.
622 617 self._generaldelta = True
623 618
624 619 else:
625 620 raise error.RevlogError(
626 621 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
627 622 )
628 623 # sparse-revlog can't be on without general-delta (issue6056)
629 624 if not self._generaldelta:
630 625 self._sparserevlog = False
631 626
632 627 self._storedeltachains = True
633 628
634 629 devel_nodemap = (
635 630 self.nodemap_file
636 631 and opts.get(b'devel-force-nodemap', False)
637 632 and NodemapRevlogIO is not None
638 633 )
639 634
640 635 use_rust_index = False
641 636 if rustrevlog is not None:
642 637 if self.nodemap_file is not None:
643 638 use_rust_index = True
644 639 else:
645 640 use_rust_index = self.opener.options.get(b'rust.index')
646 641
647 642 self._io = revlogio()
648 643 if self.version == REVLOGV0:
649 644 self._io = revlogoldio()
650 645 elif devel_nodemap:
651 646 self._io = NodemapRevlogIO()
652 647 elif use_rust_index:
653 648 self._io = rustrevlogio()
654 649 try:
655 650 d = self._io.parseindex(indexdata, self._inline)
656 651 index, _chunkcache = d
657 652 use_nodemap = (
658 653 not self._inline
659 654 and self.nodemap_file is not None
660 655 and util.safehasattr(index, 'update_nodemap_data')
661 656 )
662 657 if use_nodemap:
663 658 nodemap_data = nodemaputil.persisted_data(self)
664 659 if nodemap_data is not None:
665 660 docket = nodemap_data[0]
666 661 if (
667 662 len(d[0]) > docket.tip_rev
668 663 and d[0][docket.tip_rev][7] == docket.tip_node
669 664 ):
670 665 # no changelog tampering
671 666 self._nodemap_docket = docket
672 667 index.update_nodemap_data(*nodemap_data)
673 668 except (ValueError, IndexError):
674 669 raise error.RevlogError(
675 670 _(b"index %s is corrupted") % self.indexfile
676 671 )
677 672 self.index, self._chunkcache = d
678 673 if not self._chunkcache:
679 674 self._chunkclear()
680 675 # revnum -> (chain-length, sum-delta-length)
681 676 self._chaininfocache = util.lrucachedict(500)
682 677 # revlog header -> revlog compressor
683 678 self._decompressors = {}
684 679
685 680 @util.propertycache
686 681 def _compressor(self):
687 682 engine = util.compengines[self._compengine]
688 683 return engine.revlogcompressor(self._compengineopts)
689 684
690 685 def _indexfp(self, mode=b'r'):
691 686 """file object for the revlog's index file"""
692 687 args = {'mode': mode}
693 688 if mode != b'r':
694 689 args['checkambig'] = self._checkambig
695 690 if mode == b'w':
696 691 args['atomictemp'] = True
697 692 return self.opener(self.indexfile, **args)
698 693
699 694 def _datafp(self, mode=b'r'):
700 695 """file object for the revlog's data file"""
701 696 return self.opener(self.datafile, mode=mode)
702 697
703 698 @contextlib.contextmanager
704 699 def _datareadfp(self, existingfp=None):
705 700 """file object suitable to read data"""
706 701 # Use explicit file handle, if given.
707 702 if existingfp is not None:
708 703 yield existingfp
709 704
710 705 # Use a file handle being actively used for writes, if available.
711 706 # There is some danger to doing this because reads will seek the
712 707 # file. However, _writeentry() performs a SEEK_END before all writes,
713 708 # so we should be safe.
714 709 elif self._writinghandles:
715 710 if self._inline:
716 711 yield self._writinghandles[0]
717 712 else:
718 713 yield self._writinghandles[1]
719 714
720 715 # Otherwise open a new file handle.
721 716 else:
722 717 if self._inline:
723 718 func = self._indexfp
724 719 else:
725 720 func = self._datafp
726 721 with func() as fp:
727 722 yield fp
728 723
729 724 def tiprev(self):
730 725 return len(self.index) - 1
731 726
732 727 def tip(self):
733 728 return self.node(self.tiprev())
734 729
735 730 def __contains__(self, rev):
736 731 return 0 <= rev < len(self)
737 732
738 733 def __len__(self):
739 734 return len(self.index)
740 735
741 736 def __iter__(self):
742 737 return iter(pycompat.xrange(len(self)))
743 738
744 739 def revs(self, start=0, stop=None):
745 740 """iterate over all rev in this revlog (from start to stop)"""
746 741 return storageutil.iterrevs(len(self), start=start, stop=stop)
747 742
748 743 @property
749 744 def nodemap(self):
750 745 msg = (
751 746 b"revlog.nodemap is deprecated, "
752 747 b"use revlog.index.[has_node|rev|get_rev]"
753 748 )
754 749 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
755 750 return self.index.nodemap
756 751
757 752 @property
758 753 def _nodecache(self):
759 754 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
760 755 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
761 756 return self.index.nodemap
762 757
763 758 def hasnode(self, node):
764 759 try:
765 760 self.rev(node)
766 761 return True
767 762 except KeyError:
768 763 return False
769 764
770 765 def candelta(self, baserev, rev):
771 766 """whether two revisions (baserev, rev) can be delta-ed or not"""
772 767 # Disable delta if either rev requires a content-changing flag
773 768 # processor (ex. LFS). This is because such flag processor can alter
774 769 # the rawtext content that the delta will be based on, and two clients
775 770 # could have a same revlog node with different flags (i.e. different
776 771 # rawtext contents) and the delta could be incompatible.
777 772 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
778 773 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
779 774 ):
780 775 return False
781 776 return True
782 777
783 778 def update_caches(self, transaction):
784 779 if self.nodemap_file is not None:
785 780 if transaction is None:
786 781 nodemaputil.update_persistent_nodemap(self)
787 782 else:
788 783 nodemaputil.setup_persistent_nodemap(transaction, self)
789 784
790 785 def clearcaches(self):
791 786 self._revisioncache = None
792 787 self._chainbasecache.clear()
793 788 self._chunkcache = (0, b'')
794 789 self._pcache = {}
795 790 self._nodemap_docket = None
796 791 self.index.clearcaches()
797 792 # The python code is the one responsible for validating the docket, we
798 793 # end up having to refresh it here.
799 794 use_nodemap = (
800 795 not self._inline
801 796 and self.nodemap_file is not None
802 797 and util.safehasattr(self.index, 'update_nodemap_data')
803 798 )
804 799 if use_nodemap:
805 800 nodemap_data = nodemaputil.persisted_data(self)
806 801 if nodemap_data is not None:
807 802 self._nodemap_docket = nodemap_data[0]
808 803 self.index.update_nodemap_data(*nodemap_data)
809 804
810 805 def rev(self, node):
811 806 try:
812 807 return self.index.rev(node)
813 808 except TypeError:
814 809 raise
815 810 except error.RevlogError:
816 811 # parsers.c radix tree lookup failed
817 812 if node == wdirid or node in wdirfilenodeids:
818 813 raise error.WdirUnsupported
819 814 raise error.LookupError(node, self.indexfile, _(b'no node'))
820 815
821 816 # Accessors for index entries.
822 817
823 818 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
824 819 # are flags.
825 820 def start(self, rev):
826 821 return int(self.index[rev][0] >> 16)
827 822
828 823 def flags(self, rev):
829 824 return self.index[rev][0] & 0xFFFF
830 825
831 826 def length(self, rev):
832 827 return self.index[rev][1]
833 828
834 829 def rawsize(self, rev):
835 830 """return the length of the uncompressed text for a given revision"""
836 831 l = self.index[rev][2]
837 832 if l >= 0:
838 833 return l
839 834
840 835 t = self.rawdata(rev)
841 836 return len(t)
842 837
843 838 def size(self, rev):
844 839 """length of non-raw text (processed by a "read" flag processor)"""
845 840 # fast path: if no "read" flag processor could change the content,
846 841 # size is rawsize. note: ELLIPSIS is known to not change the content.
847 842 flags = self.flags(rev)
848 843 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
849 844 return self.rawsize(rev)
850 845
851 846 return len(self.revision(rev, raw=False))
852 847
853 848 def chainbase(self, rev):
854 849 base = self._chainbasecache.get(rev)
855 850 if base is not None:
856 851 return base
857 852
858 853 index = self.index
859 854 iterrev = rev
860 855 base = index[iterrev][3]
861 856 while base != iterrev:
862 857 iterrev = base
863 858 base = index[iterrev][3]
864 859
865 860 self._chainbasecache[rev] = base
866 861 return base
867 862
868 863 def linkrev(self, rev):
869 864 return self.index[rev][4]
870 865
871 866 def parentrevs(self, rev):
872 867 try:
873 868 entry = self.index[rev]
874 869 except IndexError:
875 870 if rev == wdirrev:
876 871 raise error.WdirUnsupported
877 872 raise
878 873
879 874 return entry[5], entry[6]
880 875
881 876 # fast parentrevs(rev) where rev isn't filtered
882 877 _uncheckedparentrevs = parentrevs
883 878
884 879 def node(self, rev):
885 880 try:
886 881 return self.index[rev][7]
887 882 except IndexError:
888 883 if rev == wdirrev:
889 884 raise error.WdirUnsupported
890 885 raise
891 886
892 887 # Derived from index values.
893 888
894 889 def end(self, rev):
895 890 return self.start(rev) + self.length(rev)
896 891
897 892 def parents(self, node):
898 893 i = self.index
899 894 d = i[self.rev(node)]
900 895 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
901 896
902 897 def chainlen(self, rev):
903 898 return self._chaininfo(rev)[0]
904 899
905 900 def _chaininfo(self, rev):
906 901 chaininfocache = self._chaininfocache
907 902 if rev in chaininfocache:
908 903 return chaininfocache[rev]
909 904 index = self.index
910 905 generaldelta = self._generaldelta
911 906 iterrev = rev
912 907 e = index[iterrev]
913 908 clen = 0
914 909 compresseddeltalen = 0
915 910 while iterrev != e[3]:
916 911 clen += 1
917 912 compresseddeltalen += e[1]
918 913 if generaldelta:
919 914 iterrev = e[3]
920 915 else:
921 916 iterrev -= 1
922 917 if iterrev in chaininfocache:
923 918 t = chaininfocache[iterrev]
924 919 clen += t[0]
925 920 compresseddeltalen += t[1]
926 921 break
927 922 e = index[iterrev]
928 923 else:
929 924 # Add text length of base since decompressing that also takes
930 925 # work. For cache hits the length is already included.
931 926 compresseddeltalen += e[1]
932 927 r = (clen, compresseddeltalen)
933 928 chaininfocache[rev] = r
934 929 return r
935 930
936 931 def _deltachain(self, rev, stoprev=None):
937 932 """Obtain the delta chain for a revision.
938 933
939 934 ``stoprev`` specifies a revision to stop at. If not specified, we
940 935 stop at the base of the chain.
941 936
942 937 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
943 938 revs in ascending order and ``stopped`` is a bool indicating whether
944 939 ``stoprev`` was hit.
945 940 """
946 941 # Try C implementation.
947 942 try:
948 943 return self.index.deltachain(rev, stoprev, self._generaldelta)
949 944 except AttributeError:
950 945 pass
951 946
952 947 chain = []
953 948
954 949 # Alias to prevent attribute lookup in tight loop.
955 950 index = self.index
956 951 generaldelta = self._generaldelta
957 952
958 953 iterrev = rev
959 954 e = index[iterrev]
960 955 while iterrev != e[3] and iterrev != stoprev:
961 956 chain.append(iterrev)
962 957 if generaldelta:
963 958 iterrev = e[3]
964 959 else:
965 960 iterrev -= 1
966 961 e = index[iterrev]
967 962
968 963 if iterrev == stoprev:
969 964 stopped = True
970 965 else:
971 966 chain.append(iterrev)
972 967 stopped = False
973 968
974 969 chain.reverse()
975 970 return chain, stopped
976 971
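A brief usage sketch of the method above (`rl` stands for any revlog instance; the names are illustrative):

```python
# Illustrative only.
chain, stopped = rl._deltachain(rev)
# `chain` lists revisions in ascending order, base first; applying each
# delta in turn rebuilds `rev`. With a stoprev, `stopped` reports whether
# the walk ended at stoprev rather than at the chain's natural base:
partial_chain, stopped = rl._deltachain(rev, stoprev=baserev)
```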
977 972 def ancestors(self, revs, stoprev=0, inclusive=False):
978 973 """Generate the ancestors of 'revs' in reverse revision order.
979 974 Does not generate revs lower than stoprev.
980 975
981 976 See the documentation for ancestor.lazyancestors for more details."""
982 977
983 978 # first, make sure start revisions aren't filtered
984 979 revs = list(revs)
985 980 checkrev = self.node
986 981 for r in revs:
987 982 checkrev(r)
988 983 # and we're sure ancestors aren't filtered as well
989 984
990 985 if rustancestor is not None:
991 986 lazyancestors = rustancestor.LazyAncestors
992 987 arg = self.index
993 988 else:
994 989 lazyancestors = ancestor.lazyancestors
995 990 arg = self._uncheckedparentrevs
996 991 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
997 992
998 993 def descendants(self, revs):
999 994 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1000 995
1001 996 def findcommonmissing(self, common=None, heads=None):
1002 997 """Return a tuple of the ancestors of common and the ancestors of heads
1003 998 that are not ancestors of common. In revset terminology, we return the
1004 999 tuple:
1005 1000
1006 1001 ::common, (::heads) - (::common)
1007 1002
1008 1003 The list is sorted by revision number, meaning it is
1009 1004 topologically sorted.
1010 1005
1011 1006 'heads' and 'common' are both lists of node IDs. If heads is
1012 1007 not supplied, uses all of the revlog's heads. If common is not
1013 1008 supplied, uses nullid."""
1014 1009 if common is None:
1015 1010 common = [nullid]
1016 1011 if heads is None:
1017 1012 heads = self.heads()
1018 1013
1019 1014 common = [self.rev(n) for n in common]
1020 1015 heads = [self.rev(n) for n in heads]
1021 1016
1022 1017 # we want the ancestors, but inclusive
1023 1018 class lazyset(object):
1024 1019 def __init__(self, lazyvalues):
1025 1020 self.addedvalues = set()
1026 1021 self.lazyvalues = lazyvalues
1027 1022
1028 1023 def __contains__(self, value):
1029 1024 return value in self.addedvalues or value in self.lazyvalues
1030 1025
1031 1026 def __iter__(self):
1032 1027 added = self.addedvalues
1033 1028 for r in added:
1034 1029 yield r
1035 1030 for r in self.lazyvalues:
1036 1031 if not r in added:
1037 1032 yield r
1038 1033
1039 1034 def add(self, value):
1040 1035 self.addedvalues.add(value)
1041 1036
1042 1037 def update(self, values):
1043 1038 self.addedvalues.update(values)
1044 1039
1045 1040 has = lazyset(self.ancestors(common))
1046 1041 has.add(nullrev)
1047 1042 has.update(common)
1048 1043
1049 1044 # take all ancestors from heads that aren't in has
1050 1045 missing = set()
1051 1046 visit = collections.deque(r for r in heads if r not in has)
1052 1047 while visit:
1053 1048 r = visit.popleft()
1054 1049 if r in missing:
1055 1050 continue
1056 1051 else:
1057 1052 missing.add(r)
1058 1053 for p in self.parentrevs(r):
1059 1054 if p not in has:
1060 1055 visit.append(p)
1061 1056 missing = list(missing)
1062 1057 missing.sort()
1063 1058 return has, [self.node(miss) for miss in missing]
1064 1059
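An illustrative call to the method above (`cl` is any revlog, e.g. a changelog; the node variables are placeholders):

```python
# Illustrative only.
has, missing = cl.findcommonmissing(common=[common_node], heads=[head_node])
# `has` lazily answers "is this rev an ancestor of common?" via __contains__;
# `missing` is the topologically sorted list of nodes that are ancestors of
# heads but not of common.
assert all(cl.rev(n) not in has for n in missing)
```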
1065 1060 def incrementalmissingrevs(self, common=None):
1066 1061 """Return an object that can be used to incrementally compute the
1067 1062 revision numbers of the ancestors of arbitrary sets that are not
1068 1063 ancestors of common. This is an ancestor.incrementalmissingancestors
1069 1064 object.
1070 1065
1071 1066 'common' is a list of revision numbers. If common is not supplied, uses
1072 1067 nullrev.
1073 1068 """
1074 1069 if common is None:
1075 1070 common = [nullrev]
1076 1071
1077 1072 if rustancestor is not None:
1078 1073 return rustancestor.MissingAncestors(self.index, common)
1079 1074 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1080 1075
1081 1076 def findmissingrevs(self, common=None, heads=None):
1082 1077 """Return the revision numbers of the ancestors of heads that
1083 1078 are not ancestors of common.
1084 1079
1085 1080 More specifically, return a list of revision numbers corresponding to
1086 1081 nodes N such that every N satisfies the following constraints:
1087 1082
1088 1083 1. N is an ancestor of some node in 'heads'
1089 1084 2. N is not an ancestor of any node in 'common'
1090 1085
1091 1086 The list is sorted by revision number, meaning it is
1092 1087 topologically sorted.
1093 1088
1094 1089 'heads' and 'common' are both lists of revision numbers. If heads is
1095 1090 not supplied, uses all of the revlog's heads. If common is not
1096 1091 supplied, uses nullid."""
1097 1092 if common is None:
1098 1093 common = [nullrev]
1099 1094 if heads is None:
1100 1095 heads = self.headrevs()
1101 1096
1102 1097 inc = self.incrementalmissingrevs(common=common)
1103 1098 return inc.missingancestors(heads)
1104 1099
1105 1100 def findmissing(self, common=None, heads=None):
1106 1101 """Return the ancestors of heads that are not ancestors of common.
1107 1102
1108 1103 More specifically, return a list of nodes N such that every N
1109 1104 satisfies the following constraints:
1110 1105
1111 1106 1. N is an ancestor of some node in 'heads'
1112 1107 2. N is not an ancestor of any node in 'common'
1113 1108
1114 1109 The list is sorted by revision number, meaning it is
1115 1110 topologically sorted.
1116 1111
1117 1112 'heads' and 'common' are both lists of node IDs. If heads is
1118 1113 not supplied, uses all of the revlog's heads. If common is not
1119 1114 supplied, uses nullid."""
1120 1115 if common is None:
1121 1116 common = [nullid]
1122 1117 if heads is None:
1123 1118 heads = self.heads()
1124 1119
1125 1120 common = [self.rev(n) for n in common]
1126 1121 heads = [self.rev(n) for n in heads]
1127 1122
1128 1123 inc = self.incrementalmissingrevs(common=common)
1129 1124 return [self.node(r) for r in inc.missingancestors(heads)]
1130 1125
1131 1126 def nodesbetween(self, roots=None, heads=None):
1132 1127 """Return a topological path from 'roots' to 'heads'.
1133 1128
1134 1129 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1135 1130 topologically sorted list of all nodes N that satisfy both of
1136 1131 these constraints:
1137 1132
1138 1133 1. N is a descendant of some node in 'roots'
1139 1134 2. N is an ancestor of some node in 'heads'
1140 1135
1141 1136 Every node is considered to be both a descendant and an ancestor
1142 1137 of itself, so every reachable node in 'roots' and 'heads' will be
1143 1138 included in 'nodes'.
1144 1139
1145 1140 'outroots' is the list of reachable nodes in 'roots', i.e., the
1146 1141 subset of 'roots' that is returned in 'nodes'. Likewise,
1147 1142 'outheads' is the subset of 'heads' that is also in 'nodes'.
1148 1143
1149 1144 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1150 1145 unspecified, uses nullid as the only root. If 'heads' is
1151 1146 unspecified, uses list of all of the revlog's heads."""
1152 1147 nonodes = ([], [], [])
1153 1148 if roots is not None:
1154 1149 roots = list(roots)
1155 1150 if not roots:
1156 1151 return nonodes
1157 1152 lowestrev = min([self.rev(n) for n in roots])
1158 1153 else:
1159 1154 roots = [nullid] # Everybody's a descendant of nullid
1160 1155 lowestrev = nullrev
1161 1156 if (lowestrev == nullrev) and (heads is None):
1162 1157 # We want _all_ the nodes!
1163 1158 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1164 1159 if heads is None:
1165 1160 # All nodes are ancestors, so the latest ancestor is the last
1166 1161 # node.
1167 1162 highestrev = len(self) - 1
1168 1163 # Set ancestors to None to signal that every node is an ancestor.
1169 1164 ancestors = None
1170 1165 # Set heads to an empty dictionary for later discovery of heads
1171 1166 heads = {}
1172 1167 else:
1173 1168 heads = list(heads)
1174 1169 if not heads:
1175 1170 return nonodes
1176 1171 ancestors = set()
1177 1172 # Turn heads into a dictionary so we can remove 'fake' heads.
1178 1173 # Also, later we will be using it to filter out the heads we can't
1179 1174 # find from roots.
1180 1175 heads = dict.fromkeys(heads, False)
1181 1176 # Start at the top and keep marking parents until we're done.
1182 1177 nodestotag = set(heads)
1183 1178 # Remember where the top was so we can use it as a limit later.
1184 1179 highestrev = max([self.rev(n) for n in nodestotag])
1185 1180 while nodestotag:
1186 1181 # grab a node to tag
1187 1182 n = nodestotag.pop()
1188 1183 # Never tag nullid
1189 1184 if n == nullid:
1190 1185 continue
1191 1186 # A node's revision number represents its place in a
1192 1187 # topologically sorted list of nodes.
1193 1188 r = self.rev(n)
1194 1189 if r >= lowestrev:
1195 1190 if n not in ancestors:
1196 1191 # If we are possibly a descendant of one of the roots
1197 1192 # and we haven't already been marked as an ancestor
1198 1193 ancestors.add(n) # Mark as ancestor
1199 1194 # Add non-nullid parents to list of nodes to tag.
1200 1195 nodestotag.update(
1201 1196 [p for p in self.parents(n) if p != nullid]
1202 1197 )
1203 1198 elif n in heads: # We've seen it before, is it a fake head?
1204 1199 # So it is, real heads should not be the ancestors of
1205 1200 # any other heads.
1206 1201 heads.pop(n)
1207 1202 if not ancestors:
1208 1203 return nonodes
1209 1204 # Now that we have our set of ancestors, we want to remove any
1210 1205 # roots that are not ancestors.
1211 1206
1212 1207 # If one of the roots was nullid, everything is included anyway.
1213 1208 if lowestrev > nullrev:
1214 1209 # But, since we weren't, let's recompute the lowest rev to not
1215 1210 # include roots that aren't ancestors.
1216 1211
1217 1212 # Filter out roots that aren't ancestors of heads
1218 1213 roots = [root for root in roots if root in ancestors]
1219 1214 # Recompute the lowest revision
1220 1215 if roots:
1221 1216 lowestrev = min([self.rev(root) for root in roots])
1222 1217 else:
1223 1218 # No more roots? Return empty list
1224 1219 return nonodes
1225 1220 else:
1226 1221 # We are descending from nullid, and don't need to care about
1227 1222 # any other roots.
1228 1223 lowestrev = nullrev
1229 1224 roots = [nullid]
1230 1225 # Transform our roots list into a set.
1231 1226 descendants = set(roots)
1232 1227 # Also, keep the original roots so we can filter out roots that aren't
1233 1228 # 'real' roots (i.e. are descended from other roots).
1234 1229 roots = descendants.copy()
1235 1230 # Our topologically sorted list of output nodes.
1236 1231 orderedout = []
1237 1232 # Don't start at nullid since we don't want nullid in our output list,
1238 1233 # and if nullid shows up in descendants, empty parents will look like
1239 1234 # they're descendants.
1240 1235 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1241 1236 n = self.node(r)
1242 1237 isdescendant = False
1243 1238 if lowestrev == nullrev: # Everybody is a descendant of nullid
1244 1239 isdescendant = True
1245 1240 elif n in descendants:
1246 1241 # n is already a descendant
1247 1242 isdescendant = True
1248 1243 # This check only needs to be done here because all the roots
 1249 1244 # will start being marked as descendants before the loop.
1250 1245 if n in roots:
1251 1246 # If n was a root, check if it's a 'real' root.
1252 1247 p = tuple(self.parents(n))
1253 1248 # If any of its parents are descendants, it's not a root.
1254 1249 if (p[0] in descendants) or (p[1] in descendants):
1255 1250 roots.remove(n)
1256 1251 else:
1257 1252 p = tuple(self.parents(n))
1258 1253 # A node is a descendant if either of its parents are
1259 1254 # descendants. (We seeded the dependents list with the roots
1260 1255 # up there, remember?)
1261 1256 if (p[0] in descendants) or (p[1] in descendants):
1262 1257 descendants.add(n)
1263 1258 isdescendant = True
1264 1259 if isdescendant and ((ancestors is None) or (n in ancestors)):
1265 1260 # Only include nodes that are both descendants and ancestors.
1266 1261 orderedout.append(n)
1267 1262 if (ancestors is not None) and (n in heads):
1268 1263 # We're trying to figure out which heads are reachable
1269 1264 # from roots.
1270 1265 # Mark this head as having been reached
1271 1266 heads[n] = True
1272 1267 elif ancestors is None:
1273 1268 # Otherwise, we're trying to discover the heads.
1274 1269 # Assume this is a head because if it isn't, the next step
1275 1270 # will eventually remove it.
1276 1271 heads[n] = True
1277 1272 # But, obviously its parents aren't.
1278 1273 for p in self.parents(n):
1279 1274 heads.pop(p, None)
1280 1275 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1281 1276 roots = list(roots)
1282 1277 assert orderedout
1283 1278 assert roots
1284 1279 assert heads
1285 1280 return (orderedout, roots, heads)
1286 1281
1287 1282 def headrevs(self, revs=None):
1288 1283 if revs is None:
1289 1284 try:
1290 1285 return self.index.headrevs()
1291 1286 except AttributeError:
1292 1287 return self._headrevs()
1293 1288 if rustdagop is not None:
1294 1289 return rustdagop.headrevs(self.index, revs)
1295 1290 return dagop.headrevs(revs, self._uncheckedparentrevs)
1296 1291
1297 1292 def computephases(self, roots):
1298 1293 return self.index.computephasesmapsets(roots)
1299 1294
1300 1295 def _headrevs(self):
1301 1296 count = len(self)
1302 1297 if not count:
1303 1298 return [nullrev]
1304 1299 # we won't iter over filtered rev so nobody is a head at start
1305 1300 ishead = [0] * (count + 1)
1306 1301 index = self.index
1307 1302 for r in self:
 1308 1303 ishead[r] = 1 # I may be a head
1309 1304 e = index[r]
 1310 1305 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1311 1306 return [r for r, val in enumerate(ishead) if val]
1312 1307
1313 1308 def heads(self, start=None, stop=None):
1314 1309 """return the list of all nodes that have no children
1315 1310
1316 1311 if start is specified, only heads that are descendants of
1317 1312 start will be returned
1318 1313 if stop is specified, it will consider all the revs from stop
1319 1314 as if they had no children
1320 1315 """
1321 1316 if start is None and stop is None:
1322 1317 if not len(self):
1323 1318 return [nullid]
1324 1319 return [self.node(r) for r in self.headrevs()]
1325 1320
1326 1321 if start is None:
1327 1322 start = nullrev
1328 1323 else:
1329 1324 start = self.rev(start)
1330 1325
1331 1326 stoprevs = {self.rev(n) for n in stop or []}
1332 1327
1333 1328 revs = dagop.headrevssubset(
1334 1329 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1335 1330 )
1336 1331
1337 1332 return [self.node(rev) for rev in revs]
1338 1333
1339 1334 def children(self, node):
1340 1335 """find the children of a given node"""
1341 1336 c = []
1342 1337 p = self.rev(node)
1343 1338 for r in self.revs(start=p + 1):
1344 1339 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1345 1340 if prevs:
1346 1341 for pr in prevs:
1347 1342 if pr == p:
1348 1343 c.append(self.node(r))
1349 1344 elif p == nullrev:
1350 1345 c.append(self.node(r))
1351 1346 return c
1352 1347
1353 1348 def commonancestorsheads(self, a, b):
1354 1349 """calculate all the heads of the common ancestors of nodes a and b"""
1355 1350 a, b = self.rev(a), self.rev(b)
1356 1351 ancs = self._commonancestorsheads(a, b)
1357 1352 return pycompat.maplist(self.node, ancs)
1358 1353
1359 1354 def _commonancestorsheads(self, *revs):
1360 1355 """calculate all the heads of the common ancestors of revs"""
1361 1356 try:
1362 1357 ancs = self.index.commonancestorsheads(*revs)
1363 1358 except (AttributeError, OverflowError): # C implementation failed
1364 1359 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1365 1360 return ancs
1366 1361
1367 1362 def isancestor(self, a, b):
1368 1363 """return True if node a is an ancestor of node b
1369 1364
1370 1365 A revision is considered an ancestor of itself."""
1371 1366 a, b = self.rev(a), self.rev(b)
1372 1367 return self.isancestorrev(a, b)
1373 1368
1374 1369 def isancestorrev(self, a, b):
1375 1370 """return True if revision a is an ancestor of revision b
1376 1371
1377 1372 A revision is considered an ancestor of itself.
1378 1373
1379 1374 The implementation of this is trivial but the use of
1380 1375 reachableroots is not."""
1381 1376 if a == nullrev:
1382 1377 return True
1383 1378 elif a == b:
1384 1379 return True
1385 1380 elif a > b:
1386 1381 return False
1387 1382 return bool(self.reachableroots(a, [b], [a], includepath=False))
1388 1383
1389 1384 def reachableroots(self, minroot, heads, roots, includepath=False):
1390 1385 """return (heads(::(<roots> and <roots>::<heads>)))
1391 1386
1392 1387 If includepath is True, return (<roots>::<heads>)."""
1393 1388 try:
1394 1389 return self.index.reachableroots2(
1395 1390 minroot, heads, roots, includepath
1396 1391 )
1397 1392 except AttributeError:
1398 1393 return dagop._reachablerootspure(
1399 1394 self.parentrevs, minroot, roots, heads, includepath
1400 1395 )
1401 1396
1402 1397 def ancestor(self, a, b):
1403 1398 """calculate the "best" common ancestor of nodes a and b"""
1404 1399
1405 1400 a, b = self.rev(a), self.rev(b)
1406 1401 try:
1407 1402 ancs = self.index.ancestors(a, b)
1408 1403 except (AttributeError, OverflowError):
1409 1404 ancs = ancestor.ancestors(self.parentrevs, a, b)
1410 1405 if ancs:
1411 1406 # choose a consistent winner when there's a tie
1412 1407 return min(map(self.node, ancs))
1413 1408 return nullid
1414 1409
1415 1410 def _match(self, id):
1416 1411 if isinstance(id, int):
1417 1412 # rev
1418 1413 return self.node(id)
1419 1414 if len(id) == 20:
1420 1415 # possibly a binary node
1421 1416 # odds of a binary node being all hex in ASCII are 1 in 10**25
1422 1417 try:
1423 1418 node = id
1424 1419 self.rev(node) # quick search the index
1425 1420 return node
1426 1421 except error.LookupError:
1427 1422 pass # may be partial hex id
1428 1423 try:
1429 1424 # str(rev)
1430 1425 rev = int(id)
1431 1426 if b"%d" % rev != id:
1432 1427 raise ValueError
1433 1428 if rev < 0:
1434 1429 rev = len(self) + rev
1435 1430 if rev < 0 or rev >= len(self):
1436 1431 raise ValueError
1437 1432 return self.node(rev)
1438 1433 except (ValueError, OverflowError):
1439 1434 pass
1440 1435 if len(id) == 40:
1441 1436 try:
1442 1437 # a full hex nodeid?
1443 1438 node = bin(id)
1444 1439 self.rev(node)
1445 1440 return node
1446 1441 except (TypeError, error.LookupError):
1447 1442 pass
1448 1443
1449 1444 def _partialmatch(self, id):
 1450 1445 # we don't care about wdirfilenodeids as they should always be full hashes
1451 1446 maybewdir = wdirhex.startswith(id)
1452 1447 try:
1453 1448 partial = self.index.partialmatch(id)
1454 1449 if partial and self.hasnode(partial):
1455 1450 if maybewdir:
1456 1451 # single 'ff...' match in radix tree, ambiguous with wdir
1457 1452 raise error.RevlogError
1458 1453 return partial
1459 1454 if maybewdir:
1460 1455 # no 'ff...' match in radix tree, wdir identified
1461 1456 raise error.WdirUnsupported
1462 1457 return None
1463 1458 except error.RevlogError:
1464 1459 # parsers.c radix tree lookup gave multiple matches
1465 1460 # fast path: for unfiltered changelog, radix tree is accurate
1466 1461 if not getattr(self, 'filteredrevs', None):
1467 1462 raise error.AmbiguousPrefixLookupError(
1468 1463 id, self.indexfile, _(b'ambiguous identifier')
1469 1464 )
1470 1465 # fall through to slow path that filters hidden revisions
1471 1466 except (AttributeError, ValueError):
1472 1467 # we are pure python, or key was too short to search radix tree
1473 1468 pass
1474 1469
1475 1470 if id in self._pcache:
1476 1471 return self._pcache[id]
1477 1472
1478 1473 if len(id) <= 40:
1479 1474 try:
1480 1475 # hex(node)[:...]
1481 1476 l = len(id) // 2 # grab an even number of digits
1482 1477 prefix = bin(id[: l * 2])
1483 1478 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1484 1479 nl = [
1485 1480 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1486 1481 ]
1487 1482 if nullhex.startswith(id):
1488 1483 nl.append(nullid)
1489 1484 if len(nl) > 0:
1490 1485 if len(nl) == 1 and not maybewdir:
1491 1486 self._pcache[id] = nl[0]
1492 1487 return nl[0]
1493 1488 raise error.AmbiguousPrefixLookupError(
1494 1489 id, self.indexfile, _(b'ambiguous identifier')
1495 1490 )
1496 1491 if maybewdir:
1497 1492 raise error.WdirUnsupported
1498 1493 return None
1499 1494 except TypeError:
1500 1495 pass
1501 1496
1502 1497 def lookup(self, id):
1503 1498 """locate a node based on:
1504 1499 - revision number or str(revision number)
1505 1500 - nodeid or subset of hex nodeid
1506 1501 """
1507 1502 n = self._match(id)
1508 1503 if n is not None:
1509 1504 return n
1510 1505 n = self._partialmatch(id)
1511 1506 if n:
1512 1507 return n
1513 1508
1514 1509 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1515 1510
1516 1511 def shortest(self, node, minlength=1):
1517 1512 """Find the shortest unambiguous prefix that matches node."""
1518 1513
1519 1514 def isvalid(prefix):
1520 1515 try:
1521 1516 matchednode = self._partialmatch(prefix)
1522 1517 except error.AmbiguousPrefixLookupError:
1523 1518 return False
1524 1519 except error.WdirUnsupported:
1525 1520 # single 'ff...' match
1526 1521 return True
1527 1522 if matchednode is None:
1528 1523 raise error.LookupError(node, self.indexfile, _(b'no node'))
1529 1524 return True
1530 1525
1531 1526 def maybewdir(prefix):
1532 1527 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1533 1528
1534 1529 hexnode = hex(node)
1535 1530
1536 1531 def disambiguate(hexnode, minlength):
1537 1532 """Disambiguate against wdirid."""
1538 1533 for length in range(minlength, len(hexnode) + 1):
1539 1534 prefix = hexnode[:length]
1540 1535 if not maybewdir(prefix):
1541 1536 return prefix
1542 1537
1543 1538 if not getattr(self, 'filteredrevs', None):
1544 1539 try:
1545 1540 length = max(self.index.shortest(node), minlength)
1546 1541 return disambiguate(hexnode, length)
1547 1542 except error.RevlogError:
1548 1543 if node != wdirid:
1549 1544 raise error.LookupError(node, self.indexfile, _(b'no node'))
1550 1545 except AttributeError:
1551 1546 # Fall through to pure code
1552 1547 pass
1553 1548
1554 1549 if node == wdirid:
1555 1550 for length in range(minlength, len(hexnode) + 1):
1556 1551 prefix = hexnode[:length]
1557 1552 if isvalid(prefix):
1558 1553 return prefix
1559 1554
1560 1555 for length in range(minlength, len(hexnode) + 1):
1561 1556 prefix = hexnode[:length]
1562 1557 if isvalid(prefix):
1563 1558 return disambiguate(hexnode, length)
1564 1559
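A quick usage sketch of shortest() (the revlog and node names are placeholders):

```python
# Illustrative only.
prefix = rl.shortest(node, minlength=4)
# `prefix` is the shortest hex prefix of `node` (at least 4 characters) that
# is unambiguous in this revlog and does not look like the all-'f' wdir id.
assert rl.lookup(prefix) == node
```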
1565 1560 def cmp(self, node, text):
1566 1561 """compare text with a given file revision
1567 1562
1568 1563 returns True if text is different than what is stored.
1569 1564 """
1570 1565 p1, p2 = self.parents(node)
1571 1566 return storageutil.hashrevisionsha1(text, p1, p2) != node
1572 1567
1573 1568 def _cachesegment(self, offset, data):
1574 1569 """Add a segment to the revlog cache.
1575 1570
1576 1571 Accepts an absolute offset and the data that is at that location.
1577 1572 """
1578 1573 o, d = self._chunkcache
1579 1574 # try to add to existing cache
1580 1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1581 1576 self._chunkcache = o, d + data
1582 1577 else:
1583 1578 self._chunkcache = offset, data
1584 1579
1585 1580 def _readsegment(self, offset, length, df=None):
1586 1581 """Load a segment of raw data from the revlog.
1587 1582
1588 1583 Accepts an absolute offset, length to read, and an optional existing
1589 1584 file handle to read from.
1590 1585
1591 1586 If an existing file handle is passed, it will be seeked and the
1592 1587 original seek position will NOT be restored.
1593 1588
1594 1589 Returns a str or buffer of raw byte data.
1595 1590
1596 1591 Raises if the requested number of bytes could not be read.
1597 1592 """
1598 1593 # Cache data both forward and backward around the requested
1599 1594 # data, in a fixed size window. This helps speed up operations
1600 1595 # involving reading the revlog backwards.
1601 1596 cachesize = self._chunkcachesize
1602 1597 realoffset = offset & ~(cachesize - 1)
1603 1598 reallength = (
1604 1599 (offset + length + cachesize) & ~(cachesize - 1)
1605 1600 ) - realoffset
1606 1601 with self._datareadfp(df) as df:
1607 1602 df.seek(realoffset)
1608 1603 d = df.read(reallength)
1609 1604
1610 1605 self._cachesegment(realoffset, d)
1611 1606 if offset != realoffset or reallength != length:
1612 1607 startoffset = offset - realoffset
1613 1608 if len(d) - startoffset < length:
1614 1609 raise error.RevlogError(
1615 1610 _(
1616 1611 b'partial read of revlog %s; expected %d bytes from '
1617 1612 b'offset %d, got %d'
1618 1613 )
1619 1614 % (
1620 1615 self.indexfile if self._inline else self.datafile,
1621 1616 length,
1622 1617 realoffset,
1623 1618 len(d) - startoffset,
1624 1619 )
1625 1620 )
1626 1621
1627 1622 return util.buffer(d, startoffset, length)
1628 1623
1629 1624 if len(d) < length:
1630 1625 raise error.RevlogError(
1631 1626 _(
1632 1627 b'partial read of revlog %s; expected %d bytes from offset '
1633 1628 b'%d, got %d'
1634 1629 )
1635 1630 % (
1636 1631 self.indexfile if self._inline else self.datafile,
1637 1632 length,
1638 1633 offset,
1639 1634 len(d),
1640 1635 )
1641 1636 )
1642 1637
1643 1638 return d
1644 1639
1645 1640 def _getsegment(self, offset, length, df=None):
1646 1641 """Obtain a segment of raw data from the revlog.
1647 1642
1648 1643 Accepts an absolute offset, length of bytes to obtain, and an
1649 1644 optional file handle to the already-opened revlog. If the file
1650 1645 handle is used, its original seek position will not be preserved.
1651 1646
1652 1647 Requests for data may be returned from a cache.
1653 1648
1654 1649 Returns a str or a buffer instance of raw byte data.
1655 1650 """
1656 1651 o, d = self._chunkcache
1657 1652 l = len(d)
1658 1653
1659 1654 # is it in the cache?
1660 1655 cachestart = offset - o
1661 1656 cacheend = cachestart + length
1662 1657 if cachestart >= 0 and cacheend <= l:
1663 1658 if cachestart == 0 and cacheend == l:
1664 1659 return d # avoid a copy
1665 1660 return util.buffer(d, cachestart, cacheend - cachestart)
1666 1661
1667 1662 return self._readsegment(offset, length, df=df)
1668 1663
1669 1664 def _getsegmentforrevs(self, startrev, endrev, df=None):
1670 1665 """Obtain a segment of raw data corresponding to a range of revisions.
1671 1666
1672 1667 Accepts the start and end revisions and an optional already-open
1673 1668 file handle to be used for reading. If the file handle is read, its
1674 1669 seek position will not be preserved.
1675 1670
1676 1671 Requests for data may be satisfied by a cache.
1677 1672
1678 1673 Returns a 2-tuple of (offset, data) for the requested range of
1679 1674 revisions. Offset is the integer offset from the beginning of the
1680 1675 revlog and data is a str or buffer of the raw byte data.
1681 1676
1682 1677 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1683 1678 to determine where each revision's data begins and ends.
1684 1679 """
1685 1680 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1686 1681 # (functions are expensive).
1687 1682 index = self.index
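# the first field of an index entry packs the stored offset in its high
# bits and the revision flags in its low 16 bits, hence the shift below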
1688 1683 istart = index[startrev]
1689 1684 start = int(istart[0] >> 16)
1690 1685 if startrev == endrev:
1691 1686 end = start + istart[1]
1692 1687 else:
1693 1688 iend = index[endrev]
1694 1689 end = int(iend[0] >> 16) + iend[1]
1695 1690
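# inline revlogs interleave an index entry before each revision's data
# in the same file, so skip past those entries when computing offsets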
1696 1691 if self._inline:
1697 1692 start += (startrev + 1) * self._io.size
1698 1693 end += (endrev + 1) * self._io.size
1699 1694 length = end - start
1700 1695
1701 1696 return start, self._getsegment(start, length, df=df)
1702 1697
1703 1698 def _chunk(self, rev, df=None):
1704 1699 """Obtain a single decompressed chunk for a revision.
1705 1700
1706 1701 Accepts an integer revision and an optional already-open file handle
1707 1702 to be used for reading. If used, the seek position of the file will not
1708 1703 be preserved.
1709 1704
1710 1705 Returns a str holding uncompressed data for the requested revision.
1711 1706 """
1712 1707 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1713 1708
1714 1709 def _chunks(self, revs, df=None, targetsize=None):
1715 1710 """Obtain decompressed chunks for the specified revisions.
1716 1711
1717 1712 Accepts an iterable of numeric revisions that are assumed to be in
1718 1713 ascending order. Also accepts an optional already-open file handle
1719 1714 to be used for reading. If used, the seek position of the file will
1720 1715 not be preserved.
1721 1716
1722 1717 This function is similar to calling ``self._chunk()`` multiple times,
1723 1718 but is faster.
1724 1719
1725 1720 Returns a list with decompressed data for each requested revision.
1726 1721 """
1727 1722 if not revs:
1728 1723 return []
1729 1724 start = self.start
1730 1725 length = self.length
1731 1726 inline = self._inline
1732 1727 iosize = self._io.size
1733 1728 buffer = util.buffer
1734 1729
1735 1730 l = []
1736 1731 ladd = l.append
1737 1732
1738 1733 if not self._withsparseread:
1739 1734 slicedchunks = (revs,)
1740 1735 else:
1741 1736 slicedchunks = deltautil.slicechunk(
1742 1737 self, revs, targetsize=targetsize
1743 1738 )
1744 1739
1745 1740 for revschunk in slicedchunks:
1746 1741 firstrev = revschunk[0]
1747 1742 # Skip trailing revisions with empty diff
1748 1743 for lastrev in revschunk[::-1]:
1749 1744 if length(lastrev) != 0:
1750 1745 break
1751 1746
1752 1747 try:
1753 1748 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1754 1749 except OverflowError:
1755 1750 # issue4215 - we can't cache a run of chunks greater than
1756 1751 # 2G on Windows
1757 1752 return [self._chunk(rev, df=df) for rev in revschunk]
1758 1753
1759 1754 decomp = self.decompress
1760 1755 for rev in revschunk:
1761 1756 chunkstart = start(rev)
1762 1757 if inline:
1763 1758 chunkstart += (rev + 1) * iosize
1764 1759 chunklength = length(rev)
1765 1760 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1766 1761
1767 1762 return l
1768 1763
1769 1764 def _chunkclear(self):
1770 1765 """Clear the raw chunk cache."""
1771 1766 self._chunkcache = (0, b'')
1772 1767
1773 1768 def deltaparent(self, rev):
1774 1769 """return deltaparent of the given revision"""
1775 1770 base = self.index[rev][3]
1776 1771 if base == rev:
1777 1772 return nullrev
1778 1773 elif self._generaldelta:
1779 1774 return base
1780 1775 else:
1781 1776 return rev - 1
1782 1777
1783 1778 def issnapshot(self, rev):
1784 1779 """tells whether rev is a snapshot"""
1785 1780 if not self._sparserevlog:
1786 1781 return self.deltaparent(rev) == nullrev
1787 1782 elif util.safehasattr(self.index, b'issnapshot'):
1788 1783 # directly assign the method to cache the testing and access
1789 1784 self.issnapshot = self.index.issnapshot
1790 1785 return self.issnapshot(rev)
1791 1786 if rev == nullrev:
1792 1787 return True
1793 1788 entry = self.index[rev]
1794 1789 base = entry[3]
1795 1790 if base == rev:
1796 1791 return True
1797 1792 if base == nullrev:
1798 1793 return True
1799 1794 p1 = entry[5]
1800 1795 p2 = entry[6]
1801 1796 if base == p1 or base == p2:
1802 1797 return False
1803 1798 return self.issnapshot(base)
1804 1799
1805 1800 def snapshotdepth(self, rev):
1806 1801 """number of snapshot in the chain before this one"""
1807 1802 if not self.issnapshot(rev):
1808 1803 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1809 1804 return len(self._deltachain(rev)[0]) - 1
1810 1805
1811 1806 def revdiff(self, rev1, rev2):
1812 1807 """return or calculate a delta between two revisions
1813 1808
1814 1809 The delta calculated is in binary form and is intended to be written to
1815 1810 revlog data directly. So this function needs raw revision data.
1816 1811 """
1817 1812 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1818 1813 return bytes(self._chunk(rev2))
1819 1814
1820 1815 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1821 1816
1822 1817 def _processflags(self, text, flags, operation, raw=False):
1823 1818 """deprecated entry point to access flag processors"""
1824 1819 msg = b'_processflag(...) use the specialized variant'
1825 1820 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1826 1821 if raw:
1827 1822 return text, flagutil.processflagsraw(self, text, flags)
1828 1823 elif operation == b'read':
1829 1824 return flagutil.processflagsread(self, text, flags)
1830 1825 else: # write operation
1831 1826 return flagutil.processflagswrite(self, text, flags, None)
1832 1827
1833 1828 def revision(self, nodeorrev, _df=None, raw=False):
1834 1829 """return an uncompressed revision of a given node or revision
1835 1830 number.
1836 1831
1837 1832 _df - an existing file handle to read from. (internal-only)
1838 1833 raw - an optional argument specifying if the revision data is to be
1839 1834 treated as raw data when applying flag transforms. 'raw' should be set
1840 1835 to True when generating changegroups or in debug commands.
1841 1836 """
1842 1837 if raw:
1843 1838 msg = (
1844 1839 b'revlog.revision(..., raw=True) is deprecated, '
1845 1840 b'use revlog.rawdata(...)'
1846 1841 )
1847 1842 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1848 1843 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1849 1844
1850 1845 def sidedata(self, nodeorrev, _df=None):
1851 1846 """a map of extra data related to the changeset but not part of the hash
1852 1847
1853 1848 This function currently returns a dictionary. However, a more advanced
1854 1849 mapping object will likely be used in the future for more
1855 1850 efficient/lazy code.
1856 1851 """
1857 1852 return self._revisiondata(nodeorrev, _df)[1]
1858 1853
1859 1854 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1860 1855 # deal with <nodeorrev> argument type
1861 1856 if isinstance(nodeorrev, int):
1862 1857 rev = nodeorrev
1863 1858 node = self.node(rev)
1864 1859 else:
1865 1860 node = nodeorrev
1866 1861 rev = None
1867 1862
1868 1863 # fast path the special `nullid` rev
1869 1864 if node == nullid:
1870 1865 return b"", {}
1871 1866
1872 1867 # ``rawtext`` is the text as stored inside the revlog. Might be the
1873 1868 # revision or might need to be processed to retrieve the revision.
1874 1869 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1875 1870
1876 1871 if raw and validated:
1877 1872 # if we don't want to process the raw text and that raw
1878 1873 # text is cached, we can exit early.
1879 1874 return rawtext, {}
1880 1875 if rev is None:
1881 1876 rev = self.rev(node)
1882 1877 # the revlog's flag for this revision
1883 1878 # (usually alter its state or content)
1884 1879 flags = self.flags(rev)
1885 1880
1886 1881 if validated and flags == REVIDX_DEFAULT_FLAGS:
1887 1882 # no extra flags set, no flag processor runs, text = rawtext
1888 1883 return rawtext, {}
1889 1884
1890 1885 sidedata = {}
1891 1886 if raw:
1892 1887 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1893 1888 text = rawtext
1894 1889 else:
1895 1890 try:
1896 1891 r = flagutil.processflagsread(self, rawtext, flags)
1897 1892 except error.SidedataHashError as exc:
1898 1893 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1899 1894 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1900 1895 raise error.RevlogError(msg)
1901 1896 text, validatehash, sidedata = r
1902 1897 if validatehash:
1903 1898 self.checkhash(text, node, rev=rev)
1904 1899 if not validated:
1905 1900 self._revisioncache = (node, rev, rawtext)
1906 1901
1907 1902 return text, sidedata
1908 1903
1909 1904 def _rawtext(self, node, rev, _df=None):
1910 1905 """return the possibly unvalidated rawtext for a revision
1911 1906
1912 1907 returns (rev, rawtext, validated)
1913 1908 """
1914 1909
1915 1910 # revision in the cache (could be useful to apply delta)
1916 1911 cachedrev = None
1917 1912 # An intermediate text to apply deltas to
1918 1913 basetext = None
1919 1914
1920 1915 # Check if we have the entry in cache
1921 1916 # The cache entry looks like (node, rev, rawtext)
1922 1917 if self._revisioncache:
1923 1918 if self._revisioncache[0] == node:
1924 1919 return (rev, self._revisioncache[2], True)
1925 1920 cachedrev = self._revisioncache[1]
1926 1921
1927 1922 if rev is None:
1928 1923 rev = self.rev(node)
1929 1924
1930 1925 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1931 1926 if stopped:
1932 1927 basetext = self._revisioncache[2]
1933 1928
1934 1929 # drop cache to save memory, the caller is expected to
1935 1930 # update self._revisioncache after validating the text
1936 1931 self._revisioncache = None
1937 1932
1938 1933 targetsize = None
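# heuristic bound on how much data a single sliced read below should
# cover: four times the expected rawtext size leaves room for the
# delta chain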
1939 1934 rawsize = self.index[rev][2]
1940 1935 if 0 <= rawsize:
1941 1936 targetsize = 4 * rawsize
1942 1937
1943 1938 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1944 1939 if basetext is None:
1945 1940 basetext = bytes(bins[0])
1946 1941 bins = bins[1:]
1947 1942
1948 1943 rawtext = mdiff.patches(basetext, bins)
1949 1944 del basetext # let us have a chance to free memory early
1950 1945 return (rev, rawtext, False)
1951 1946
1952 1947 def rawdata(self, nodeorrev, _df=None):
1953 1948 """return an uncompressed raw data of a given node or revision number.
1954 1949
1955 1950 _df - an existing file handle to read from. (internal-only)
1956 1951 """
1957 1952 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1958 1953
1959 1954 def hash(self, text, p1, p2):
1960 1955 """Compute a node hash.
1961 1956
1962 1957 Available as a function so that subclasses can replace the hash
1963 1958 as needed.
1964 1959 """
1965 1960 return storageutil.hashrevisionsha1(text, p1, p2)
1966 1961
1967 1962 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1968 1963 """Check node hash integrity.
1969 1964
1970 1965 Available as a function so that subclasses can extend hash mismatch
1971 1966 behaviors as needed.
1972 1967 """
1973 1968 try:
1974 1969 if p1 is None and p2 is None:
1975 1970 p1, p2 = self.parents(node)
1976 1971 if node != self.hash(text, p1, p2):
1977 1972 # Clear the revision cache on hash failure. The revision cache
1978 1973 # only stores the raw revision and clearing the cache does have
1979 1974 # the side-effect that we won't have a cache hit when the raw
1980 1975 # revision data is accessed. But this case should be rare and
1981 1976 # it is extra work to teach the cache about the hash
1982 1977 # verification state.
1983 1978 if self._revisioncache and self._revisioncache[0] == node:
1984 1979 self._revisioncache = None
1985 1980
1986 1981 revornode = rev
1987 1982 if revornode is None:
1988 1983 revornode = templatefilters.short(hex(node))
1989 1984 raise error.RevlogError(
1990 1985 _(b"integrity check failed on %s:%s")
1991 1986 % (self.indexfile, pycompat.bytestr(revornode))
1992 1987 )
1993 1988 except error.RevlogError:
1994 1989 if self._censorable and storageutil.iscensoredtext(text):
1995 1990 raise error.CensoredNodeError(self.indexfile, node, text)
1996 1991 raise
1997 1992
1998 1993 def _enforceinlinesize(self, tr, fp=None):
1999 1994 """Check if the revlog is too big for inline and convert if so.
2000 1995
2001 1996 This should be called after revisions are added to the revlog. If the
2002 1997 revlog has grown too large to be an inline revlog, it will convert it
2003 1998 to use multiple index and data files.
2004 1999 """
2005 2000 tiprev = len(self) - 1
2006 2001 if (
2007 2002 not self._inline
2008 2003 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2009 2004 ):
2010 2005 return
2011 2006
2012 2007 troffset = tr.findoffset(self.indexfile)
2013 2008 if troffset is None:
2014 2009 raise error.RevlogError(
2015 2010 _(b"%s not found in the transaction") % self.indexfile
2016 2011 )
2017 2012 trindex = 0
2018 2013 tr.add(self.datafile, 0)
2019 2014
2020 2015 if fp:
2021 2016 fp.flush()
2022 2017 fp.close()
2023 2018 # We can't use the cached file handle after close(). So prevent
2024 2019 # its usage.
2025 2020 self._writinghandles = None
2026 2021
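# copy every revision's data out of the inline file into the new,
# separate data file; trindex is used below to translate the
# transaction's journal offset into an index-only truncation offset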
2027 2022 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2028 2023 for r in self:
2029 2024 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2030 2025 if troffset <= self.start(r):
2031 2026 trindex = r
2032 2027
2033 2028 with self._indexfp(b'w') as fp:
2034 2029 self.version &= ~FLAG_INLINE_DATA
2035 2030 self._inline = False
2036 2031 io = self._io
2037 2032 for i in self:
2038 2033 e = io.packentry(self.index[i], self.node, self.version, i)
2039 2034 fp.write(e)
2040 2035
2041 2036 # the temp file replaces the real index when we exit the context
2042 2037 # manager
2043 2038
2044 2039 tr.replace(self.indexfile, trindex * self._io.size)
2045 2040 nodemaputil.setup_persistent_nodemap(tr, self)
2046 2041 self._chunkclear()
2047 2042
2048 2043 def _nodeduplicatecallback(self, transaction, node):
2049 2044 """called when trying to add a node already stored."""
2050 2045
2051 2046 def addrevision(
2052 2047 self,
2053 2048 text,
2054 2049 transaction,
2055 2050 link,
2056 2051 p1,
2057 2052 p2,
2058 2053 cachedelta=None,
2059 2054 node=None,
2060 2055 flags=REVIDX_DEFAULT_FLAGS,
2061 2056 deltacomputer=None,
2062 2057 sidedata=None,
2063 2058 ):
2064 2059 """add a revision to the log
2065 2060
2066 2061 text - the revision data to add
2067 2062 transaction - the transaction object used for rollback
2068 2063 link - the linkrev data to add
2069 2064 p1, p2 - the parent nodeids of the revision
2070 2065 cachedelta - an optional precomputed delta
2071 2066 node - nodeid of revision; typically node is not specified, and it is
2072 2067 computed by default as hash(text, p1, p2), however subclasses might
2073 2068 use different hashing method (and override checkhash() in such case)
2074 2069 flags - the known flags to set on the revision
2075 2070 deltacomputer - an optional deltacomputer instance shared between
2076 2071 multiple calls
2077 2072 """
2078 2073 if link == nullrev:
2079 2074 raise error.RevlogError(
2080 2075 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2081 2076 )
2082 2077
2083 2078 if sidedata is None:
2084 2079 sidedata = {}
2085 2080 flags = flags & ~REVIDX_SIDEDATA
2086 2081 elif not self.hassidedata:
2087 2082 raise error.ProgrammingError(
2088 2083 _(b"trying to add sidedata to a revlog who don't support them")
2089 2084 )
2090 2085 else:
2091 2086 flags |= REVIDX_SIDEDATA
2092 2087
2093 2088 if flags:
2094 2089 node = node or self.hash(text, p1, p2)
2095 2090
2096 2091 rawtext, validatehash = flagutil.processflagswrite(
2097 2092 self, text, flags, sidedata=sidedata
2098 2093 )
2099 2094
2100 2095 # If the flag processor modifies the revision data, ignore any provided
2101 2096 # cachedelta.
2102 2097 if rawtext != text:
2103 2098 cachedelta = None
2104 2099
2105 2100 if len(rawtext) > _maxentrysize:
2106 2101 raise error.RevlogError(
2107 2102 _(
2108 2103 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2109 2104 )
2110 2105 % (self.indexfile, len(rawtext))
2111 2106 )
2112 2107
2113 2108 node = node or self.hash(rawtext, p1, p2)
2114 2109 if self.index.has_node(node):
2115 2110 return node
2116 2111
2117 2112 if validatehash:
2118 2113 self.checkhash(rawtext, node, p1=p1, p2=p2)
2119 2114
2120 2115 return self.addrawrevision(
2121 2116 rawtext,
2122 2117 transaction,
2123 2118 link,
2124 2119 p1,
2125 2120 p2,
2126 2121 node,
2127 2122 flags,
2128 2123 cachedelta=cachedelta,
2129 2124 deltacomputer=deltacomputer,
2130 2125 )
2131 2126
2132 2127 def addrawrevision(
2133 2128 self,
2134 2129 rawtext,
2135 2130 transaction,
2136 2131 link,
2137 2132 p1,
2138 2133 p2,
2139 2134 node,
2140 2135 flags,
2141 2136 cachedelta=None,
2142 2137 deltacomputer=None,
2143 2138 ):
2144 2139 """add a raw revision with known flags, node and parents
2145 2140 useful when reusing a revision not stored in this revlog (ex: received
2146 2141 over wire, or read from an external bundle).
2147 2142 """
2148 2143 dfh = None
2149 2144 if not self._inline:
2150 2145 dfh = self._datafp(b"a+")
2151 2146 ifh = self._indexfp(b"a+")
2152 2147 try:
2153 2148 return self._addrevision(
2154 2149 node,
2155 2150 rawtext,
2156 2151 transaction,
2157 2152 link,
2158 2153 p1,
2159 2154 p2,
2160 2155 flags,
2161 2156 cachedelta,
2162 2157 ifh,
2163 2158 dfh,
2164 2159 deltacomputer=deltacomputer,
2165 2160 )
2166 2161 finally:
2167 2162 if dfh:
2168 2163 dfh.close()
2169 2164 ifh.close()
2170 2165
2171 2166 def compress(self, data):
2172 2167 """Generate a possibly-compressed representation of data."""
2173 2168 if not data:
2174 2169 return b'', data
2175 2170
2176 2171 compressed = self._compressor.compress(data)
2177 2172
2178 2173 if compressed:
2179 2174 # The revlog compressor added the header in the returned data.
2180 2175 return b'', compressed
2181 2176
2182 2177 if data[0:1] == b'\0':
2183 2178 return b'', data
2184 2179 return b'u', data
2185 2180
2186 2181 def decompress(self, data):
2187 2182 """Decompress a revlog chunk.
2188 2183
2189 2184 The chunk is expected to begin with a header identifying the
2190 2185 format type so it can be routed to an appropriate decompressor.
2191 2186 """
2192 2187 if not data:
2193 2188 return data
2194 2189
2195 2190 # Revlogs are read much more frequently than they are written and many
2196 2191 # chunks only take microseconds to decompress, so performance is
2197 2192 # important here.
2198 2193 #
2199 2194 # We can make a few assumptions about revlogs:
2200 2195 #
2201 2196 # 1) the majority of chunks will be compressed (as opposed to inline
2202 2197 # raw data).
2203 2198 # 2) decompressing *any* data will likely be at least 10x slower than
2204 2199 # returning raw inline data.
2205 2200 # 3) we want to prioritize common and officially supported compression
2206 2201 # engines
2207 2202 #
2208 2203 # It follows that we want to optimize for "decompress compressed data
2209 2204 # when encoded with common and officially supported compression engines"
2210 2205 # case over "raw data" and "data encoded by less common or non-official
2211 2206 # compression engines." That is why we have the inline lookup first
2212 2207 # followed by the compengines lookup.
2213 2208 #
2214 2209 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2215 2210 # compressed chunks. And this matters for changelog and manifest reads.
2216 2211 t = data[0:1]
2217 2212
2218 2213 if t == b'x':
2219 2214 try:
2220 2215 return _zlibdecompress(data)
2221 2216 except zlib.error as e:
2222 2217 raise error.RevlogError(
2223 2218 _(b'revlog decompress error: %s')
2224 2219 % stringutil.forcebytestr(e)
2225 2220 )
2226 2221 # '\0' is more common than 'u' so it goes first.
2227 2222 elif t == b'\0':
2228 2223 return data
2229 2224 elif t == b'u':
2230 2225 return util.buffer(data, 1)
2231 2226
2232 2227 try:
2233 2228 compressor = self._decompressors[t]
2234 2229 except KeyError:
2235 2230 try:
2236 2231 engine = util.compengines.forrevlogheader(t)
2237 2232 compressor = engine.revlogcompressor(self._compengineopts)
2238 2233 self._decompressors[t] = compressor
2239 2234 except KeyError:
2240 2235 raise error.RevlogError(_(b'unknown compression type %r') % t)
2241 2236
2242 2237 return compressor.decompress(data)
2243 2238
2244 2239 def _addrevision(
2245 2240 self,
2246 2241 node,
2247 2242 rawtext,
2248 2243 transaction,
2249 2244 link,
2250 2245 p1,
2251 2246 p2,
2252 2247 flags,
2253 2248 cachedelta,
2254 2249 ifh,
2255 2250 dfh,
2256 2251 alwayscache=False,
2257 2252 deltacomputer=None,
2258 2253 ):
2259 2254 """internal function to add revisions to the log
2260 2255
2261 2256 see addrevision for argument descriptions.
2262 2257
2263 2258 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2264 2259
2265 2260 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2266 2261 be used.
2267 2262
2268 2263 invariants:
2269 2264 - rawtext is optional (can be None); if not set, cachedelta must be set.
2270 2265 if both are set, they must correspond to each other.
2271 2266 """
2272 2267 if node == nullid:
2273 2268 raise error.RevlogError(
2274 2269 _(b"%s: attempt to add null revision") % self.indexfile
2275 2270 )
2276 2271 if node == wdirid or node in wdirfilenodeids:
2277 2272 raise error.RevlogError(
2278 2273 _(b"%s: attempt to add wdir revision") % self.indexfile
2279 2274 )
2280 2275
2281 2276 if self._inline:
2282 2277 fh = ifh
2283 2278 else:
2284 2279 fh = dfh
2285 2280
2286 2281 btext = [rawtext]
2287 2282
2288 2283 curr = len(self)
2289 2284 prev = curr - 1
2290 2285 offset = self.end(prev)
2291 2286 p1r, p2r = self.rev(p1), self.rev(p2)
2292 2287
2293 2288 # full versions are inserted when the needed deltas
2294 2289 # become comparable to the uncompressed text
2295 2290 if rawtext is None:
2296 2291 # need the rawtext size before it is changed by flag processors, which
2297 2292 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2298 2293 # logic that might remove metadata size.
2299 2294 textlen = mdiff.patchedsize(
2300 2295 revlog.size(self, cachedelta[0]), cachedelta[1]
2301 2296 )
2302 2297 else:
2303 2298 textlen = len(rawtext)
2304 2299
2305 2300 if deltacomputer is None:
2306 2301 deltacomputer = deltautil.deltacomputer(self)
2307 2302
2308 2303 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2309 2304
2310 2305 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2311 2306
2312 2307 e = (
2313 2308 offset_type(offset, flags),
2314 2309 deltainfo.deltalen,
2315 2310 textlen,
2316 2311 deltainfo.base,
2317 2312 link,
2318 2313 p1r,
2319 2314 p2r,
2320 2315 node,
2321 2316 )
2322 2317 self.index.append(e)
2323 2318
2324 2319 entry = self._io.packentry(e, self.node, self.version, curr)
2325 2320 self._writeentry(
2326 2321 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2327 2322 )
2328 2323
2329 2324 rawtext = btext[0]
2330 2325
2331 2326 if alwayscache and rawtext is None:
2332 2327 rawtext = deltacomputer.buildtext(revinfo, fh)
2333 2328
2334 2329 if type(rawtext) == bytes: # only accept immutable objects
2335 2330 self._revisioncache = (node, curr, rawtext)
2336 2331 self._chainbasecache[curr] = deltainfo.chainbase
2337 2332 return node
2338 2333
2339 2334 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2340 2335 # Files opened in a+ mode have inconsistent behavior on various
2341 2336 # platforms. Windows requires that a file positioning call be made
2342 2337 # when the file handle transitions between reads and writes. See
2343 2338 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2344 2339 # platforms, Python or the platform itself can be buggy. Some versions
2345 2340 # of Solaris have been observed to not append at the end of the file
2346 2341 # if the file was seeked to before the end. See issue4943 for more.
2347 2342 #
2348 2343 # We work around this issue by inserting a seek() before writing.
2349 2344 # Note: This is likely not necessary on Python 3. However, because
2350 2345 # the file handle is reused for reads and may be seeked there, we need
2351 2346 # to be careful before changing this.
2352 2347 ifh.seek(0, os.SEEK_END)
2353 2348 if dfh:
2354 2349 dfh.seek(0, os.SEEK_END)
2355 2350
2356 2351 curr = len(self) - 1
2357 2352 if not self._inline:
2358 2353 transaction.add(self.datafile, offset)
2359 2354 transaction.add(self.indexfile, curr * len(entry))
2360 2355 if data[0]:
2361 2356 dfh.write(data[0])
2362 2357 dfh.write(data[1])
2363 2358 ifh.write(entry)
2364 2359 else:
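# inline: revision data is written into the index file right after the
# index entries, so the recorded offset must account for those entries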
2365 2360 offset += curr * self._io.size
2366 2361 transaction.add(self.indexfile, offset)
2367 2362 ifh.write(entry)
2368 2363 ifh.write(data[0])
2369 2364 ifh.write(data[1])
2370 2365 self._enforceinlinesize(transaction, ifh)
2371 2366 nodemaputil.setup_persistent_nodemap(transaction, self)
2372 2367
2373 2368 def addgroup(
2374 2369 self,
2375 2370 deltas,
2376 2371 linkmapper,
2377 2372 transaction,
2378 2373 alwayscache=False,
2379 2374 addrevisioncb=None,
2380 2375 duplicaterevisioncb=None,
2381 2376 ):
2382 2377 """
2383 2378 add a delta group
2384 2379
2385 2380 given a set of deltas, add them to the revision log. the
2386 2381 first delta is against its parent, which should be in our
2387 2382 log, the rest are against the previous delta.
2388 2383
2389 2384 If ``addrevisioncb`` is defined, it will be called with arguments of
2390 2385 this revlog and the node that was added.
2391 2386 """
2392 2387
2393 2388 if self._writinghandles:
2394 2389 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2395 2390
2396 2391 r = len(self)
2397 2392 end = 0
2398 2393 if r:
2399 2394 end = self.end(r - 1)
2400 2395 ifh = self._indexfp(b"a+")
2401 2396 isize = r * self._io.size
2402 2397 if self._inline:
2403 2398 transaction.add(self.indexfile, end + isize)
2404 2399 dfh = None
2405 2400 else:
2406 2401 transaction.add(self.indexfile, isize)
2407 2402 transaction.add(self.datafile, end)
2408 2403 dfh = self._datafp(b"a+")
2409 2404
2410 2405 def flush():
2411 2406 if dfh:
2412 2407 dfh.flush()
2413 2408 ifh.flush()
2414 2409
2415 2410 self._writinghandles = (ifh, dfh)
2416 2411 empty = True
2417 2412
2418 2413 try:
2419 2414 deltacomputer = deltautil.deltacomputer(self)
2420 2415 # loop through our set of deltas
2421 2416 for data in deltas:
2422 2417 node, p1, p2, linknode, deltabase, delta, flags = data
2423 2418 link = linkmapper(linknode)
2424 2419 flags = flags or REVIDX_DEFAULT_FLAGS
2425 2420
2426 2421 if self.index.has_node(node):
2427 2422 # this can happen if two branches make the same change
2428 2423 self._nodeduplicatecallback(transaction, node)
2429 2424 if duplicaterevisioncb:
2430 2425 duplicaterevisioncb(self, node)
2431 2426 empty = False
2432 2427 continue
2433 2428
2434 2429 for p in (p1, p2):
2435 2430 if not self.index.has_node(p):
2436 2431 raise error.LookupError(
2437 2432 p, self.indexfile, _(b'unknown parent')
2438 2433 )
2439 2434
2440 2435 if not self.index.has_node(deltabase):
2441 2436 raise error.LookupError(
2442 2437 deltabase, self.indexfile, _(b'unknown delta base')
2443 2438 )
2444 2439
2445 2440 baserev = self.rev(deltabase)
2446 2441
2447 2442 if baserev != nullrev and self.iscensored(baserev):
2448 2443 # if base is censored, delta must be full replacement in a
2449 2444 # single patch operation
2450 2445 hlen = struct.calcsize(b">lll")
2451 2446 oldlen = self.rawsize(baserev)
2452 2447 newlen = len(delta) - hlen
2453 2448 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2454 2449 raise error.CensoredBaseError(
2455 2450 self.indexfile, self.node(baserev)
2456 2451 )
2457 2452
2458 2453 if not flags and self._peek_iscensored(baserev, delta, flush):
2459 2454 flags |= REVIDX_ISCENSORED
2460 2455
2461 2456 # We assume consumers of addrevisioncb will want to retrieve
2462 2457 # the added revision, which will require a call to
2463 2458 # revision(). revision() will fast path if there is a cache
2464 2459 # hit. So, we tell _addrevision() to always cache in this case.
2465 2460 # We're only using addgroup() in the context of changegroup
2466 2461 # generation so the revision data can always be handled as raw
2467 2462 # by the flagprocessor.
2468 2463 self._addrevision(
2469 2464 node,
2470 2465 None,
2471 2466 transaction,
2472 2467 link,
2473 2468 p1,
2474 2469 p2,
2475 2470 flags,
2476 2471 (baserev, delta),
2477 2472 ifh,
2478 2473 dfh,
2479 2474 alwayscache=alwayscache,
2480 2475 deltacomputer=deltacomputer,
2481 2476 )
2482 2477
2483 2478 if addrevisioncb:
2484 2479 addrevisioncb(self, node)
2485 2480 empty = False
2486 2481
2487 2482 if not dfh and not self._inline:
2488 2483 # addrevision switched from inline to conventional
2489 2484 # reopen the index
2490 2485 ifh.close()
2491 2486 dfh = self._datafp(b"a+")
2492 2487 ifh = self._indexfp(b"a+")
2493 2488 self._writinghandles = (ifh, dfh)
2494 2489 finally:
2495 2490 self._writinghandles = None
2496 2491
2497 2492 if dfh:
2498 2493 dfh.close()
2499 2494 ifh.close()
2500 2495 return not empty
2501 2496
2502 2497 def iscensored(self, rev):
2503 2498 """Check if a file revision is censored."""
2504 2499 if not self._censorable:
2505 2500 return False
2506 2501
2507 2502 return self.flags(rev) & REVIDX_ISCENSORED
2508 2503
2509 2504 def _peek_iscensored(self, baserev, delta, flush):
2510 2505 """Quickly check if a delta produces a censored revision."""
2511 2506 if not self._censorable:
2512 2507 return False
2513 2508
2514 2509 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2515 2510
2516 2511 def getstrippoint(self, minlink):
2517 2512 """find the minimum rev that must be stripped to strip the linkrev
2518 2513
2519 2514 Returns a tuple containing the minimum rev and a set of all revs that
2520 2515 have linkrevs that will be broken by this strip.
2521 2516 """
2522 2517 return storageutil.resolvestripinfo(
2523 2518 minlink,
2524 2519 len(self) - 1,
2525 2520 self.headrevs(),
2526 2521 self.linkrev,
2527 2522 self.parentrevs,
2528 2523 )
2529 2524
2530 2525 def strip(self, minlink, transaction):
2531 2526 """truncate the revlog on the first revision with a linkrev >= minlink
2532 2527
2533 2528 This function is called when we're stripping revision minlink and
2534 2529 its descendants from the repository.
2535 2530
2536 2531 We have to remove all revisions with linkrev >= minlink, because
2537 2532 the equivalent changelog revisions will be renumbered after the
2538 2533 strip.
2539 2534
2540 2535 So we truncate the revlog on the first of these revisions, and
2541 2536 trust that the caller has saved the revisions that shouldn't be
2542 2537 removed and that it'll re-add them after this truncation.
2543 2538 """
2544 2539 if len(self) == 0:
2545 2540 return
2546 2541
2547 2542 rev, _ = self.getstrippoint(minlink)
2548 2543 if rev == len(self):
2549 2544 return
2550 2545
2551 2546 # first truncate the files on disk
2552 2547 end = self.start(rev)
2553 2548 if not self._inline:
2554 2549 transaction.add(self.datafile, end)
2555 2550 end = rev * self._io.size
2556 2551 else:
2557 2552 end += rev * self._io.size
2558 2553
2559 2554 transaction.add(self.indexfile, end)
2560 2555
2561 2556 # then reset internal state in memory to forget those revisions
2562 2557 self._revisioncache = None
2563 2558 self._chaininfocache = util.lrucachedict(500)
2564 2559 self._chunkclear()
2565 2560
2566 2561 del self.index[rev:-1]
2567 2562
2568 2563 def checksize(self):
2569 2564 """Check size of index and data files
2570 2565
2571 2566 return a (dd, di) tuple.
2572 2567 - dd: extra bytes for the "data" file
2573 2568 - di: extra bytes for the "index" file
2574 2569
2575 2570 A healthy revlog will return (0, 0).
2576 2571 """
2577 2572 expected = 0
2578 2573 if len(self):
2579 2574 expected = max(0, self.end(len(self) - 1))
2580 2575
2581 2576 try:
2582 2577 with self._datafp() as f:
2583 2578 f.seek(0, io.SEEK_END)
2584 2579 actual = f.tell()
2585 2580 dd = actual - expected
2586 2581 except IOError as inst:
2587 2582 if inst.errno != errno.ENOENT:
2588 2583 raise
2589 2584 dd = 0
2590 2585
2591 2586 try:
2592 2587 f = self.opener(self.indexfile)
2593 2588 f.seek(0, io.SEEK_END)
2594 2589 actual = f.tell()
2595 2590 f.close()
2596 2591 s = self._io.size
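# bytes beyond a whole number of index entries count as extra index
# bytes; inline revlogs also carry interleaved revision data, which is
# accounted for separately below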
2597 2592 i = max(0, actual // s)
2598 2593 di = actual - (i * s)
2599 2594 if self._inline:
2600 2595 databytes = 0
2601 2596 for r in self:
2602 2597 databytes += max(0, self.length(r))
2603 2598 dd = 0
2604 2599 di = actual - len(self) * s - databytes
2605 2600 except IOError as inst:
2606 2601 if inst.errno != errno.ENOENT:
2607 2602 raise
2608 2603 di = 0
2609 2604
2610 2605 return (dd, di)
2611 2606
2612 2607 def files(self):
2613 2608 res = [self.indexfile]
2614 2609 if not self._inline:
2615 2610 res.append(self.datafile)
2616 2611 return res
2617 2612
2618 2613 def emitrevisions(
2619 2614 self,
2620 2615 nodes,
2621 2616 nodesorder=None,
2622 2617 revisiondata=False,
2623 2618 assumehaveparentrevisions=False,
2624 2619 deltamode=repository.CG_DELTAMODE_STD,
2625 2620 ):
2626 2621 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2627 2622 raise error.ProgrammingError(
2628 2623 b'unhandled value for nodesorder: %s' % nodesorder
2629 2624 )
2630 2625
2631 2626 if nodesorder is None and not self._generaldelta:
2632 2627 nodesorder = b'storage'
2633 2628
2634 2629 if (
2635 2630 not self._storedeltachains
2636 2631 and deltamode != repository.CG_DELTAMODE_PREV
2637 2632 ):
2638 2633 deltamode = repository.CG_DELTAMODE_FULL
2639 2634
2640 2635 return storageutil.emitrevisions(
2641 2636 self,
2642 2637 nodes,
2643 2638 nodesorder,
2644 2639 revlogrevisiondelta,
2645 2640 deltaparentfn=self.deltaparent,
2646 2641 candeltafn=self.candelta,
2647 2642 rawsizefn=self.rawsize,
2648 2643 revdifffn=self.revdiff,
2649 2644 flagsfn=self.flags,
2650 2645 deltamode=deltamode,
2651 2646 revisiondata=revisiondata,
2652 2647 assumehaveparentrevisions=assumehaveparentrevisions,
2653 2648 )
2654 2649
2655 2650 DELTAREUSEALWAYS = b'always'
2656 2651 DELTAREUSESAMEREVS = b'samerevs'
2657 2652 DELTAREUSENEVER = b'never'
2658 2653
2659 2654 DELTAREUSEFULLADD = b'fulladd'
2660 2655
2661 2656 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2662 2657
2663 2658 def clone(
2664 2659 self,
2665 2660 tr,
2666 2661 destrevlog,
2667 2662 addrevisioncb=None,
2668 2663 deltareuse=DELTAREUSESAMEREVS,
2669 2664 forcedeltabothparents=None,
2670 2665 sidedatacompanion=None,
2671 2666 ):
2672 2667 """Copy this revlog to another, possibly with format changes.
2673 2668
2674 2669 The destination revlog will contain the same revisions and nodes.
2675 2670 However, it may not be bit-for-bit identical due to e.g. delta encoding
2676 2671 differences.
2677 2672
2678 2673 The ``deltareuse`` argument controls how deltas from the existing revlog
2679 2674 are preserved in the destination revlog. The argument can have the
2680 2675 following values:
2681 2676
2682 2677 DELTAREUSEALWAYS
2683 2678 Deltas will always be reused (if possible), even if the destination
2684 2679 revlog would not select the same revisions for the delta. This is the
2685 2680 fastest mode of operation.
2686 2681 DELTAREUSESAMEREVS
2687 2682 Deltas will be reused if the destination revlog would pick the same
2688 2683 revisions for the delta. This mode strikes a balance between speed
2689 2684 and optimization.
2690 2685 DELTAREUSENEVER
2691 2686 Deltas will never be reused. This is the slowest mode of execution.
2692 2687 This mode can be used to recompute deltas (e.g. if the diff/delta
2693 2688 algorithm changes).
2694 2689 DELTAREUSEFULLADD
2695 2690 Revisions will be re-added as if they were new content. This is
2696 2691 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2697 2692 e.g. large file detection and handling.
2698 2693
2699 2694 Delta computation can be slow, so the choice of delta reuse policy can
2700 2695 significantly affect run time.
2701 2696
2702 2697 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2703 2698 two extremes. Deltas will be reused if they are appropriate. But if the
2704 2699 delta could choose a better revision, it will do so. This means if you
2705 2700 are converting a non-generaldelta revlog to a generaldelta revlog,
2706 2701 deltas will be recomputed if the delta's parent isn't a parent of the
2707 2702 revision.
2708 2703
2709 2704 In addition to the delta policy, the ``forcedeltabothparents``
2710 2705 argument controls whether to force compute deltas against both parents
2711 2706 for merges. By default, the destination revlog's current setting is used.
2712 2707
2713 2708 If not None, `sidedatacompanion` is a callable that accepts two
2714 2709 arguments:
2715 2710
2716 2711 (srcrevlog, rev)
2717 2712
2718 2713 and returns a quintet that controls changes to sidedata content from the
2719 2714 old revision to the new clone result:
2720 2715
2721 2716 (dropall, filterout, update, new_flags, dropped_flags)
2722 2717
2723 2718 * if `dropall` is True, all sidedata should be dropped
2724 2719 * `filterout` is a set of sidedata keys that should be dropped
2725 2720 * `update` is a mapping of additional/new key -> value
2726 2721 * new_flags is a bitfield of new flags that the revision should get
2727 2722 * dropped_flags is a bitfield of flags that the revision should no longer have
2728 2723 """
2729 2724 if deltareuse not in self.DELTAREUSEALL:
2730 2725 raise ValueError(
2731 2726 _(b'value for deltareuse invalid: %s') % deltareuse
2732 2727 )
2733 2728
2734 2729 if len(destrevlog):
2735 2730 raise ValueError(_(b'destination revlog is not empty'))
2736 2731
2737 2732 if getattr(self, 'filteredrevs', None):
2738 2733 raise ValueError(_(b'source revlog has filtered revisions'))
2739 2734 if getattr(destrevlog, 'filteredrevs', None):
2740 2735 raise ValueError(_(b'destination revlog has filtered revisions'))
2741 2736
2742 2737 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2743 2738 # if possible.
2744 2739 oldlazydelta = destrevlog._lazydelta
2745 2740 oldlazydeltabase = destrevlog._lazydeltabase
2746 2741 oldamd = destrevlog._deltabothparents
2747 2742
2748 2743 try:
2749 2744 if deltareuse == self.DELTAREUSEALWAYS:
2750 2745 destrevlog._lazydeltabase = True
2751 2746 destrevlog._lazydelta = True
2752 2747 elif deltareuse == self.DELTAREUSESAMEREVS:
2753 2748 destrevlog._lazydeltabase = False
2754 2749 destrevlog._lazydelta = True
2755 2750 elif deltareuse == self.DELTAREUSENEVER:
2756 2751 destrevlog._lazydeltabase = False
2757 2752 destrevlog._lazydelta = False
2758 2753
2759 2754 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2760 2755
2761 2756 self._clone(
2762 2757 tr,
2763 2758 destrevlog,
2764 2759 addrevisioncb,
2765 2760 deltareuse,
2766 2761 forcedeltabothparents,
2767 2762 sidedatacompanion,
2768 2763 )
2769 2764
2770 2765 finally:
2771 2766 destrevlog._lazydelta = oldlazydelta
2772 2767 destrevlog._lazydeltabase = oldlazydeltabase
2773 2768 destrevlog._deltabothparents = oldamd
2774 2769
2775 2770 def _clone(
2776 2771 self,
2777 2772 tr,
2778 2773 destrevlog,
2779 2774 addrevisioncb,
2780 2775 deltareuse,
2781 2776 forcedeltabothparents,
2782 2777 sidedatacompanion,
2783 2778 ):
2784 2779 """perform the core duty of `revlog.clone` after parameter processing"""
2785 2780 deltacomputer = deltautil.deltacomputer(destrevlog)
2786 2781 index = self.index
2787 2782 for rev in self:
2788 2783 entry = index[rev]
2789 2784
2790 2785 # Some classes override linkrev to take filtered revs into
2791 2786 # account. Use raw entry from index.
2792 2787 flags = entry[0] & 0xFFFF
2793 2788 linkrev = entry[4]
2794 2789 p1 = index[entry[5]][7]
2795 2790 p2 = index[entry[6]][7]
2796 2791 node = entry[7]
2797 2792
2798 2793 sidedataactions = (False, [], {}, 0, 0)
2799 2794 if sidedatacompanion is not None:
2800 2795 sidedataactions = sidedatacompanion(self, rev)
2801 2796
2802 2797 # (Possibly) reuse the delta from the revlog if allowed and
2803 2798 # the revlog chunk is a delta.
2804 2799 cachedelta = None
2805 2800 rawtext = None
2806 2801 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2807 2802 dropall = sidedataactions[0]
2808 2803 filterout = sidedataactions[1]
2809 2804 update = sidedataactions[2]
2810 2805 new_flags = sidedataactions[3]
2811 2806 dropped_flags = sidedataactions[4]
2812 2807 text, sidedata = self._revisiondata(rev)
2813 2808 if dropall:
2814 2809 sidedata = {}
2815 2810 for key in filterout:
2816 2811 sidedata.pop(key, None)
2817 2812 sidedata.update(update)
2818 2813 if not sidedata:
2819 2814 sidedata = None
2820 2815
2821 2816 flags |= new_flags
2822 2817 flags &= ~dropped_flags
2823 2818
2824 2819 destrevlog.addrevision(
2825 2820 text,
2826 2821 tr,
2827 2822 linkrev,
2828 2823 p1,
2829 2824 p2,
2830 2825 cachedelta=cachedelta,
2831 2826 node=node,
2832 2827 flags=flags,
2833 2828 deltacomputer=deltacomputer,
2834 2829 sidedata=sidedata,
2835 2830 )
2836 2831 else:
2837 2832 if destrevlog._lazydelta:
2838 2833 dp = self.deltaparent(rev)
2839 2834 if dp != nullrev:
2840 2835 cachedelta = (dp, bytes(self._chunk(rev)))
2841 2836
2842 2837 if not cachedelta:
2843 2838 rawtext = self.rawdata(rev)
2844 2839
2845 2840 ifh = destrevlog.opener(
2846 2841 destrevlog.indexfile, b'a+', checkambig=False
2847 2842 )
2848 2843 dfh = None
2849 2844 if not destrevlog._inline:
2850 2845 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2851 2846 try:
2852 2847 destrevlog._addrevision(
2853 2848 node,
2854 2849 rawtext,
2855 2850 tr,
2856 2851 linkrev,
2857 2852 p1,
2858 2853 p2,
2859 2854 flags,
2860 2855 cachedelta,
2861 2856 ifh,
2862 2857 dfh,
2863 2858 deltacomputer=deltacomputer,
2864 2859 )
2865 2860 finally:
2866 2861 if dfh:
2867 2862 dfh.close()
2868 2863 ifh.close()
2869 2864
2870 2865 if addrevisioncb:
2871 2866 addrevisioncb(self, rev, node)
2872 2867
2873 2868 def censorrevision(self, tr, censornode, tombstone=b''):
2874 2869 if (self.version & 0xFFFF) == REVLOGV0:
2875 2870 raise error.RevlogError(
2876 2871 _(b'cannot censor with version %d revlogs') % self.version
2877 2872 )
2878 2873
2879 2874 censorrev = self.rev(censornode)
2880 2875 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2881 2876
2882 2877 if len(tombstone) > self.rawsize(censorrev):
2883 2878 raise error.Abort(
2884 2879 _(b'censor tombstone must be no longer than censored data')
2885 2880 )
2886 2881
2887 2882 # Rewriting the revlog in place is hard. Our strategy for censoring is
2888 2883 # to create a new revlog, copy all revisions to it, then replace the
2889 2884 # revlogs on transaction close.
2890 2885
2891 2886 newindexfile = self.indexfile + b'.tmpcensored'
2892 2887 newdatafile = self.datafile + b'.tmpcensored'
2893 2888
2894 2889 # This is a bit dangerous. We could easily have a mismatch of state.
2895 2890 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2896 2891 newrl.version = self.version
2897 2892 newrl._generaldelta = self._generaldelta
2898 2893 newrl._io = self._io
2899 2894
2900 2895 for rev in self.revs():
2901 2896 node = self.node(rev)
2902 2897 p1, p2 = self.parents(node)
2903 2898
2904 2899 if rev == censorrev:
2905 2900 newrl.addrawrevision(
2906 2901 tombstone,
2907 2902 tr,
2908 2903 self.linkrev(censorrev),
2909 2904 p1,
2910 2905 p2,
2911 2906 censornode,
2912 2907 REVIDX_ISCENSORED,
2913 2908 )
2914 2909
2915 2910 if newrl.deltaparent(rev) != nullrev:
2916 2911 raise error.Abort(
2917 2912 _(
2918 2913 b'censored revision stored as delta; '
2919 2914 b'cannot censor'
2920 2915 ),
2921 2916 hint=_(
2922 2917 b'censoring of revlogs is not '
2923 2918 b'fully implemented; please report '
2924 2919 b'this bug'
2925 2920 ),
2926 2921 )
2927 2922 continue
2928 2923
2929 2924 if self.iscensored(rev):
2930 2925 if self.deltaparent(rev) != nullrev:
2931 2926 raise error.Abort(
2932 2927 _(
2933 2928 b'cannot censor due to censored '
2934 2929 b'revision having delta stored'
2935 2930 )
2936 2931 )
2937 2932 rawtext = self._chunk(rev)
2938 2933 else:
2939 2934 rawtext = self.rawdata(rev)
2940 2935
2941 2936 newrl.addrawrevision(
2942 2937 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2943 2938 )
2944 2939
2945 2940 tr.addbackup(self.indexfile, location=b'store')
2946 2941 if not self._inline:
2947 2942 tr.addbackup(self.datafile, location=b'store')
2948 2943
2949 2944 self.opener.rename(newrl.indexfile, self.indexfile)
2950 2945 if not self._inline:
2951 2946 self.opener.rename(newrl.datafile, self.datafile)
2952 2947
2953 2948 self.clearcaches()
2954 2949 self._loadindex()
2955 2950
2956 2951 def verifyintegrity(self, state):
2957 2952 """Verifies the integrity of the revlog.
2958 2953
2959 2954 Yields ``revlogproblem`` instances describing problems that are
2960 2955 found.
2961 2956 """
2962 2957 dd, di = self.checksize()
2963 2958 if dd:
2964 2959 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2965 2960 if di:
2966 2961 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2967 2962
2968 2963 version = self.version & 0xFFFF
2969 2964
2970 2965 # The verifier tells us what version revlog we should be.
2971 2966 if version != state[b'expectedversion']:
2972 2967 yield revlogproblem(
2973 2968 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2974 2969 % (self.indexfile, version, state[b'expectedversion'])
2975 2970 )
2976 2971
2977 2972 state[b'skipread'] = set()
2978 2973 state[b'safe_renamed'] = set()
2979 2974
2980 2975 for rev in self:
2981 2976 node = self.node(rev)
2982 2977
2983 2978 # Verify contents. 4 cases to care about:
2984 2979 #
2985 2980 # common: the most common case
2986 2981 # rename: with a rename
2987 2982 # meta: file content starts with b'\1\n', the metadata
2988 2983 # header defined in filelog.py, but without a rename
2989 2984 # ext: content stored externally
2990 2985 #
2991 2986 # More formally, their differences are shown below:
2992 2987 #
2993 2988 # | common | rename | meta | ext
2994 2989 # -------------------------------------------------------
2995 2990 # flags() | 0 | 0 | 0 | not 0
2996 2991 # renamed() | False | True | False | ?
2997 2992 # rawtext[0:2]=='\1\n'| False | True | True | ?
2998 2993 #
2999 2994 # "rawtext" means the raw text stored in revlog data, which
3000 2995 # could be retrieved by "rawdata(rev)". "text"
3001 2996 # mentioned below is "revision(rev)".
3002 2997 #
3003 2998 # There are 3 different lengths stored physically:
3004 2999 # 1. L1: rawsize, stored in revlog index
3005 3000 # 2. L2: len(rawtext), stored in revlog data
3006 3001 # 3. L3: len(text), stored in revlog data if flags==0, or
3007 3002 # possibly somewhere else if flags!=0
3008 3003 #
3009 3004 # L1 should be equal to L2. L3 could be different from them.
3010 3005 # "text" may or may not affect commit hash depending on flag
3011 3006 # processors (see flagutil.addflagprocessor).
3012 3007 #
3013 3008 # | common | rename | meta | ext
3014 3009 # -------------------------------------------------
3015 3010 # rawsize() | L1 | L1 | L1 | L1
3016 3011 # size() | L1 | L2-LM | L1(*) | L1 (?)
3017 3012 # len(rawtext) | L2 | L2 | L2 | L2
3018 3013 # len(text) | L2 | L2 | L2 | L3
3019 3014 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3020 3015 #
3021 3016 # LM: length of metadata, depending on rawtext
3022 3017 # (*): not ideal, see comment in filelog.size
3023 3018 # (?): could be "- len(meta)" if the resolved content has
3024 3019 # rename metadata
3025 3020 #
3026 3021 # Checks needed to be done:
3027 3022 # 1. length check: L1 == L2, in all cases.
3028 3023 # 2. hash check: depending on flag processor, we may need to
3029 3024 # use either "text" (external), or "rawtext" (in revlog).
3030 3025
3031 3026 try:
3032 3027 skipflags = state.get(b'skipflags', 0)
3033 3028 if skipflags:
3034 3029 skipflags &= self.flags(rev)
3035 3030
3036 3031 _verify_revision(self, skipflags, state, node)
3037 3032
3038 3033 l1 = self.rawsize(rev)
3039 3034 l2 = len(self.rawdata(node))
3040 3035
3041 3036 if l1 != l2:
3042 3037 yield revlogproblem(
3043 3038 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3044 3039 node=node,
3045 3040 )
3046 3041
3047 3042 except error.CensoredNodeError:
3048 3043 if state[b'erroroncensored']:
3049 3044 yield revlogproblem(
3050 3045 error=_(b'censored file data'), node=node
3051 3046 )
3052 3047 state[b'skipread'].add(node)
3053 3048 except Exception as e:
3054 3049 yield revlogproblem(
3055 3050 error=_(b'unpacking %s: %s')
3056 3051 % (short(node), stringutil.forcebytestr(e)),
3057 3052 node=node,
3058 3053 )
3059 3054 state[b'skipread'].add(node)
3060 3055
3061 3056 def storageinfo(
3062 3057 self,
3063 3058 exclusivefiles=False,
3064 3059 sharedfiles=False,
3065 3060 revisionscount=False,
3066 3061 trackedsize=False,
3067 3062 storedsize=False,
3068 3063 ):
3069 3064 d = {}
3070 3065
3071 3066 if exclusivefiles:
3072 3067 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3073 3068 if not self._inline:
3074 3069 d[b'exclusivefiles'].append((self.opener, self.datafile))
3075 3070
3076 3071 if sharedfiles:
3077 3072 d[b'sharedfiles'] = []
3078 3073
3079 3074 if revisionscount:
3080 3075 d[b'revisionscount'] = len(self)
3081 3076
3082 3077 if trackedsize:
3083 3078 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3084 3079
3085 3080 if storedsize:
3086 3081 d[b'storedsize'] = sum(
3087 3082 self.opener.stat(path).st_size for path in self.files()
3088 3083 )
3089 3084
3090 3085 return d
@@ -1,636 +1,647
1 1 # nodemap.py - nodemap related code and utilities
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import os
13 13 import re
14 14 import struct
15 15
16 16 from ..node import hex
17 17
18 18 from .. import (
19 19 error,
20 20 util,
21 21 )
22 22
23 23
24 24 class NodeMap(dict):
25 25 def __missing__(self, x):
26 26 raise error.RevlogError(b'unknown node: %s' % x)
27 27
28 28
29 29 def persisted_data(revlog):
30 30 """read the nodemap for a revlog from disk"""
31 31 if revlog.nodemap_file is None:
32 32 return None
33 33 pdata = revlog.opener.tryread(revlog.nodemap_file)
34 34 if not pdata:
35 35 return None
36 36 offset = 0
37 37 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
38 38 if version != ONDISK_VERSION:
39 39 return None
40 40 offset += S_VERSION.size
41 41 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
42 42 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
43 43 offset += S_HEADER.size
44 44 docket = NodeMapDocket(pdata[offset : offset + uid_size])
45 45 offset += uid_size
46 46 docket.tip_rev = tip_rev
47 47 docket.tip_node = pdata[offset : offset + tip_node_size]
48 48 docket.data_length = data_length
49 49 docket.data_unused = data_unused
50 50
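# the docket only points at the raw data file; load that file (through
# mmap when configured) and make sure it is at least as long as the
# docket claims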
51 51 filename = _rawdata_filepath(revlog, docket)
52 52 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
53 53 try:
54 54 with revlog.opener(filename) as fd:
55 55 if use_mmap:
56 56 data = util.buffer(util.mmapread(fd, data_length))
57 57 else:
58 58 data = fd.read(data_length)
59 59 except (IOError, OSError) as e:
60 60 if e.errno == errno.ENOENT:
61 61 return None
62 62 else:
63 63 raise
64 64 if len(data) < data_length:
65 65 return None
66 66 return docket, data
67 67
68 68
69 69 def setup_persistent_nodemap(tr, revlog):
70 70 """Install whatever is needed transaction side to persist a nodemap on disk
71 71
72 72 (only actually persist the nodemap if this is relevant for this revlog)
73 73 """
74 74 if revlog._inline:
75 75 return # inlined revlogs are too small for this to be relevant
76 76 if revlog.nodemap_file is None:
77 77 return # we do not use persistent_nodemap on this revlog
78 78
79 79 # this needs to run after the changelog finalization, whose callback id uses "cl-"
80 80 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
81 81 if tr.hasfinalize(callback_id):
82 82 return # no need to register again
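# write a pending copy of the nodemap while the transaction is still
# open (the pending data is what hooks running inside the transaction
# see) and the final version at transaction finalization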
83 83 tr.addpending(
84 84 callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
85 85 )
86 86 tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))
87 87
88 88
89 89 class _NoTransaction(object):
90 90 """transaction like object to update the nodemap outside a transaction"""
91 91
92 92 def __init__(self):
93 93 self._postclose = {}
94 94
95 95 def addpostclose(self, callback_id, callback_func):
96 96 self._postclose[callback_id] = callback_func
97 97
98 98 def registertmp(self, *args, **kwargs):
99 99 pass
100 100
101 101 def addbackup(self, *args, **kwargs):
102 102 pass
103 103
104 104 def add(self, *args, **kwargs):
105 105 pass
106 106
107 107 def addabort(self, *args, **kwargs):
108 108 pass
109 109
110 110 def _report(self, *args):
111 111 pass
112 112
113 113
114 114 def update_persistent_nodemap(revlog):
115 115 """update the persistent nodemap right now
116 116
117 117 To be used for updating the nodemap on disk outside of a normal transaction
118 118 setup (eg, `debugupdatecache`).
119 119 """
120 120 if revlog._inline:
121 121 return # inlined revlogs are too small for this to be relevant
122 122 if revlog.nodemap_file is None:
123 123 return # we do not use persistent_nodemap on this revlog
124 124
125 125 notr = _NoTransaction()
126 126 persist_nodemap(notr, revlog)
127 127 for k in sorted(notr._postclose):
128 128 notr._postclose[k](None)
129 129
130 130
131 131 def persist_nodemap(tr, revlog, pending=False):
132 132 """Write nodemap data on disk for a given revlog"""
133 133 if getattr(revlog, 'filteredrevs', ()):
134 134 raise error.ProgrammingError(
135 135 "cannot persist nodemap of a filtered changelog"
136 136 )
137 137 if revlog.nodemap_file is None:
138 138 msg = "calling persist nodemap on a revlog without the feature enabled"
139 139 raise error.ProgrammingError(msg)
140 140
141 141 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
142 142 ondisk_docket = revlog._nodemap_docket
143 143 feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
144 144 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
145 145
146 146 data = None
147 147 # first attempt an incremental update of the data
148 148 if can_incremental and ondisk_docket is not None:
149 149 target_docket = revlog._nodemap_docket.copy()
150 150 (
151 151 src_docket,
152 152 data_changed_count,
153 153 data,
154 154 ) = revlog.index.nodemap_data_incremental()
155 155 new_length = target_docket.data_length + len(data)
156 156 new_unused = target_docket.data_unused + data_changed_count
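# only append incrementally if the on-disk docket still matches the one
# this data was computed from and the file would not carry too much
# stale data; otherwise fall back to a full rewrite below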
157 157 if src_docket != target_docket:
158 158 data = None
159 159 elif new_length <= (new_unused * 10): # unused data would reach 10%
160 160 data = None
161 161 else:
162 162 datafile = _rawdata_filepath(revlog, target_docket)
163 163 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
164 164 # store vfs
165 165 tr.add(datafile, target_docket.data_length)
166 166 with revlog.opener(datafile, b'r+') as fd:
167 167 fd.seek(target_docket.data_length)
168 168 fd.write(data)
169 169 if feed_data:
170 170 if use_mmap:
171 171 fd.flush()
172 172 new_data = util.buffer(util.mmapread(fd, new_length))
173 173 else:
174 174 fd.seek(0)
175 175 new_data = fd.read(new_length)
176 176 target_docket.data_length = new_length
177 177 target_docket.data_unused = new_unused
178 178
179 179 if data is None:
180 180 # otherwise fall back to a full new export
181 181 target_docket = NodeMapDocket()
182 182 datafile = _rawdata_filepath(revlog, target_docket)
183 183 if util.safehasattr(revlog.index, "nodemap_data_all"):
184 184 data = revlog.index.nodemap_data_all()
185 185 else:
186 186 data = persistent_data(revlog.index)
187 187 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
188 188 # store vfs
189 189
190 190 tryunlink = revlog.opener.tryunlink
191 191
192 192 def abortck(tr):
193 193 tryunlink(datafile)
194 194
195 195 callback_id = b"delete-%s" % datafile
196 196
197 197 # some flavors of transaction abort do not clean up new files; they
198 198 # simply empty them.
199 199 tr.addabort(callback_id, abortck)
200 200 with revlog.opener(datafile, b'w+') as fd:
201 201 fd.write(data)
202 202 if feed_data:
203 203 if use_mmap:
204 204 fd.flush()
205 205 new_data = util.buffer(util.mmapread(fd, len(data)))
206 206 else:
207 207 new_data = data
208 208 target_docket.data_length = len(data)
209 209 target_docket.tip_rev = revlog.tiprev()
210 210 target_docket.tip_node = revlog.node(target_docket.tip_rev)
211 211 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
212 212 # store vfs
213 213 file_path = revlog.nodemap_file
214 214 if pending:
215 215 file_path += b'.a'
216 216 tr.registertmp(file_path)
217 217 else:
218 218 tr.addbackup(file_path)
219 219
220 220 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
221 221 fp.write(target_docket.serialize())
222 222 revlog._nodemap_docket = target_docket
223 223 if feed_data:
224 224 revlog.index.update_nodemap_data(target_docket, new_data)
225 225
226 226 # search for old data files in all cases; some older process might have
227 227 # left one behind.
228 228 olds = _other_rawdata_filepath(revlog, target_docket)
229 229 if olds:
230 230 realvfs = getattr(revlog, '_realopener', revlog.opener)
231 231
232 232 def cleanup(tr):
233 233 for oldfile in olds:
234 234 realvfs.tryunlink(oldfile)
235 235
236 236 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
237 237 tr.addpostclose(callback_id, cleanup)
238 238
239 239
240 240 ### Nodemap docket file
241 241 #
242 242 # The nodemap data are stored on disk using 2 files:
243 243 #
244 244 # * a raw data file containing a persistent nodemap
245 245 # (see `Nodemap Trie` section)
246 246 #
247 247 # * a small "docket" file containing metadata
248 248 #
249 249 # While the nodemap data can be multiple tens of megabytes, the "docket" is
250 250 # small, so it is easy to update it automatically or to duplicate its content
251 251 # during a transaction.
252 252 #
253 253 # Multiple raw data files can exist at the same time (the currently valid one
254 254 # and a new one being used by an in-progress transaction). To accommodate this,
255 255 # the filename hosting the raw data has a variable part. The exact filename is
256 256 # specified inside the "docket" file.
257 257 #
258 258 # The docket file contains information to find, qualify and validate the raw
259 259 # data. Its content is currently very light, but it will expand as the on disk
260 260 # nodemap gains the necessary features to be used in production.
261 261
262 262 ONDISK_VERSION = 1
263 263 S_VERSION = struct.Struct(">B")
264 264 S_HEADER = struct.Struct(">BQQQQ")
265 265
266 266 ID_SIZE = 8
267 267
268 268
269 269 def _make_uid():
270 270 """return a new unique identifier.
271 271
272 272 The identifier is random and composed of ascii characters."""
273 273 return hex(os.urandom(ID_SIZE))
274 274
275 275
276 276 class NodeMapDocket(object):
277 277 """metadata associated with persistent nodemap data
278 278
279 279 The persistent data may come from disk or be on their way to disk.
280 280 """
281 281
282 282 def __init__(self, uid=None):
283 283 if uid is None:
284 284 uid = _make_uid()
285 285 # a unique identifier for the data file:
286 286 # - When new data are appended, it is preserved.
287 287 # - When a new data file is created, a new identifier is generated.
288 288 self.uid = uid
289 289 # the tipmost revision stored in the data file. This revision and all
290 290 # revisions before it are expected to be encoded in the data file.
291 291 self.tip_rev = None
292 292 # the node of that tipmost revision; if it mismatches the current index
293 293 # data, the docket is not valid for the current index and should be
294 294 # discarded.
295 295 #
296 296 # note: this check is not perfect as some destructive operation could
297 297 # preserve the same tip_rev + tip_node while altering lower revisions.
298 298 # However, multiple other caches have the same vulnerability (eg:
299 299 # branchmap cache).
300 300 self.tip_node = None
301 301 # the size (in bytes) of the persisted data to encode the nodemap valid
302 302 # for `tip_rev`.
303 303 # - data files shorter than this are corrupted,
304 304 # - any extra data should be ignored.
305 305 self.data_length = None
306 306 # the amount (in bytes) of "dead" data, still in the data file but no
307 307 # longer used for the nodemap.
308 308 self.data_unused = 0
309 309
310 310 def copy(self):
311 311 new = NodeMapDocket(uid=self.uid)
312 312 new.tip_rev = self.tip_rev
313 313 new.tip_node = self.tip_node
314 314 new.data_length = self.data_length
315 315 new.data_unused = self.data_unused
316 316 return new
317 317
318 318 def __cmp__(self, other):
319 319 if self.uid < other.uid:
320 320 return -1
321 321 if self.uid > other.uid:
322 322 return 1
323 323 elif self.data_length < other.data_length:
324 324 return -1
325 325 elif self.data_length > other.data_length:
326 326 return 1
327 327 return 0
328 328
329 329 def __eq__(self, other):
330 330 return self.uid == other.uid and self.data_length == other.data_length
331 331
332 332 def serialize(self):
333 333 """return serialized bytes for a docket using the passed uid"""
334 334 data = []
335 335 data.append(S_VERSION.pack(ONDISK_VERSION))
336 336 headers = (
337 337 len(self.uid),
338 338 self.tip_rev,
339 339 self.data_length,
340 340 self.data_unused,
341 341 len(self.tip_node),
342 342 )
343 343 data.append(S_HEADER.pack(*headers))
344 344 data.append(self.uid)
345 345 data.append(self.tip_node)
346 346 return b''.join(data)
347 347
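# A hypothetical decoding helper (`_example_parse_docket` is illustrative,
# not part of this module's API), mirroring `serialize` above to document the
# byte layout: a version byte, the S_HEADER fields, then the uid bytes and
# tip_node bytes. The module's own read-side code handles the real parsing;
# this sketch only restates the format.
def _example_parse_docket(data):
    (version,) = S_VERSION.unpack(data[0 : S_VERSION.size])
    if version != ONDISK_VERSION:
        return None  # unknown on-disk version, nothing we can decode
    offset = S_VERSION.size
    headers = S_HEADER.unpack(data[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(uid=data[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = data[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused
    return docket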
348 348
349 349 def _rawdata_filepath(revlog, docket):
350 350 """The (vfs relative) nodemap's rawdata file for a given uid"""
351 351 if revlog.nodemap_file.endswith(b'.n.a'):
352 352 prefix = revlog.nodemap_file[:-4]
353 353 else:
354 354 prefix = revlog.nodemap_file[:-2]
355 355 return b"%s-%s.nd" % (prefix, docket.uid)
356 356
357 357
358 358 def _other_rawdata_filepath(revlog, docket):
359 359 prefix = revlog.nodemap_file[:-2]
360 360 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
361 361 new_file_path = _rawdata_filepath(revlog, docket)
362 362 new_file_name = revlog.opener.basename(new_file_path)
363 363 dirpath = revlog.opener.dirname(new_file_path)
364 364 others = []
365 365 for f in revlog.opener.listdir(dirpath):
366 366 if pattern.match(f) and f != new_file_name:
367 367 others.append(f)
368 368 return others
369 369
370 370
371 371 ### Nodemap Trie
372 372 #
373 373 # This is a simple reference implementation to compute and persist a nodemap
374 374 # trie. This reference implementation is write only. The python version of this
375 375 # is not expected to be actually used, since it won't provide any performance
376 376 # improvement over the existing non-persistent C implementation.
377 377 #
378 378 # The nodemap is persisted as a trie using 4bits-address/16-entries blocks. Each
379 379 # revision can be addressed using its node's shortest unique prefix.
380 380 #
381 381 # The trie is stored as a sequence of blocks. Each block contains 16 entries
382 382 # (signed 64bit integer, big endian). Each entry can be one of the following:
383 383 #
384 384 # * value >= 0 -> index of sub-block
385 385 # * value == -1 -> no value
386 386 # * value < -1 -> encoded revision: rev = -(value+2)
387 387 #
388 388 # See REV_OFFSET and _transform_rev below.
389 389 #
390 390 # The implementation focuses on simplicity, not on performance. A Rust
391 391 # implementation should provide an efficient version of the same binary
392 392 # persistence. This reference python implementation is never meant to be
393 393 # extensively used in production.
394 394
395 395
396 396 def persistent_data(index):
397 397 """return the persistent binary form for a nodemap for a given index"""
398 398 trie = _build_trie(index)
399 399 return _persist_trie(trie)
400 400
401 401
402 402 def update_persistent_data(index, root, max_idx, last_rev):
403 403 """return the incremental update for persistent nodemap from a given index"""
404 404 changed_block, trie = _update_trie(index, root, last_rev)
405 405 return (
406 406 changed_block * S_BLOCK.size,
407 407 _persist_trie(trie, existing_idx=max_idx),
408 408 )
409 409
410 410
411 411 S_BLOCK = struct.Struct(">" + ("l" * 16))
412 412
413 413 NO_ENTRY = -1
414 414 # rev 0 needs to be -2 because 0 is used as a block index and -1 is a special value.
415 415 REV_OFFSET = 2
416 416
417 417
418 418 def _transform_rev(rev):
419 419 """Return the number used to represent the rev in the tree.
420 420
421 421 (or retrieve a rev number from such representation)
422 422
423 423 Note that this is an involution, a function equal to its inverse (i.e.
424 424 which gives the identity when applied to itself).
425 425 """
426 426 return -(rev + REV_OFFSET)
427 427
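# Illustrative sanity check (hypothetical helper, not used by the persistence
# code) for the entry encoding described above: revision numbers are stored
# below -1 so they never collide with block indexes (>= 0) or NO_ENTRY (-1),
# and `_transform_rev` is its own inverse.
def _example_entry_encoding():
    assert _transform_rev(0) == -2  # rev 0 is stored as -2
    assert _transform_rev(-2) == 0  # and decodes back to 0
    assert _transform_rev(_transform_rev(5)) == 5  # involution
    assert NO_ENTRY == -1  # the "no value" marker is reserved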
428 428
429 429 def _to_int(hex_digit):
430 430 """turn an hexadecimal digit into a proper integer"""
431 431 return int(hex_digit, 16)
432 432
433 433
434 434 class Block(dict):
435 435 """represent a block of the Trie
436 436
437 437 contains up to 16 entries indexed from 0 to 15"""
438 438
439 439 def __init__(self):
440 440 super(Block, self).__init__()
441 441 # If this block exists on disk, here is its ID
442 442 self.ondisk_id = None
443 443
444 444 def __iter__(self):
445 445 return iter(self.get(i) for i in range(16))
446 446
447 447
448 448 def _build_trie(index):
449 449 """build a nodemap trie
450 450
451 451 The nodemap stores revision number for each unique prefix.
452 452
453 453 Each block is a dictionary with keys in `[0, 15]`. Values are either
454 454 another block or a revision number.
455 455 """
456 456 root = Block()
457 457 for rev in range(len(index)):
458 458 current_hex = hex(index[rev][7])
459 459 _insert_into_block(index, 0, root, rev, current_hex)
460 460 return root
461 461
462 462
463 463 def _update_trie(index, root, last_rev):
464 464 """consume"""
465 465 changed = 0
466 466 for rev in range(last_rev + 1, len(index)):
467 467 current_hex = hex(index[rev][7])
468 468 changed += _insert_into_block(index, 0, root, rev, current_hex)
469 469 return changed, root
470 470
471 471
472 472 def _insert_into_block(index, level, block, current_rev, current_hex):
473 473 """insert a new revision in a block
474 474
475 475 index: the index we are adding revisions for
476 476 level: the depth of the current block in the trie
477 477 block: the block currently being considered
478 478 current_rev: the revision number we are adding
479 479 current_hex: the hexadecimal representation of the node of that revision
480 480 """
481 481 changed = 1
482 482 if block.ondisk_id is not None:
483 483 block.ondisk_id = None
484 484 hex_digit = _to_int(current_hex[level : level + 1])
485 485 entry = block.get(hex_digit)
486 486 if entry is None:
487 487 # no entry, simply store the revision number
488 488 block[hex_digit] = current_rev
489 489 elif isinstance(entry, dict):
490 490 # need to recurse to an underlying block
491 491 changed += _insert_into_block(
492 492 index, level + 1, entry, current_rev, current_hex
493 493 )
494 494 else:
495 495 # collision with a previously unique prefix, inserting new
496 496 # vertices to fit both entries.
497 497 other_hex = hex(index[entry][7])
498 498 other_rev = entry
499 499 new = Block()
500 500 block[hex_digit] = new
501 501 _insert_into_block(index, level + 1, new, other_rev, other_hex)
502 502 _insert_into_block(index, level + 1, new, current_rev, current_hex)
503 503 return changed
504 504
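# Illustrative example of the collision handling above (hypothetical node
# values): if revision 5 is the only entry whose node starts with b"a", it is
# stored directly as root[0xa] = 5. Inserting revision 8 whose node starts
# with b"ac" while revision 5's node starts with b"ab" replaces that entry
# with a new sub-block so that root[0xa][0xb] = 5 and root[0xa][0xc] = 8.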
505 505
506 506 def _persist_trie(root, existing_idx=None):
507 507 """turn a nodemap trie into persistent binary data
508 508
509 509 See `_build_trie` for nodemap trie structure"""
510 510 block_map = {}
511 511 if existing_idx is not None:
512 512 base_idx = existing_idx + 1
513 513 else:
514 514 base_idx = 0
515 515 chunks = []
516 516 for tn in _walk_trie(root):
517 517 if tn.ondisk_id is not None:
518 518 block_map[id(tn)] = tn.ondisk_id
519 519 else:
520 520 block_map[id(tn)] = len(chunks) + base_idx
521 521 chunks.append(_persist_block(tn, block_map))
522 522 return b''.join(chunks)
523 523
524 524
525 525 def _walk_trie(block):
526 526 """yield all the block in a trie
527 527
528 528 Children blocks are always yield before their parent block.
529 529 """
530 530 for (__, item) in sorted(block.items()):
531 531 if isinstance(item, dict):
532 532 for sub_block in _walk_trie(item):
533 533 yield sub_block
534 534 yield block
535 535
536 536
537 537 def _persist_block(block_node, block_map):
538 538 """produce persistent binary data for a single block
539 539
540 540 Child blocks are assumed to be already persisted and present in
541 541 block_map.
542 542 """
543 543 data = tuple(_to_value(v, block_map) for v in block_node)
544 544 return S_BLOCK.pack(*data)
545 545
546 546
547 547 def _to_value(item, block_map):
548 548 """persist any value as an integer"""
549 549 if item is None:
550 550 return NO_ENTRY
551 551 elif isinstance(item, dict):
552 552 return block_map[id(item)]
553 553 else:
554 554 return _transform_rev(item)
555 555
556 556
557 557 def parse_data(data):
558 558 """parse parse nodemap data into a nodemap Trie"""
559 559 if (len(data) % S_BLOCK.size) != 0:
560 560 msg = "nodemap data size is not a multiple of block size (%d): %d"
561 561 raise error.Abort(msg % (S_BLOCK.size, len(data)))
562 562 if not data:
563 563 return Block(), None
564 564 block_map = {}
565 565 new_blocks = []
566 566 for i in range(0, len(data), S_BLOCK.size):
567 567 block = Block()
568 568 block.ondisk_id = len(block_map)
569 569 block_map[block.ondisk_id] = block
570 570 block_data = data[i : i + S_BLOCK.size]
571 571 values = S_BLOCK.unpack(block_data)
572 572 new_blocks.append((block, values))
573 573 for b, values in new_blocks:
574 574 for idx, v in enumerate(values):
575 575 if v == NO_ENTRY:
576 576 continue
577 577 elif v >= 0:
578 578 b[idx] = block_map[v]
579 579 else:
580 580 b[idx] = _transform_rev(v)
581 581 return block, i // S_BLOCK.size
582 582
583 583
584 584 # debug utility
585 585
586 586
587 587 def check_data(ui, index, data):
588 588 """verify that the provided nodemap data are valid for the given idex"""
589 589 ret = 0
590 590 ui.status((b"revision in index: %d\n") % len(index))
591 591 root, __ = parse_data(data)
592 592 all_revs = set(_all_revisions(root))
593 593 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
594 594 for r in range(len(index)):
595 595 if r not in all_revs:
596 596 msg = b" revision missing from nodemap: %d\n" % r
597 597 ui.write_err(msg)
598 598 ret = 1
599 599 else:
600 600 all_revs.remove(r)
601 601 nm_rev = _find_node(root, hex(index[r][7]))
602 602 if nm_rev is None:
603 603 msg = b" revision node does not match any entries: %d\n" % r
604 604 ui.write_err(msg)
605 605 ret = 1
606 606 elif nm_rev != r:
607 607 msg = (
608 608 b" revision node does not match the expected revision: "
609 609 b"%d != %d\n" % (r, nm_rev)
610 610 )
611 611 ui.write_err(msg)
612 612 ret = 1
613 613
614 614 if all_revs:
615 615 for r in sorted(all_revs):
616 616 msg = b" extra revision in nodemap: %d\n" % r
617 617 ui.write_err(msg)
618 618 ret = 1
619 619 return ret
620 620
621 621
622 622 def _all_revisions(root):
623 623 """return all revisions stored in a Trie"""
624 624 for block in _walk_trie(root):
625 625 for v in block:
626 626 if v is None or isinstance(v, Block):
627 627 continue
628 628 yield v
629 629
630 630
631 631 def _find_node(block, node):
632 632 """find the revision associated with a given node"""
633 633 entry = block.get(_to_int(node[0:1]))
634 634 if isinstance(entry, dict):
635 635 return _find_node(entry, node[1:])
636 636 return entry
637
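# A minimal, hypothetical read-side sketch (`_example_raw_lookup` is
# illustrative, not part of this module's API) showing how a lookup could
# walk the raw persisted bytes directly instead of going through
# `parse_data`. It only relies on the layout documented above: the root block
# is the last block in the file (children are persisted before their
# parents), entries >= 0 are block indexes, and entries below -1 encode
# revisions.
def _example_raw_lookup(data, hex_node):
    nb_blocks = len(data) // S_BLOCK.size
    if not nb_blocks:
        return None
    block_idx = nb_blocks - 1  # start at the root block, persisted last
    for level in range(len(hex_node)):
        offset = block_idx * S_BLOCK.size
        values = S_BLOCK.unpack(data[offset : offset + S_BLOCK.size])
        entry = values[_to_int(hex_node[level : level + 1])]
        if entry == NO_ENTRY:
            return None  # no revision with this prefix
        elif entry >= 0:
            block_idx = entry  # descend into the referenced sub-block
        else:
            return _transform_rev(entry)  # leaf: decoded revision number
    return None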
638
639 def get_nodemap_file(opener, indexfile):
640 if indexfile.endswith(b'.a'):
641 pending_path = indexfile[:-4] + b".n.a"
642 if opener.exists(pending_path):
643 return pending_path
644 else:
645 return indexfile[:-4] + b".n"
646 else:
647 return indexfile[:-2] + b".n"
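# For example (hypothetical store file names): an index file b'00changelog.i'
# maps to the nodemap docket b'00changelog.n', while a pending index
# b'00changelog.i.a' maps to b'00changelog.n.a' when that pending docket
# already exists, and falls back to b'00changelog.n' otherwise.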