revlog: move the _revisioncache on the inner object...
marmoute
r51989:8ec2de9c default
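This changeset moves the raw-revision cache (`_revisioncache`) from the `revlog` object onto the `_InnerRevlog` helper it delegates to. A minimal before/after sketch of the access pattern, drawn from the hunks below (illustrative only, not code added by the change):

    # before: the cache was initialized in revlog.__init__
    #     self._revisioncache = None
    # and read directly on the revlog object, e.g. in bundlerevlog._rawtext:
    #     if self._revisioncache and self._revisioncache[1] == iterrev:
    #         rawtext = self._revisioncache[2]
    #
    # after: the cache is initialized in _InnerRevlog.__init__ and reached
    # through the inner object:
    #     if (
    #         self._inner._revisioncache
    #         and self._inner._revisioncache[1] == iterrev
    #     ):
    #         rawtext = self._inner._revisioncache[2]
    #
    # the cached value keeps the same shape: a (node, rev, rawtext) 3-tuple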
@@ -1,754 +1,757 @@ mercurial/bundlerepo.py
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14
15 15 import contextlib
16 16 import os
17 17 import shutil
18 18
19 19 from .i18n import _
20 20 from .node import (
21 21 hex,
22 22 nullrev,
23 23 )
24 24
25 25 from . import (
26 26 bundle2,
27 27 changegroup,
28 28 changelog,
29 29 cmdutil,
30 30 discovery,
31 31 encoding,
32 32 error,
33 33 exchange,
34 34 filelog,
35 35 localrepo,
36 36 manifest,
37 37 mdiff,
38 38 pathutil,
39 39 phases,
40 40 pycompat,
41 41 revlog,
42 42 revlogutils,
43 43 util,
44 44 vfs as vfsmod,
45 45 )
46 46 from .utils import (
47 47 urlutil,
48 48 )
49 49
50 50 from .revlogutils import (
51 51 constants as revlog_constants,
52 52 )
53 53
54 54
55 55 class bundlerevlog(revlog.revlog):
56 56 def __init__(self, opener, target, radix, cgunpacker, linkmapper):
57 57 # How it works:
58 58 # To retrieve a revision, we need to know the offset of the revision in
59 59 # the bundle (an unbundle object). We store this offset in the index
60 60 # (start). The base of the delta is stored in the base field.
61 61 #
62 62 # To differentiate a rev in the bundle from a rev in the revlog, we
63 63 # check revision against repotiprev.
64 64 opener = vfsmod.readonlyvfs(opener)
65 65 revlog.revlog.__init__(self, opener, target=target, radix=radix)
66 66 self.bundle = cgunpacker
67 67 n = len(self)
68 68 self.repotiprev = n - 1
69 69 self.bundlerevs = set() # used by 'bundle()' revset expression
70 70 for deltadata in cgunpacker.deltaiter():
71 71 node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
72 72
73 73 size = len(delta)
74 74 start = cgunpacker.tell() - size
75 75
76 76 if self.index.has_node(node):
77 77 # this can happen if two branches make the same change
78 78 self.bundlerevs.add(self.index.rev(node))
79 79 continue
80 80 if cs == node:
81 81 linkrev = nullrev
82 82 else:
83 83 linkrev = linkmapper(cs)
84 84
85 85 for p in (p1, p2):
86 86 if not self.index.has_node(p):
87 87 raise error.LookupError(
88 88 p, self.display_id, _(b"unknown parent")
89 89 )
90 90
91 91 if not self.index.has_node(deltabase):
92 92 raise error.LookupError(
93 93 deltabase, self.display_id, _(b'unknown delta base')
94 94 )
95 95
96 96 baserev = self.rev(deltabase)
97 97 # start, size, full unc. size, base (unused), link, p1, p2, node, sidedata_offset (unused), sidedata_size (unused)
98 98 e = revlogutils.entry(
99 99 flags=flags,
100 100 data_offset=start,
101 101 data_compressed_length=size,
102 102 data_delta_base=baserev,
103 103 link_rev=linkrev,
104 104 parent_rev_1=self.rev(p1),
105 105 parent_rev_2=self.rev(p2),
106 106 node_id=node,
107 107 )
108 108 self.index.append(e)
109 109 self.bundlerevs.add(n)
110 110 n += 1
111 111
112 112 @contextlib.contextmanager
113 113 def reading(self):
114 114 if self.repotiprev < 0:
115 115 yield
116 116 else:
117 117 with super().reading() as x:
118 118 yield x
119 119
120 120 def _chunk(self, rev):
121 121 # Warning: in case of bundle, the diff is against what we stored as
122 122 # delta base, not against rev - 1
123 123 # XXX: could use some caching
124 124 if rev <= self.repotiprev:
125 125 return revlog.revlog._chunk(self, rev)
126 126 self.bundle.seek(self.start(rev))
127 127 return self.bundle.read(self.length(rev))
128 128
129 129 def revdiff(self, rev1, rev2):
130 130 """return or calculate a delta between two revisions"""
131 131 if rev1 > self.repotiprev and rev2 > self.repotiprev:
132 132 # hot path for bundle
133 133 revb = self.index[rev2][3]
134 134 if revb == rev1:
135 135 return self._chunk(rev2)
136 136 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
137 137 return revlog.revlog.revdiff(self, rev1, rev2)
138 138
139 139 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
140 140
141 141 def _rawtext(self, node, rev):
142 142 if rev is None:
143 143 rev = self.rev(node)
144 144 validated = False
145 145 rawtext = None
146 146 chain = []
147 147 iterrev = rev
148 148 # reconstruct the revision if it is from a changegroup
149 149 while iterrev > self.repotiprev:
150 if self._revisioncache and self._revisioncache[1] == iterrev:
151 rawtext = self._revisioncache[2]
150 if (
151 self._inner._revisioncache
152 and self._inner._revisioncache[1] == iterrev
153 ):
154 rawtext = self._inner._revisioncache[2]
152 155 break
153 156 chain.append(iterrev)
154 157 iterrev = self.index[iterrev][3]
155 158 if iterrev == nullrev:
156 159 rawtext = b''
157 160 elif rawtext is None:
158 161 r = super(bundlerevlog, self)._rawtext(
159 162 self.node(iterrev),
160 163 iterrev,
161 164 )
162 165 __, rawtext, validated = r
163 166 if chain:
164 167 validated = False
165 168 while chain:
166 169 delta = self._chunk(chain.pop())
167 170 rawtext = mdiff.patches(rawtext, [delta])
168 171 return rev, rawtext, validated
169 172
170 173 def addrevision(self, *args, **kwargs):
171 174 raise NotImplementedError
172 175
173 176 def addgroup(self, *args, **kwargs):
174 177 raise NotImplementedError
175 178
176 179 def strip(self, *args, **kwargs):
177 180 raise NotImplementedError
178 181
179 182 def checksize(self):
180 183 raise NotImplementedError
181 184
182 185
183 186 class bundlechangelog(bundlerevlog, changelog.changelog):
184 187 def __init__(self, opener, cgunpacker):
185 188 changelog.changelog.__init__(self, opener)
186 189 linkmapper = lambda x: x
187 190 bundlerevlog.__init__(
188 191 self,
189 192 opener,
190 193 (revlog_constants.KIND_CHANGELOG, None),
191 194 self.radix,
192 195 cgunpacker,
193 196 linkmapper,
194 197 )
195 198
196 199
197 200 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
198 201 def __init__(
199 202 self,
200 203 nodeconstants,
201 204 opener,
202 205 cgunpacker,
203 206 linkmapper,
204 207 dirlogstarts=None,
205 208 dir=b'',
206 209 ):
207 210 # XXX manifestrevlog is not actually a revlog, so mixing it with
208 211 # bundlerevlog is not a good idea.
209 212 manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
210 213 bundlerevlog.__init__(
211 214 self,
212 215 opener,
213 216 (revlog_constants.KIND_MANIFESTLOG, dir),
214 217 self._revlog.radix,
215 218 cgunpacker,
216 219 linkmapper,
217 220 )
218 221 if dirlogstarts is None:
219 222 dirlogstarts = {}
220 223 if self.bundle.version == b"03":
221 224 dirlogstarts = _getfilestarts(self.bundle)
222 225 self._dirlogstarts = dirlogstarts
223 226 self._linkmapper = linkmapper
224 227
225 228 def dirlog(self, d):
226 229 if d in self._dirlogstarts:
227 230 self.bundle.seek(self._dirlogstarts[d])
228 231 return bundlemanifest(
229 232 self.nodeconstants,
230 233 self.opener,
231 234 self.bundle,
232 235 self._linkmapper,
233 236 self._dirlogstarts,
234 237 dir=d,
235 238 )
236 239 return super(bundlemanifest, self).dirlog(d)
237 240
238 241
239 242 class bundlefilelog(filelog.filelog):
240 243 def __init__(self, opener, path, cgunpacker, linkmapper):
241 244 filelog.filelog.__init__(self, opener, path)
242 245 self._revlog = bundlerevlog(
243 246 opener,
244 247 # XXX should use the unencoded path
245 248 target=(revlog_constants.KIND_FILELOG, path),
246 249 radix=self._revlog.radix,
247 250 cgunpacker=cgunpacker,
248 251 linkmapper=linkmapper,
249 252 )
250 253
251 254
252 255 class bundlepeer(localrepo.localpeer):
253 256 def canpush(self):
254 257 return False
255 258
256 259
257 260 class bundlephasecache(phases.phasecache):
258 261 def __init__(self, *args, **kwargs):
259 262 super(bundlephasecache, self).__init__(*args, **kwargs)
260 263 if hasattr(self, 'opener'):
261 264 self.opener = vfsmod.readonlyvfs(self.opener)
262 265
263 266 def write(self):
264 267 raise NotImplementedError
265 268
266 269 def _write(self, fp):
267 270 raise NotImplementedError
268 271
269 272 def _updateroots(self, phase, newroots, tr):
270 273 self.phaseroots[phase] = newroots
271 274 self.invalidate()
272 275 self.dirty = True
273 276
274 277
275 278 def _getfilestarts(cgunpacker):
276 279 filespos = {}
277 280 for chunkdata in iter(cgunpacker.filelogheader, {}):
278 281 fname = chunkdata[b'filename']
279 282 filespos[fname] = cgunpacker.tell()
280 283 for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
281 284 pass
282 285 return filespos
283 286
284 287
285 288 class bundlerepository:
286 289 """A repository instance that is a union of a local repo and a bundle.
287 290
288 291 Instances represent a read-only repository composed of a local repository
289 292 with the contents of a bundle file applied. The repository instance is
290 293 conceptually similar to the state of a repository after an
291 294 ``hg unbundle`` operation. However, the contents of the bundle are never
292 295 applied to the actual base repository.
293 296
294 297 Instances constructed directly are not usable as repository objects.
295 298 Use instance() or makebundlerepository() to create instances.
296 299 """
297 300
298 301 def __init__(self, bundlepath, url, tempparent):
299 302 self._tempparent = tempparent
300 303 self._url = url
301 304
302 305 self.ui.setconfig(b'phases', b'publish', False, b'bundlerepo')
303 306
304 307 # dict with the mapping 'filename' -> position in the changegroup.
305 308 self._cgfilespos = {}
306 309 self._bundlefile = None
307 310 self._cgunpacker = None
308 311 self.tempfile = None
309 312 f = util.posixfile(bundlepath, b"rb")
310 313 bundle = exchange.readbundle(self.ui, f, bundlepath)
311 314
312 315 if isinstance(bundle, bundle2.unbundle20):
313 316 self._bundlefile = bundle
314 317
315 318 cgpart = None
316 319 for part in bundle.iterparts(seekable=True):
317 320 if part.type == b'phase-heads':
318 321 self._handle_bundle2_phase_part(bundle, part)
319 322 elif part.type == b'changegroup':
320 323 if cgpart:
321 324 raise NotImplementedError(
322 325 b"can't process multiple changegroups"
323 326 )
324 327 cgpart = part
325 328 self._handle_bundle2_cg_part(bundle, part)
326 329
327 330 if not cgpart:
328 331 raise error.Abort(_(b"No changegroups found"))
329 332
330 333 # This is required to placate a later consumer, which expects
331 334 # the payload offset to be at the beginning of the changegroup.
332 335 # We need to do this after the iterparts() generator advances
333 336 # because iterparts() will seek to end of payload after the
334 337 # generator returns control to iterparts().
335 338 cgpart.seek(0, os.SEEK_SET)
336 339
337 340 elif isinstance(bundle, changegroup.cg1unpacker):
338 341 self._handle_bundle1(bundle, bundlepath)
339 342 else:
340 343 raise error.Abort(
341 344 _(b'bundle type %r cannot be read') % type(bundle)
342 345 )
343 346
344 347 def _handle_bundle1(self, bundle, bundlepath):
345 348 if bundle.compressed():
346 349 f = self._writetempbundle(bundle.read, b'.hg10un', header=b'HG10UN')
347 350 bundle = exchange.readbundle(self.ui, f, bundlepath, self.vfs)
348 351
349 352 self._bundlefile = bundle
350 353 self._cgunpacker = bundle
351 354
352 355 self.firstnewrev = self.changelog.repotiprev + 1
353 356 phases.retractboundary(
354 357 self,
355 358 None,
356 359 phases.draft,
357 360 [ctx.node() for ctx in self[self.firstnewrev :]],
358 361 )
359 362
360 363 def _handle_bundle2_cg_part(self, bundle, part):
361 364 assert part.type == b'changegroup'
362 365 cgstream = part
363 366 targetphase = part.params.get(b'targetphase')
364 367 try:
365 368 targetphase = int(targetphase)
366 369 except TypeError:
367 370 pass
368 371 if targetphase is None:
369 372 targetphase = phases.draft
370 373 if targetphase not in phases.allphases:
371 374 m = _(b'unsupported targetphase: %d')
372 375 m %= targetphase
373 376 raise error.Abort(m)
374 377 version = part.params.get(b'version', b'01')
375 378 legalcgvers = changegroup.supportedincomingversions(self)
376 379 if version not in legalcgvers:
377 380 msg = _(b'Unsupported changegroup version: %s')
378 381 raise error.Abort(msg % version)
379 382 if bundle.compressed():
380 383 cgstream = self._writetempbundle(part.read, b'.cg%sun' % version)
381 384
382 385 self._cgunpacker = changegroup.getunbundler(version, cgstream, b'UN')
383 386
384 387 self.firstnewrev = self.changelog.repotiprev + 1
385 388 phases.retractboundary(
386 389 self,
387 390 None,
388 391 targetphase,
389 392 [ctx.node() for ctx in self[self.firstnewrev :]],
390 393 )
391 394
392 395 def _handle_bundle2_phase_part(self, bundle, part):
393 396 assert part.type == b'phase-heads'
394 397
395 398 unfi = self.unfiltered()
396 399 headsbyphase = phases.binarydecode(part)
397 400 phases.updatephases(unfi, lambda: None, headsbyphase)
398 401
399 402 def _writetempbundle(self, readfn, suffix, header=b''):
400 403 """Write a temporary file to disk"""
401 404 fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix)
402 405 self.tempfile = temp
403 406
404 407 with os.fdopen(fdtemp, 'wb') as fptemp:
405 408 fptemp.write(header)
406 409 while True:
407 410 chunk = readfn(2 ** 18)
408 411 if not chunk:
409 412 break
410 413 fptemp.write(chunk)
411 414
412 415 return self.vfs.open(self.tempfile, mode=b"rb")
413 416
414 417 @localrepo.unfilteredpropertycache
415 418 def _phasecache(self):
416 419 return bundlephasecache(self, self._phasedefaults)
417 420
418 421 @localrepo.unfilteredpropertycache
419 422 def changelog(self):
420 423 # consume the header if it exists
421 424 self._cgunpacker.changelogheader()
422 425 c = bundlechangelog(self.svfs, self._cgunpacker)
423 426 self.manstart = self._cgunpacker.tell()
424 427 return c
425 428
426 429 def _refreshchangelog(self):
427 430 # changelog for bundle repo are not filecache, this method is not
428 431 # applicable.
429 432 pass
430 433
431 434 @localrepo.unfilteredpropertycache
432 435 def manifestlog(self):
433 436 self._cgunpacker.seek(self.manstart)
434 437 # consume the header if it exists
435 438 self._cgunpacker.manifestheader()
436 439 linkmapper = self.unfiltered().changelog.rev
437 440 rootstore = bundlemanifest(
438 441 self.nodeconstants, self.svfs, self._cgunpacker, linkmapper
439 442 )
440 443 self.filestart = self._cgunpacker.tell()
441 444
442 445 return manifest.manifestlog(
443 446 self.svfs, self, rootstore, self.narrowmatch()
444 447 )
445 448
446 449 def _consumemanifest(self):
447 450 """Consumes the manifest portion of the bundle, setting filestart so the
448 451 file portion can be read."""
449 452 self._cgunpacker.seek(self.manstart)
450 453 self._cgunpacker.manifestheader()
451 454 for delta in self._cgunpacker.deltaiter():
452 455 pass
453 456 self.filestart = self._cgunpacker.tell()
454 457
455 458 @localrepo.unfilteredpropertycache
456 459 def manstart(self):
457 460 self.changelog
458 461 return self.manstart
459 462
460 463 @localrepo.unfilteredpropertycache
461 464 def filestart(self):
462 465 self.manifestlog
463 466
464 467 # If filestart was not set by self.manifestlog, that means the
465 468 # manifestlog implementation did not consume the manifests from the
466 469 # changegroup (ex: it might be consuming trees from a separate bundle2
467 470 # part instead). So we need to manually consume it.
468 471 if 'filestart' not in self.__dict__:
469 472 self._consumemanifest()
470 473
471 474 return self.filestart
472 475
473 476 def url(self):
474 477 return self._url
475 478
476 479 def file(self, f):
477 480 if not self._cgfilespos:
478 481 self._cgunpacker.seek(self.filestart)
479 482 self._cgfilespos = _getfilestarts(self._cgunpacker)
480 483
481 484 if f in self._cgfilespos:
482 485 self._cgunpacker.seek(self._cgfilespos[f])
483 486 linkmapper = self.unfiltered().changelog.rev
484 487 return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
485 488 else:
486 489 return super(bundlerepository, self).file(f)
487 490
488 491 def close(self):
489 492 """Close assigned bundle file immediately."""
490 493 self._bundlefile.close()
491 494 if self.tempfile is not None:
492 495 self.vfs.unlink(self.tempfile)
493 496 if self._tempparent:
494 497 shutil.rmtree(self._tempparent, True)
495 498
496 499 def cancopy(self):
497 500 return False
498 501
499 502 def peer(self, path=None, remotehidden=False):
500 503 return bundlepeer(self, path=path, remotehidden=remotehidden)
501 504
502 505 def getcwd(self):
503 506 return encoding.getcwd() # always outside the repo
504 507
505 508 # Check if parents exist in localrepo before setting
506 509 def setparents(self, p1, p2=None):
507 510 if p2 is None:
508 511 p2 = self.nullid
509 512 p1rev = self.changelog.rev(p1)
510 513 p2rev = self.changelog.rev(p2)
511 514 msg = _(b"setting parent to node %s that only exists in the bundle\n")
512 515 if self.changelog.repotiprev < p1rev:
513 516 self.ui.warn(msg % hex(p1))
514 517 if self.changelog.repotiprev < p2rev:
515 518 self.ui.warn(msg % hex(p2))
516 519 return super(bundlerepository, self).setparents(p1, p2)
517 520
518 521
519 522 def instance(ui, path, create, intents=None, createopts=None):
520 523 if create:
521 524 raise error.Abort(_(b'cannot create new bundle repository'))
522 525 # internal config: bundle.mainreporoot
523 526 parentpath = ui.config(b"bundle", b"mainreporoot")
524 527 if not parentpath:
525 528 # try to find the correct path to the working directory repo
526 529 parentpath = cmdutil.findrepo(encoding.getcwd())
527 530 if parentpath is None:
528 531 parentpath = b''
529 532 if parentpath:
530 533 # Try to make the full path relative so we get a nice, short URL.
531 534 # In particular, we don't want temp dir names in test outputs.
532 535 cwd = encoding.getcwd()
533 536 if parentpath == cwd:
534 537 parentpath = b''
535 538 else:
536 539 cwd = pathutil.normasprefix(cwd)
537 540 if parentpath.startswith(cwd):
538 541 parentpath = parentpath[len(cwd) :]
539 542 u = urlutil.url(path)
540 543 path = u.localpath()
541 544 if u.scheme == b'bundle':
542 545 s = path.split(b"+", 1)
543 546 if len(s) == 1:
544 547 repopath, bundlename = parentpath, s[0]
545 548 else:
546 549 repopath, bundlename = s
547 550 else:
548 551 repopath, bundlename = parentpath, path
549 552
550 553 return makebundlerepository(ui, repopath, bundlename)
551 554
552 555
553 556 def makebundlerepository(ui, repopath, bundlepath):
554 557 """Make a bundle repository object based on repo and bundle paths."""
555 558 if repopath:
556 559 url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
557 560 else:
558 561 url = b'bundle:%s' % bundlepath
559 562
560 563 # Because we can't make any guarantees about the type of the base
561 564 # repository, we can't have a static class representing the bundle
562 565 # repository. We also can't make any guarantees about how to even
563 566 # call the base repository's constructor!
564 567 #
565 568 # So, our strategy is to go through ``localrepo.instance()`` to construct
566 569 # a repo instance. Then, we dynamically create a new type derived from
567 570 # both it and our ``bundlerepository`` class which overrides some
568 571 # functionality. We then change the type of the constructed repository
569 572 # to this new type and initialize the bundle-specific bits of it.
570 573
571 574 try:
572 575 repo = localrepo.instance(ui, repopath, create=False)
573 576 tempparent = None
574 577 except error.RequirementError:
575 578 raise # no fallback if the backing repo is unsupported
576 579 except error.RepoError:
577 580 tempparent = pycompat.mkdtemp()
578 581 try:
579 582 repo = localrepo.instance(ui, tempparent, create=True)
580 583 except Exception:
581 584 shutil.rmtree(tempparent)
582 585 raise
583 586
584 587 class derivedbundlerepository(bundlerepository, repo.__class__):
585 588 pass
586 589
587 590 repo.__class__ = derivedbundlerepository
588 591 bundlerepository.__init__(repo, bundlepath, url, tempparent)
589 592
590 593 return repo
591 594
592 595
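As a usage illustration for the helper above, here is a minimal sketch of opening a bundle as a read-only repository (the paths are examples and the `ui` setup is an assumption; none of this is part of the change):

    from mercurial import ui as uimod
    from mercurial import bundlerepo

    ui = uimod.ui.load()
    # view '/tmp/changes.hg' on top of an existing repository (example paths)
    repo = bundlerepo.makebundlerepository(
        ui, b'/path/to/base-repo', b'/tmp/changes.hg'
    )
    try:
        # revisions that only exist in the bundle start at repo.firstnewrev
        for ctx in repo[repo.firstnewrev:]:
            ui.write(b'%s\n' % ctx.hex())
    finally:
        repo.close()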
593 596 class bundletransactionmanager:
594 597 def transaction(self):
595 598 return None
596 599
597 600 def close(self):
598 601 raise NotImplementedError
599 602
600 603 def release(self):
601 604 raise NotImplementedError
602 605
603 606
604 607 def getremotechanges(
605 608 ui, repo, peer, onlyheads=None, bundlename=None, force=False
606 609 ):
607 610 """obtains a bundle of changes incoming from peer
608 611
609 612 "onlyheads" restricts the returned changes to those reachable from the
610 613 specified heads.
611 614 "bundlename", if given, stores the bundle to this file path permanently;
612 615 otherwise it's stored to a temp file and gets deleted again when you call
613 616 the returned "cleanupfn".
614 617 "force" indicates whether to proceed on unrelated repos.
615 618
616 619 Returns a tuple (local, csets, cleanupfn):
617 620
618 621 "local" is a local repo from which to obtain the actual incoming
619 622 changesets; it is a bundlerepo for the obtained bundle when the
620 623 original "peer" is remote.
621 624 "csets" lists the incoming changeset node ids.
622 625 "cleanupfn" must be called without arguments when you're done processing
623 626 the changes; it closes both the original "peer" and the one returned
624 627 here.
625 628 """
626 629 tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
627 630 common, incoming, rheads = tmp
628 631 if not incoming:
629 632 try:
630 633 if bundlename:
631 634 os.unlink(bundlename)
632 635 except OSError:
633 636 pass
634 637 return repo, [], peer.close
635 638
636 639 commonset = set(common)
637 640 rheads = [x for x in rheads if x not in commonset]
638 641
639 642 bundle = None
640 643 bundlerepo = None
641 644 localrepo = peer.local()
642 645 if bundlename or not localrepo:
643 646 # create a bundle (uncompressed if peer repo is not local)
644 647
645 648 # developer config: devel.legacy.exchange
646 649 legexc = ui.configlist(b'devel', b'legacy.exchange')
647 650 forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
648 651 canbundle2 = (
649 652 not forcebundle1
650 653 and peer.capable(b'getbundle')
651 654 and peer.capable(b'bundle2')
652 655 )
653 656 if canbundle2:
654 657 with peer.commandexecutor() as e:
655 658 b2 = e.callcommand(
656 659 b'getbundle',
657 660 {
658 661 b'source': b'incoming',
659 662 b'common': common,
660 663 b'heads': rheads,
661 664 b'bundlecaps': exchange.caps20to10(
662 665 repo, role=b'client'
663 666 ),
664 667 b'cg': True,
665 668 },
666 669 ).result()
667 670
668 671 fname = bundle = changegroup.writechunks(
669 672 ui, b2._forwardchunks(), bundlename
670 673 )
671 674 else:
672 675 if peer.capable(b'getbundle'):
673 676 with peer.commandexecutor() as e:
674 677 cg = e.callcommand(
675 678 b'getbundle',
676 679 {
677 680 b'source': b'incoming',
678 681 b'common': common,
679 682 b'heads': rheads,
680 683 },
681 684 ).result()
682 685 elif onlyheads is None and not peer.capable(b'changegroupsubset'):
683 686 # compat with older servers when pulling all remote heads
684 687
685 688 with peer.commandexecutor() as e:
686 689 cg = e.callcommand(
687 690 b'changegroup',
688 691 {
689 692 b'nodes': incoming,
690 693 b'source': b'incoming',
691 694 },
692 695 ).result()
693 696
694 697 rheads = None
695 698 else:
696 699 with peer.commandexecutor() as e:
697 700 cg = e.callcommand(
698 701 b'changegroupsubset',
699 702 {
700 703 b'bases': incoming,
701 704 b'heads': rheads,
702 705 b'source': b'incoming',
703 706 },
704 707 ).result()
705 708
706 709 if localrepo:
707 710 bundletype = b"HG10BZ"
708 711 else:
709 712 bundletype = b"HG10UN"
710 713 fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
711 714 # keep written bundle?
712 715 if bundlename:
713 716 bundle = None
714 717 if not localrepo:
715 718 # use the created uncompressed bundlerepo
716 719 localrepo = bundlerepo = makebundlerepository(
717 720 repo.baseui, repo.root, fname
718 721 )
719 722
720 723 # this repo contains local and peer now, so filter out local again
721 724 common = repo.heads()
722 725 if localrepo:
723 726 # Part of common may be remotely filtered
724 727 # So use an unfiltered version
725 728 # The discovery process probably need cleanup to avoid that
726 729 localrepo = localrepo.unfiltered()
727 730
728 731 csets = localrepo.changelog.findmissing(common, rheads)
729 732
730 733 if bundlerepo:
731 734 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
732 735
733 736 with peer.commandexecutor() as e:
734 737 remotephases = e.callcommand(
735 738 b'listkeys',
736 739 {
737 740 b'namespace': b'phases',
738 741 },
739 742 ).result()
740 743
741 744 pullop = exchange.pulloperation(
742 745 bundlerepo, peer, path=None, heads=reponodes
743 746 )
744 747 pullop.trmanager = bundletransactionmanager()
745 748 exchange._pullapplyphases(pullop, remotephases)
746 749
747 750 def cleanup():
748 751 if bundlerepo:
749 752 bundlerepo.close()
750 753 if bundle:
751 754 os.unlink(bundle)
752 755 peer.close()
753 756
754 757 return (localrepo, csets, cleanup)
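A rough usage sketch for getremotechanges(), based on its docstring above; the peer creation and URL are assumptions and are not part of this module:

    from mercurial import hg, bundlerepo

    # 'repo' is an existing local repository object; the URL is an example
    other = hg.peer(repo.ui, {}, b'https://example.com/some-repo')
    local, csets, cleanup = bundlerepo.getremotechanges(repo.ui, repo, other)
    try:
        for node in csets:
            repo.ui.write(b'incoming: %s\n' % local[node].hex())
    finally:
        cleanup()  # closes the temporary bundle repo (if any) and the peer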
@@ -1,3971 +1,3974 @@ mercurial/revlog.py
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are delta encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help keep each object
314 314 self-contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 delta_config,
357 357 feature_config,
358 358 chunk_cache,
359 359 default_compression_header,
360 360 ):
361 361 self.opener = opener
362 362 self.index = index
363 363
364 364 self.__index_file = index_file
365 365 self.data_file = data_file
366 366 self.sidedata_file = sidedata_file
367 367 self.inline = inline
368 368 self.data_config = data_config
369 369 self.delta_config = delta_config
370 370 self.feature_config = feature_config
371 371
372 372 self._default_compression_header = default_compression_header
373 373
374 374 # index
375 375
376 376 # 3-tuple of file handles being used for active writing.
377 377 self._writinghandles = None
378 378
379 379 self._segmentfile = randomaccessfile.randomaccessfile(
380 380 self.opener,
381 381 (self.index_file if self.inline else self.data_file),
382 382 self.data_config.chunk_cache_size,
383 383 chunk_cache,
384 384 )
385 385 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
386 386 self.opener,
387 387 self.sidedata_file,
388 388 self.data_config.chunk_cache_size,
389 389 )
390 390
391 391 # revlog header -> revlog compressor
392 392 self._decompressors = {}
393 # 3-tuple of (node, rev, text) for a raw revision.
394 self._revisioncache = None
393 395
394 396 @property
395 397 def index_file(self):
396 398 return self.__index_file
397 399
398 400 @index_file.setter
399 401 def index_file(self, new_index_file):
400 402 self.__index_file = new_index_file
401 403 if self.inline:
402 404 self._segmentfile.filename = new_index_file
403 405
404 406 def __len__(self):
405 407 return len(self.index)
406 408
407 409 # Derived from index values.
408 410
409 411 def start(self, rev):
410 412 """the offset of the data chunk for this revision"""
411 413 return int(self.index[rev][0] >> 16)
412 414
413 415 def length(self, rev):
414 416 """the length of the data chunk for this revision"""
415 417 return self.index[rev][1]
416 418
417 419 def end(self, rev):
418 420 """the end of the data chunk for this revision"""
419 421 return self.start(rev) + self.length(rev)
420 422
421 423 def deltaparent(self, rev):
422 424 """return deltaparent of the given revision"""
423 425 base = self.index[rev][3]
424 426 if base == rev:
425 427 return nullrev
426 428 elif self.delta_config.general_delta:
427 429 return base
428 430 else:
429 431 return rev - 1
430 432
431 433 def issnapshot(self, rev):
432 434 """tells whether rev is a snapshot"""
433 435 if not self.delta_config.sparse_revlog:
434 436 return self.deltaparent(rev) == nullrev
435 437 elif hasattr(self.index, 'issnapshot'):
436 438 # directly assign the method to cache the testing and access
437 439 self.issnapshot = self.index.issnapshot
438 440 return self.issnapshot(rev)
439 441 if rev == nullrev:
440 442 return True
441 443 entry = self.index[rev]
442 444 base = entry[3]
443 445 if base == rev:
444 446 return True
445 447 if base == nullrev:
446 448 return True
447 449 p1 = entry[5]
448 450 while self.length(p1) == 0:
449 451 b = self.deltaparent(p1)
450 452 if b == p1:
451 453 break
452 454 p1 = b
453 455 p2 = entry[6]
454 456 while self.length(p2) == 0:
455 457 b = self.deltaparent(p2)
456 458 if b == p2:
457 459 break
458 460 p2 = b
459 461 if base == p1 or base == p2:
460 462 return False
461 463 return self.issnapshot(base)
462 464
463 465 def _deltachain(self, rev, stoprev=None):
464 466 """Obtain the delta chain for a revision.
465 467
466 468 ``stoprev`` specifies a revision to stop at. If not specified, we
467 469 stop at the base of the chain.
468 470
469 471 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
470 472 revs in ascending order and ``stopped`` is a bool indicating whether
471 473 ``stoprev`` was hit.
472 474 """
473 475 generaldelta = self.delta_config.general_delta
474 476 # Try C implementation.
475 477 try:
476 478 return self.index.deltachain(rev, stoprev, generaldelta)
477 479 except AttributeError:
478 480 pass
479 481
480 482 chain = []
481 483
482 484 # Alias to prevent attribute lookup in tight loop.
483 485 index = self.index
484 486
485 487 iterrev = rev
486 488 e = index[iterrev]
487 489 while iterrev != e[3] and iterrev != stoprev:
488 490 chain.append(iterrev)
489 491 if generaldelta:
490 492 iterrev = e[3]
491 493 else:
492 494 iterrev -= 1
493 495 e = index[iterrev]
494 496
495 497 if iterrev == stoprev:
496 498 stopped = True
497 499 else:
498 500 chain.append(iterrev)
499 501 stopped = False
500 502
501 503 chain.reverse()
502 504 return chain, stopped
503 505
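To make the return value concrete, a small worked example against a hypothetical generaldelta index (added here as an illustration, not part of the file):

    # suppose index[5][3] == 3 and index[3][3] == 3 (rev 3 is its own delta base):
    #   _deltachain(5)            -> ([3, 5], False)  # walked down to the chain base
    #   _deltachain(5, stoprev=3) -> ([5], True)      # stopped before including rev 3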
504 506 @util.propertycache
505 507 def _compressor(self):
506 508 engine = util.compengines[self.feature_config.compression_engine]
507 509 return engine.revlogcompressor(
508 510 self.feature_config.compression_engine_options
509 511 )
510 512
511 513 @util.propertycache
512 514 def _decompressor(self):
513 515 """the default decompressor"""
514 516 if self._default_compression_header is None:
515 517 return None
516 518 t = self._default_compression_header
517 519 c = self._get_decompressor(t)
518 520 return c.decompress
519 521
520 522 def _get_decompressor(self, t):
521 523 try:
522 524 compressor = self._decompressors[t]
523 525 except KeyError:
524 526 try:
525 527 engine = util.compengines.forrevlogheader(t)
526 528 compressor = engine.revlogcompressor(
527 529 self.feature_config.compression_engine_options
528 530 )
529 531 self._decompressors[t] = compressor
530 532 except KeyError:
531 533 raise error.RevlogError(
532 534 _(b'unknown compression type %s') % binascii.hexlify(t)
533 535 )
534 536 return compressor
535 537
536 538 def compress(self, data):
537 539 """Generate a possibly-compressed representation of data."""
538 540 if not data:
539 541 return b'', data
540 542
541 543 compressed = self._compressor.compress(data)
542 544
543 545 if compressed:
544 546 # The revlog compressor added the header in the returned data.
545 547 return b'', compressed
546 548
547 549 if data[0:1] == b'\0':
548 550 return b'', data
549 551 return b'u', data
550 552
551 553 def decompress(self, data):
552 554 """Decompress a revlog chunk.
553 555
554 556 The chunk is expected to begin with a header identifying the
555 557 format type so it can be routed to an appropriate decompressor.
556 558 """
557 559 if not data:
558 560 return data
559 561
560 562 # Revlogs are read much more frequently than they are written and many
561 563 # chunks only take microseconds to decompress, so performance is
562 564 # important here.
563 565 #
564 566 # We can make a few assumptions about revlogs:
565 567 #
566 568 # 1) the majority of chunks will be compressed (as opposed to inline
567 569 # raw data).
568 570 # 2) decompressing *any* data will likely be at least 10x slower than
569 571 # returning raw inline data.
570 572 # 3) we want to prioritize common and officially supported compression
571 573 # engines
572 574 #
573 575 # It follows that we want to optimize for "decompress compressed data
574 576 # when encoded with common and officially supported compression engines"
575 577 # case over "raw data" and "data encoded by less common or non-official
576 578 # compression engines." That is why we have the inline lookup first
577 579 # followed by the compengines lookup.
578 580 #
579 581 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
580 582 # compressed chunks. And this matters for changelog and manifest reads.
581 583 t = data[0:1]
582 584
583 585 if t == b'x':
584 586 try:
585 587 return _zlibdecompress(data)
586 588 except zlib.error as e:
587 589 raise error.RevlogError(
588 590 _(b'revlog decompress error: %s')
589 591 % stringutil.forcebytestr(e)
590 592 )
591 593 # '\0' is more common than 'u' so it goes first.
592 594 elif t == b'\0':
593 595 return data
594 596 elif t == b'u':
595 597 return util.buffer(data, 1)
596 598
597 599 compressor = self._get_decompressor(t)
598 600
599 601 return compressor.decompress(data)
600 602
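The header convention handled above (b'\0' and b'u' mark data stored as-is, anything else is routed to a compression engine, e.g. b'x' for zlib) can be illustrated with a small standalone sketch; this is a simplification, since the real compress() returns the header separately and supports pluggable engines:

    import zlib

    def toy_compress(data):
        # keep zlib output (it starts with b'x'); otherwise store the data raw,
        # adding a b'u' marker unless it already starts with b'\0'
        comp = zlib.compress(data)
        if len(comp) < len(data):
            return comp
        if not data or data[:1] == b'\0':
            return data
        return b'u' + data

    def toy_decompress(chunk):
        t = chunk[:1]
        if not chunk or t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        if t == b'x':
            return zlib.decompress(chunk)
        raise ValueError('unhandled chunk header %r' % t)

    assert toy_decompress(toy_compress(b'some revision text')) == b'some revision text'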
601 603 @contextlib.contextmanager
602 604 def reading(self):
603 605 """Context manager that keeps data and sidedata files open for reading"""
604 606 if len(self.index) == 0:
605 607 yield # nothing to be read
606 608 else:
607 609 with self._segmentfile.reading():
608 610 with self._segmentfile_sidedata.reading():
609 611 yield
610 612
611 613 @property
612 614 def is_writing(self):
613 615 """True is a writing context is open"""
614 616 return self._writinghandles is not None
615 617
616 618 @contextlib.contextmanager
617 619 def writing(self, transaction, data_end=None, sidedata_end=None):
618 620 """Open the revlog files for writing
619 621
620 622 Adding content to a revlog should be done within such a context.
621 623 """
622 624 if self.is_writing:
623 625 yield
624 626 else:
625 627 ifh = dfh = sdfh = None
626 628 try:
627 629 r = len(self.index)
628 630 # opening the data file.
629 631 dsize = 0
630 632 if r:
631 633 dsize = self.end(r - 1)
632 634 dfh = None
633 635 if not self.inline:
634 636 try:
635 637 dfh = self.opener(self.data_file, mode=b"r+")
636 638 if data_end is None:
637 639 dfh.seek(0, os.SEEK_END)
638 640 else:
639 641 dfh.seek(data_end, os.SEEK_SET)
640 642 except FileNotFoundError:
641 643 dfh = self.opener(self.data_file, mode=b"w+")
642 644 transaction.add(self.data_file, dsize)
643 645 if self.sidedata_file is not None:
644 646 assert sidedata_end is not None
645 647 # revlog-v2 does not inline, help Pytype
646 648 assert dfh is not None
647 649 try:
648 650 sdfh = self.opener(self.sidedata_file, mode=b"r+")
649 651 dfh.seek(sidedata_end, os.SEEK_SET)
650 652 except FileNotFoundError:
651 653 sdfh = self.opener(self.sidedata_file, mode=b"w+")
652 654 transaction.add(self.sidedata_file, sidedata_end)
653 655
654 656 # opening the index file.
655 657 isize = r * self.index.entry_size
656 658 ifh = self.__index_write_fp()
657 659 if self.inline:
658 660 transaction.add(self.index_file, dsize + isize)
659 661 else:
660 662 transaction.add(self.index_file, isize)
661 663 # exposing all file handle for writing.
662 664 self._writinghandles = (ifh, dfh, sdfh)
663 665 self._segmentfile.writing_handle = ifh if self.inline else dfh
664 666 self._segmentfile_sidedata.writing_handle = sdfh
665 667 yield
666 668 finally:
667 669 self._writinghandles = None
668 670 self._segmentfile.writing_handle = None
669 671 self._segmentfile_sidedata.writing_handle = None
670 672 if dfh is not None:
671 673 dfh.close()
672 674 if sdfh is not None:
673 675 sdfh.close()
674 676 # closing the index file last to avoid exposing referent to
675 677 # potential unflushed data content.
676 678 if ifh is not None:
677 679 ifh.close()
678 680
679 681 def __index_write_fp(self, index_end=None):
680 682 """internal method to open the index file for writing
681 683
682 684 You should not use this directly and use `_writing` instead
683 685 """
684 686 try:
685 687 f = self.opener(
686 688 self.index_file,
687 689 mode=b"r+",
688 690 checkambig=self.data_config.check_ambig,
689 691 )
690 692 if index_end is None:
691 693 f.seek(0, os.SEEK_END)
692 694 else:
693 695 f.seek(index_end, os.SEEK_SET)
694 696 return f
695 697 except FileNotFoundError:
696 698 return self.opener(
697 699 self.index_file,
698 700 mode=b"w+",
699 701 checkambig=self.data_config.check_ambig,
700 702 )
701 703
702 704 def __index_new_fp(self):
703 705 """internal method to create a new index file for writing
704 706
705 707 You should not use this unless you are upgrading from inline revlog
706 708 """
707 709 return self.opener(
708 710 self.index_file,
709 711 mode=b"w",
710 712 checkambig=self.data_config.check_ambig,
711 713 atomictemp=True,
712 714 )
713 715
714 716 def split_inline(self, tr, header, new_index_file_path=None):
715 717 """split the data of an inline revlog into an index and a data file"""
716 718 existing_handles = False
717 719 if self._writinghandles is not None:
718 720 existing_handles = True
719 721 fp = self._writinghandles[0]
720 722 fp.flush()
721 723 fp.close()
722 724 # We can't use the cached file handle after close(). So prevent
723 725 # its usage.
724 726 self._writinghandles = None
725 727 self._segmentfile.writing_handle = None
726 728 # No need to deal with sidedata writing handle as it is only
727 729 # relevant with revlog-v2 which is never inline, not reaching
728 730 # this code
729 731
730 732 new_dfh = self.opener(self.data_file, mode=b"w+")
731 733 new_dfh.truncate(0) # drop any potentially existing data
732 734 try:
733 735 with self.reading():
734 736 for r in range(len(self.index)):
735 737 new_dfh.write(self.get_segment_for_revs(r, r)[1])
736 738 new_dfh.flush()
737 739
738 740 if new_index_file_path is not None:
739 741 self.index_file = new_index_file_path
740 742 with self.__index_new_fp() as fp:
741 743 self.inline = False
742 744 for i in range(len(self.index)):
743 745 e = self.index.entry_binary(i)
744 746 if i == 0:
745 747 packed_header = self.index.pack_header(header)
746 748 e = packed_header + e
747 749 fp.write(e)
748 750
749 751 # If we don't use side-write, the temp file replaces the real
750 752 # index when we exit the context manager
751 753
752 754 self._segmentfile = randomaccessfile.randomaccessfile(
753 755 self.opener,
754 756 self.data_file,
755 757 self.data_config.chunk_cache_size,
756 758 )
757 759
758 760 if existing_handles:
759 761 # switched from inline to conventional reopen the index
760 762 ifh = self.__index_write_fp()
761 763 self._writinghandles = (ifh, new_dfh, None)
762 764 self._segmentfile.writing_handle = new_dfh
763 765 new_dfh = None
764 766 # No need to deal with sidedata writing handle as it is only
765 767 # relevant with revlog-v2 which is never inline, not reaching
766 768 # this code
767 769 finally:
768 770 if new_dfh is not None:
769 771 new_dfh.close()
770 772 return self.index_file
771 773
772 774 def get_segment_for_revs(self, startrev, endrev):
773 775 """Obtain a segment of raw data corresponding to a range of revisions.
774 776
775 777 Accepts the start and end revisions and an optional already-open
776 778 file handle to be used for reading. If the file handle is read, its
777 779 seek position will not be preserved.
778 780
779 781 Requests for data may be satisfied by a cache.
780 782
781 783 Returns a 2-tuple of (offset, data) for the requested range of
782 784 revisions. Offset is the integer offset from the beginning of the
783 785 revlog and data is a str or buffer of the raw byte data.
784 786
785 787 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
786 788 to determine where each revision's data begins and ends.
787 789
788 790 API: we should consider making this a private part of the InnerRevlog
789 791 at some point.
790 792 """
791 793 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
792 794 # (functions are expensive).
793 795 index = self.index
794 796 istart = index[startrev]
795 797 start = int(istart[0] >> 16)
796 798 if startrev == endrev:
797 799 end = start + istart[1]
798 800 else:
799 801 iend = index[endrev]
800 802 end = int(iend[0] >> 16) + iend[1]
801 803
802 804 if self.inline:
803 805 start += (startrev + 1) * self.index.entry_size
804 806 end += (endrev + 1) * self.index.entry_size
805 807 length = end - start
806 808
807 809 return start, self._segmentfile.read_chunk(start, length)
808 810
809 811 def _chunk(self, rev):
810 812 """Obtain a single decompressed chunk for a revision.
811 813
812 814 Accepts an integer revision and an optional already-open file handle
813 815 to be used for reading. If used, the seek position of the file will not
814 816 be preserved.
815 817
816 818 Returns a str holding uncompressed data for the requested revision.
817 819 """
818 820 compression_mode = self.index[rev][10]
819 821 data = self.get_segment_for_revs(rev, rev)[1]
820 822 if compression_mode == COMP_MODE_PLAIN:
821 823 return data
822 824 elif compression_mode == COMP_MODE_DEFAULT:
823 825 return self._decompressor(data)
824 826 elif compression_mode == COMP_MODE_INLINE:
825 827 return self.decompress(data)
826 828 else:
827 829 msg = b'unknown compression mode %d'
828 830 msg %= compression_mode
829 831 raise error.RevlogError(msg)
830 832
831 833 def _chunks(self, revs, targetsize=None):
832 834 """Obtain decompressed chunks for the specified revisions.
833 835
834 836 Accepts an iterable of numeric revisions that are assumed to be in
835 837 ascending order. Also accepts an optional already-open file handle
836 838 to be used for reading. If used, the seek position of the file will
837 839 not be preserved.
838 840
839 841 This function is similar to calling ``self._chunk()`` multiple times,
840 842 but is faster.
841 843
842 844 Returns a list with decompressed data for each requested revision.
843 845 """
844 846 if not revs:
845 847 return []
846 848 start = self.start
847 849 length = self.length
848 850 inline = self.inline
849 851 iosize = self.index.entry_size
850 852 buffer = util.buffer
851 853
852 854 l = []
853 855 ladd = l.append
854 856
855 857 if not self.data_config.with_sparse_read:
856 858 slicedchunks = (revs,)
857 859 else:
858 860 slicedchunks = deltautil.slicechunk(
859 861 self,
860 862 revs,
861 863 targetsize=targetsize,
862 864 )
863 865
864 866 for revschunk in slicedchunks:
865 867 firstrev = revschunk[0]
866 868 # Skip trailing revisions with empty diff
867 869 for lastrev in revschunk[::-1]:
868 870 if length(lastrev) != 0:
869 871 break
870 872
871 873 try:
872 874 offset, data = self.get_segment_for_revs(firstrev, lastrev)
873 875 except OverflowError:
874 876 # issue4215 - we can't cache a run of chunks greater than
875 877 # 2G on Windows
876 878 return [self._chunk(rev) for rev in revschunk]
877 879
878 880 decomp = self.decompress
879 881 # self._decompressor might be None, but will not be used in that case
880 882 def_decomp = self._decompressor
881 883 for rev in revschunk:
882 884 chunkstart = start(rev)
883 885 if inline:
884 886 chunkstart += (rev + 1) * iosize
885 887 chunklength = length(rev)
886 888 comp_mode = self.index[rev][10]
887 889 c = buffer(data, chunkstart - offset, chunklength)
888 890 if comp_mode == COMP_MODE_PLAIN:
889 891 ladd(c)
890 892 elif comp_mode == COMP_MODE_INLINE:
891 893 ladd(decomp(c))
892 894 elif comp_mode == COMP_MODE_DEFAULT:
893 895 ladd(def_decomp(c))
894 896 else:
895 897 msg = b'unknown compression mode %d'
896 898 msg %= comp_mode
897 899 raise error.RevlogError(msg)
898 900
899 901 return l
900 902
901 903
902 904 class revlog:
903 905 """
904 906 the underlying revision storage object
905 907
906 908 A revlog consists of two parts, an index and the revision data.
907 909
908 910 The index is a file with a fixed record size containing
909 911 information on each revision, including its nodeid (hash), the
910 912 nodeids of its parents, the position and offset of its data within
911 913 the data file, and the revision it's based on. Finally, each entry
912 914 contains a linkrev entry that can serve as a pointer to external
913 915 data.
914 916
915 917 The revision data itself is a linear collection of data chunks.
916 918 Each chunk represents a revision and is usually represented as a
917 919 delta against the previous chunk. To bound lookup time, runs of
918 920 deltas are limited to about 2 times the length of the original
919 921 version data. This makes retrieval of a version proportional to
920 922 its size, or O(1) relative to the number of revisions.
921 923
922 924 Both pieces of the revlog are written to in an append-only
923 925 fashion, which means we never need to rewrite a file to insert or
924 926 remove data, and can use some simple techniques to avoid the need
925 927 for locking while reading.
926 928
927 929 If checkambig, indexfile is opened with checkambig=True at
928 930 writing, to avoid file stat ambiguity.
929 931
930 932 If mmaplargeindex is True, and an mmapindexthreshold is set, the
931 933 index will be mmapped rather than read if it is larger than the
932 934 configured threshold.
933 935
934 936 If censorable is True, the revlog can have censored revisions.
935 937
936 938 If `upperboundcomp` is not None, this is the expected maximal gain from
937 939 compression for the data content.
938 940
939 941 `concurrencychecker` is an optional function that receives 3 arguments: a
940 942 file handle, a filename, and an expected position. It should check whether
941 943 the current position in the file handle is valid, and log/warn/fail (by
942 944 raising).
943 945
944 946 See mercurial/revlogutils/constants.py for details about the content of an
945 947 index entry.
946 948 """
947 949
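# Illustrative sketch of the retrieval scheme described above (hypothetical
# helper names, not actual revlog API): a revision is rebuilt by starting
# from the full text at its delta chain base and applying each stored delta
# in order, and the bounded chain length keeps this proportional to the
# revision size.
#
#   text = read_chunk(chain_base)                  # full snapshot at the base
#   for rev in chain_revs_after(chain_base):       # bounded-length delta run
#       text = apply_delta(text, read_chunk(rev))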
948 950 _flagserrorclass = error.RevlogError
949 951
950 952 @staticmethod
951 953 def is_inline_index(header_bytes):
952 954 """Determine if a revlog is inline from the initial bytes of the index"""
953 955 header = INDEX_HEADER.unpack(header_bytes)[0]
954 956
955 957 _format_flags = header & ~0xFFFF
956 958 _format_version = header & 0xFFFF
957 959
958 960 features = FEATURES_BY_VERSION[_format_version]
959 961 return features[b'inline'](_format_flags)
960 962
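# Possible use of the helper above (illustrative only; the file path is
# hypothetical): only the first four bytes of an index are needed to tell
# whether the revlog stores its data inline.
#
#   with open(b'store/00changelog.i', 'rb') as fp:
#       inline = revlog.is_inline_index(fp.read(4))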
961 963 def __init__(
962 964 self,
963 965 opener,
964 966 target,
965 967 radix,
966 968 postfix=None, # only exist for `tmpcensored` now
967 969 checkambig=False,
968 970 mmaplargeindex=False,
969 971 censorable=False,
970 972 upperboundcomp=None,
971 973 persistentnodemap=False,
972 974 concurrencychecker=None,
973 975 trypending=False,
974 976 try_split=False,
975 977 canonical_parent_order=True,
976 978 ):
977 979 """
978 980 create a revlog object
979 981
980 982 opener is a function that abstracts the file opening operation
981 983 and can be used to implement COW semantics or the like.
982 984
983 985 `target`: a (KIND, ID) tuple that identifies the content stored in
984 986 this revlog. It helps the rest of the code understand what the revlog
985 987 is about without having to resort to heuristics and index filename
986 988 analysis. Note that this must be reliably set by normal code, but
987 989 test, debug, or performance measurement code might not set it to an
988 990 accurate value.
989 991 """
990 992
991 993 self.radix = radix
992 994
993 995 self._docket_file = None
994 996 self._indexfile = None
995 997 self._datafile = None
996 998 self._sidedatafile = None
997 999 self._nodemap_file = None
998 1000 self.postfix = postfix
999 1001 self._trypending = trypending
1000 1002 self._try_split = try_split
1001 1003 self.opener = opener
1002 1004 if persistentnodemap:
1003 1005 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1004 1006
1005 1007 assert target[0] in ALL_KINDS
1006 1008 assert len(target) == 2
1007 1009 self.target = target
1008 1010 if b'feature-config' in self.opener.options:
1009 1011 self.feature_config = self.opener.options[b'feature-config'].copy()
1010 1012 else:
1011 1013 self.feature_config = FeatureConfig()
1012 1014 self.feature_config.censorable = censorable
1013 1015 self.feature_config.canonical_parent_order = canonical_parent_order
1014 1016 if b'data-config' in self.opener.options:
1015 1017 self.data_config = self.opener.options[b'data-config'].copy()
1016 1018 else:
1017 1019 self.data_config = DataConfig()
1018 1020 self.data_config.check_ambig = checkambig
1019 1021 self.data_config.mmap_large_index = mmaplargeindex
1020 1022 if b'delta-config' in self.opener.options:
1021 1023 self.delta_config = self.opener.options[b'delta-config'].copy()
1022 1024 else:
1023 1025 self.delta_config = DeltaConfig()
1024 1026 self.delta_config.upper_bound_comp = upperboundcomp
1025 1027
1026 # 3-tuple of (node, rev, text) for a raw revision.
1027 self._revisioncache = None
1028 1028 # Maps rev to chain base rev.
1029 1029 self._chainbasecache = util.lrucachedict(100)
1030 1030
1031 1031 self.index = None
1032 1032 self._docket = None
1033 1033 self._nodemap_docket = None
1034 1034 # Mapping of partial identifiers to full nodes.
1035 1035 self._pcache = {}
1036 1036
1037 1037 # other optional features
1038 1038
1039 1039 # Make copy of flag processors so each revlog instance can support
1040 1040 # custom flags.
1041 1041 self._flagprocessors = dict(flagutil.flagprocessors)
1042 1042 # prevent nesting of addgroup
1043 1043 self._adding_group = None
1044 1044
1045 1045 chunk_cache = self._loadindex()
1046 1046 self._load_inner(chunk_cache)
1047 1047 self._concurrencychecker = concurrencychecker
1048 1048
1049 1049 @property
1050 1050 def _generaldelta(self):
1051 1051 """temporary compatibility proxy"""
1052 1052 util.nouideprecwarn(
1053 1053 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
1054 1054 )
1055 1055 return self.delta_config.general_delta
1056 1056
1057 1057 @property
1058 1058 def _checkambig(self):
1059 1059 """temporary compatibility proxy"""
1060 1060 util.nouideprecwarn(
1061 1061 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
1062 1062 )
1063 1063 return self.data_config.check_ambig
1064 1064
1065 1065 @property
1066 1066 def _mmaplargeindex(self):
1067 1067 """temporary compatibility proxy"""
1068 1068 util.nouideprecwarn(
1069 1069 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
1070 1070 )
1071 1071 return self.data_config.mmap_large_index
1072 1072
1073 1073 @property
1074 1074 def _censorable(self):
1075 1075 """temporary compatibility proxy"""
1076 1076 util.nouideprecwarn(
1077 1077 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
1078 1078 )
1079 1079 return self.feature_config.censorable
1080 1080
1081 1081 @property
1082 1082 def _chunkcachesize(self):
1083 1083 """temporary compatibility proxy"""
1084 1084 util.nouideprecwarn(
1085 1085 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
1086 1086 )
1087 1087 return self.data_config.chunk_cache_size
1088 1088
1089 1089 @property
1090 1090 def _maxchainlen(self):
1091 1091 """temporary compatibility proxy"""
1092 1092 util.nouideprecwarn(
1093 1093 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
1094 1094 )
1095 1095 return self.delta_config.max_chain_len
1096 1096
1097 1097 @property
1098 1098 def _deltabothparents(self):
1099 1099 """temporary compatibility proxy"""
1100 1100 util.nouideprecwarn(
1101 1101 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
1102 1102 )
1103 1103 return self.delta_config.delta_both_parents
1104 1104
1105 1105 @property
1106 1106 def _candidate_group_chunk_size(self):
1107 1107 """temporary compatibility proxy"""
1108 1108 util.nouideprecwarn(
1109 1109 b"use revlog.delta_config.candidate_group_chunk_size",
1110 1110 b"6.6",
1111 1111 stacklevel=2,
1112 1112 )
1113 1113 return self.delta_config.candidate_group_chunk_size
1114 1114
1115 1115 @property
1116 1116 def _debug_delta(self):
1117 1117 """temporary compatibility proxy"""
1118 1118 util.nouideprecwarn(
1119 1119 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
1120 1120 )
1121 1121 return self.delta_config.debug_delta
1122 1122
1123 1123 @property
1124 1124 def _compengine(self):
1125 1125 """temporary compatibility proxy"""
1126 1126 util.nouideprecwarn(
1127 1127 b"use revlog.feature_config.compression_engine",
1128 1128 b"6.6",
1129 1129 stacklevel=2,
1130 1130 )
1131 1131 return self.feature_config.compression_engine
1132 1132
1133 1133 @property
1134 1134 def upperboundcomp(self):
1135 1135 """temporary compatibility proxy"""
1136 1136 util.nouideprecwarn(
1137 1137 b"use revlog.delta_config.upper_bound_comp",
1138 1138 b"6.6",
1139 1139 stacklevel=2,
1140 1140 )
1141 1141 return self.delta_config.upper_bound_comp
1142 1142
1143 1143 @property
1144 1144 def _compengineopts(self):
1145 1145 """temporary compatibility proxy"""
1146 1146 util.nouideprecwarn(
1147 1147 b"use revlog.feature_config.compression_engine_options",
1148 1148 b"6.6",
1149 1149 stacklevel=2,
1150 1150 )
1151 1151 return self.feature_config.compression_engine_options
1152 1152
1153 1153 @property
1154 1154 def _maxdeltachainspan(self):
1155 1155 """temporary compatibility proxy"""
1156 1156 util.nouideprecwarn(
1157 1157 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
1158 1158 )
1159 1159 return self.delta_config.max_deltachain_span
1160 1160
1161 1161 @property
1162 1162 def _withsparseread(self):
1163 1163 """temporary compatibility proxy"""
1164 1164 util.nouideprecwarn(
1165 1165 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
1166 1166 )
1167 1167 return self.data_config.with_sparse_read
1168 1168
1169 1169 @property
1170 1170 def _sparserevlog(self):
1171 1171 """temporary compatibility proxy"""
1172 1172 util.nouideprecwarn(
1173 1173 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
1174 1174 )
1175 1175 return self.delta_config.sparse_revlog
1176 1176
1177 1177 @property
1178 1178 def hassidedata(self):
1179 1179 """temporary compatibility proxy"""
1180 1180 util.nouideprecwarn(
1181 1181 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1182 1182 )
1183 1183 return self.feature_config.has_side_data
1184 1184
1185 1185 @property
1186 1186 def _srdensitythreshold(self):
1187 1187 """temporary compatibility proxy"""
1188 1188 util.nouideprecwarn(
1189 1189 b"use revlog.data_config.sr_density_threshold",
1190 1190 b"6.6",
1191 1191 stacklevel=2,
1192 1192 )
1193 1193 return self.data_config.sr_density_threshold
1194 1194
1195 1195 @property
1196 1196 def _srmingapsize(self):
1197 1197 """temporary compatibility proxy"""
1198 1198 util.nouideprecwarn(
1199 1199 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1200 1200 )
1201 1201 return self.data_config.sr_min_gap_size
1202 1202
1203 1203 @property
1204 1204 def _compute_rank(self):
1205 1205 """temporary compatibility proxy"""
1206 1206 util.nouideprecwarn(
1207 1207 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1208 1208 )
1209 1209 return self.feature_config.compute_rank
1210 1210
1211 1211 @property
1212 1212 def canonical_parent_order(self):
1213 1213 """temporary compatibility proxy"""
1214 1214 util.nouideprecwarn(
1215 1215 b"use revlog.feature_config.canonical_parent_order",
1216 1216 b"6.6",
1217 1217 stacklevel=2,
1218 1218 )
1219 1219 return self.feature_config.canonical_parent_order
1220 1220
1221 1221 @property
1222 1222 def _lazydelta(self):
1223 1223 """temporary compatibility proxy"""
1224 1224 util.nouideprecwarn(
1225 1225 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1226 1226 )
1227 1227 return self.delta_config.lazy_delta
1228 1228
1229 1229 @property
1230 1230 def _lazydeltabase(self):
1231 1231 """temporary compatibility proxy"""
1232 1232 util.nouideprecwarn(
1233 1233 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1234 1234 )
1235 1235 return self.delta_config.lazy_delta_base
1236 1236
1237 1237 def _init_opts(self):
1238 1238 """process options (from above/config) to set up the associated default revlog mode
1239 1239
1240 1240 These values might be affected when actually reading on-disk information.
1241 1241
1242 1242 The relevant values are returned for use in _loadindex().
1243 1243
1244 1244 * new_header:
1245 1245 version header to use if we need to create a new revlog
1246 1246
1247 1247 * mmapindexthreshold:
1248 1248 minimal index size at which to start using mmap
1249 1249
1250 1250 * force_nodemap:
1251 1251 force the usage of a "development" version of the nodemap code
1252 1252 """
1253 1253 opts = self.opener.options
1254 1254
1255 1255 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1256 1256 new_header = CHANGELOGV2
1257 1257 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1258 1258 self.feature_config.compute_rank = compute_rank
1259 1259 elif b'revlogv2' in opts:
1260 1260 new_header = REVLOGV2
1261 1261 elif b'revlogv1' in opts:
1262 1262 new_header = REVLOGV1 | FLAG_INLINE_DATA
1263 1263 if b'generaldelta' in opts:
1264 1264 new_header |= FLAG_GENERALDELTA
1265 1265 elif b'revlogv0' in self.opener.options:
1266 1266 new_header = REVLOGV0
1267 1267 else:
1268 1268 new_header = REVLOG_DEFAULT_VERSION
1269 1269
1270 1270 mmapindexthreshold = None
1271 1271 if self.data_config.mmap_large_index:
1272 1272 mmapindexthreshold = self.data_config.mmap_index_threshold
1273 1273 if self.feature_config.enable_ellipsis:
1274 1274 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1275 1275
1276 1276 # revlog v0 doesn't have flag processors
1277 1277 for flag, processor in opts.get(b'flagprocessors', {}).items():
1278 1278 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1279 1279
1280 1280 chunk_cache_size = self.data_config.chunk_cache_size
1281 1281 if chunk_cache_size <= 0:
1282 1282 raise error.RevlogError(
1283 1283 _(b'revlog chunk cache size %r is not greater than 0')
1284 1284 % chunk_cache_size
1285 1285 )
1286 1286 elif chunk_cache_size & (chunk_cache_size - 1):
1287 1287 raise error.RevlogError(
1288 1288 _(b'revlog chunk cache size %r is not a power of 2')
1289 1289 % chunk_cache_size
1290 1290 )
1291 1291 force_nodemap = opts.get(b'devel-force-nodemap', False)
1292 1292 return new_header, mmapindexthreshold, force_nodemap
1293 1293
1294 1294 def _get_data(self, filepath, mmap_threshold, size=None):
1295 1295 """return a file content with or without mmap
1296 1296
1297 1297 If the file is missing return the empty string"""
1298 1298 try:
1299 1299 with self.opener(filepath) as fp:
1300 1300 if mmap_threshold is not None:
1301 1301 file_size = self.opener.fstat(fp).st_size
1302 1302 if file_size >= mmap_threshold:
1303 1303 if size is not None:
1304 1304 # avoid potential mmap crash
1305 1305 size = min(file_size, size)
1306 1306 # TODO: should call .close() to release resources without
1307 1307 # relying on Python GC
1308 1308 if size is None:
1309 1309 return util.buffer(util.mmapread(fp))
1310 1310 else:
1311 1311 return util.buffer(util.mmapread(fp, size))
1312 1312 if size is None:
1313 1313 return fp.read()
1314 1314 else:
1315 1315 return fp.read(size)
1316 1316 except FileNotFoundError:
1317 1317 return b''
1318 1318
1319 1319 def get_streams(self, max_linkrev, force_inline=False):
1320 1320 """return a list of streams that represent this revlog
1321 1321
1322 1322 This is used by stream-clone to do bytes to bytes copies of a repository.
1323 1323
1324 1324 This streams data for all revisions that refer to a changelog revision up
1325 1325 to `max_linkrev`.
1326 1326
1327 1327 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1328 1328
1329 1329 It returns a list of three-tuples:
1330 1330
1331 1331 [
1332 1332 (filename, bytes_stream, stream_size),
1333 1333 …
1334 1334 ]
1335 1335 """
1336 1336 n = len(self)
1337 1337 index = self.index
1338 1338 while n > 0:
1339 1339 linkrev = index[n - 1][4]
1340 1340 if linkrev < max_linkrev:
1341 1341 break
1342 1342 # note: this loop will rarely go through multiple iterations, since
1343 1343 # it only traverses commits created during the current streaming
1344 1344 # pull operation.
1345 1345 #
1346 1346 # If this becomes a problem, using a binary search should cap the
1347 1347 # runtime of this.
1348 1348 n = n - 1
1349 1349 if n == 0:
1350 1350 # no data to send
1351 1351 return []
1352 1352 index_size = n * index.entry_size
1353 1353 data_size = self.end(n - 1)
1354 1354
1355 1355 # XXX we might have been split (or stripped) since the object
1356 1356 # initialization. We need to close this race too, by having a way to
1357 1357 # pre-open the files we feed to the revlog and never closing them before
1358 1358 # we are done streaming.
1359 1359
1360 1360 if self._inline:
1361 1361
1362 1362 def get_stream():
1363 1363 with self.opener(self._indexfile, mode=b"r") as fp:
1364 1364 yield None
1365 1365 size = index_size + data_size
1366 1366 if size <= 65536:
1367 1367 yield fp.read(size)
1368 1368 else:
1369 1369 yield from util.filechunkiter(fp, limit=size)
1370 1370
1371 1371 inline_stream = get_stream()
1372 1372 next(inline_stream)
1373 1373 return [
1374 1374 (self._indexfile, inline_stream, index_size + data_size),
1375 1375 ]
1376 1376 elif force_inline:
1377 1377
1378 1378 def get_stream():
1379 1379 with self.reading():
1380 1380 yield None
1381 1381
1382 1382 for rev in range(n):
1383 1383 idx = self.index.entry_binary(rev)
1384 1384 if rev == 0 and self._docket is None:
1385 1385 # re-inject the inline flag
1386 1386 header = self._format_flags
1387 1387 header |= self._format_version
1388 1388 header |= FLAG_INLINE_DATA
1389 1389 header = self.index.pack_header(header)
1390 1390 idx = header + idx
1391 1391 yield idx
1392 1392 yield self._inner.get_segment_for_revs(rev, rev)[1]
1393 1393
1394 1394 inline_stream = get_stream()
1395 1395 next(inline_stream)
1396 1396 return [
1397 1397 (self._indexfile, inline_stream, index_size + data_size),
1398 1398 ]
1399 1399 else:
1400 1400
1401 1401 def get_index_stream():
1402 1402 with self.opener(self._indexfile, mode=b"r") as fp:
1403 1403 yield None
1404 1404 if index_size <= 65536:
1405 1405 yield fp.read(index_size)
1406 1406 else:
1407 1407 yield from util.filechunkiter(fp, limit=index_size)
1408 1408
1409 1409 def get_data_stream():
1410 1410 with self._datafp() as fp:
1411 1411 yield None
1412 1412 if data_size <= 65536:
1413 1413 yield fp.read(data_size)
1414 1414 else:
1415 1415 yield from util.filechunkiter(fp, limit=data_size)
1416 1416
1417 1417 index_stream = get_index_stream()
1418 1418 next(index_stream)
1419 1419 data_stream = get_data_stream()
1420 1420 next(data_stream)
1421 1421 return [
1422 1422 (self._datafile, data_stream, data_size),
1423 1423 (self._indexfile, index_stream, index_size),
1424 1424 ]
1425 1425
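# Sketch of how a stream-clone consumer might use the return value above
# (illustrative; `rl` and `dest_opener` are hypothetical): each entry is
# (filename, bytes_stream, stream_size) and the stream yields raw byte
# chunks once primed.
#
#   for name, stream, size in rl.get_streams(max_linkrev):
#       with dest_opener(name, b'wb') as fp:
#           for chunk in stream:
#               fp.write(chunk)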
1426 1426 def _loadindex(self, docket=None):
1427 1427
1428 1428 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1429 1429
1430 1430 if self.postfix is not None:
1431 1431 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1432 1432 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1433 1433 entry_point = b'%s.i.a' % self.radix
1434 1434 elif self._try_split and self.opener.exists(self._split_index_file):
1435 1435 entry_point = self._split_index_file
1436 1436 else:
1437 1437 entry_point = b'%s.i' % self.radix
1438 1438
1439 1439 if docket is not None:
1440 1440 self._docket = docket
1441 1441 self._docket_file = entry_point
1442 1442 else:
1443 1443 self._initempty = True
1444 1444 entry_data = self._get_data(entry_point, mmapindexthreshold)
1445 1445 if len(entry_data) > 0:
1446 1446 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1447 1447 self._initempty = False
1448 1448 else:
1449 1449 header = new_header
1450 1450
1451 1451 self._format_flags = header & ~0xFFFF
1452 1452 self._format_version = header & 0xFFFF
1453 1453
1454 1454 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1455 1455 if supported_flags is None:
1456 1456 msg = _(b'unknown version (%d) in revlog %s')
1457 1457 msg %= (self._format_version, self.display_id)
1458 1458 raise error.RevlogError(msg)
1459 1459 elif self._format_flags & ~supported_flags:
1460 1460 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1461 1461 display_flag = self._format_flags >> 16
1462 1462 msg %= (display_flag, self._format_version, self.display_id)
1463 1463 raise error.RevlogError(msg)
1464 1464
1465 1465 features = FEATURES_BY_VERSION[self._format_version]
1466 1466 self._inline = features[b'inline'](self._format_flags)
1467 1467 self.delta_config.general_delta = features[b'generaldelta'](
1468 1468 self._format_flags
1469 1469 )
1470 1470 self.feature_config.has_side_data = features[b'sidedata']
1471 1471
1472 1472 if not features[b'docket']:
1473 1473 self._indexfile = entry_point
1474 1474 index_data = entry_data
1475 1475 else:
1476 1476 self._docket_file = entry_point
1477 1477 if self._initempty:
1478 1478 self._docket = docketutil.default_docket(self, header)
1479 1479 else:
1480 1480 self._docket = docketutil.parse_docket(
1481 1481 self, entry_data, use_pending=self._trypending
1482 1482 )
1483 1483
1484 1484 if self._docket is not None:
1485 1485 self._indexfile = self._docket.index_filepath()
1486 1486 index_data = b''
1487 1487 index_size = self._docket.index_end
1488 1488 if index_size > 0:
1489 1489 index_data = self._get_data(
1490 1490 self._indexfile, mmapindexthreshold, size=index_size
1491 1491 )
1492 1492 if len(index_data) < index_size:
1493 1493 msg = _(b'not enough index data for %s: got %d, expected %d')
1494 1494 msg %= (self.display_id, len(index_data), index_size)
1495 1495 raise error.RevlogError(msg)
1496 1496
1497 1497 self._inline = False
1498 1498 # generaldelta implied by version 2 revlogs.
1499 1499 self.delta_config.general_delta = True
1500 1500 # the logic for persistent nodemap will be dealt with within the
1501 1501 # main docket, so disable it for now.
1502 1502 self._nodemap_file = None
1503 1503
1504 1504 if self._docket is not None:
1505 1505 self._datafile = self._docket.data_filepath()
1506 1506 self._sidedatafile = self._docket.sidedata_filepath()
1507 1507 elif self.postfix is None:
1508 1508 self._datafile = b'%s.d' % self.radix
1509 1509 else:
1510 1510 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1511 1511
1512 1512 self.nodeconstants = sha1nodeconstants
1513 1513 self.nullid = self.nodeconstants.nullid
1514 1514
1515 1515 # sparse-revlog can't be on without general-delta (issue6056)
1516 1516 if not self.delta_config.general_delta:
1517 1517 self.delta_config.sparse_revlog = False
1518 1518
1519 1519 self._storedeltachains = True
1520 1520
1521 1521 devel_nodemap = (
1522 1522 self._nodemap_file
1523 1523 and force_nodemap
1524 1524 and parse_index_v1_nodemap is not None
1525 1525 )
1526 1526
1527 1527 use_rust_index = False
1528 1528 if rustrevlog is not None:
1529 1529 if self._nodemap_file is not None:
1530 1530 use_rust_index = True
1531 1531 else:
1532 1532 use_rust_index = self.opener.options.get(b'rust.index')
1533 1533
1534 1534 self._parse_index = parse_index_v1
1535 1535 if self._format_version == REVLOGV0:
1536 1536 self._parse_index = revlogv0.parse_index_v0
1537 1537 elif self._format_version == REVLOGV2:
1538 1538 self._parse_index = parse_index_v2
1539 1539 elif self._format_version == CHANGELOGV2:
1540 1540 self._parse_index = parse_index_cl_v2
1541 1541 elif devel_nodemap:
1542 1542 self._parse_index = parse_index_v1_nodemap
1543 1543 elif use_rust_index:
1544 1544 self._parse_index = parse_index_v1_mixed
1545 1545 try:
1546 1546 d = self._parse_index(index_data, self._inline)
1547 1547 index, chunkcache = d
1548 1548 use_nodemap = (
1549 1549 not self._inline
1550 1550 and self._nodemap_file is not None
1551 1551 and hasattr(index, 'update_nodemap_data')
1552 1552 )
1553 1553 if use_nodemap:
1554 1554 nodemap_data = nodemaputil.persisted_data(self)
1555 1555 if nodemap_data is not None:
1556 1556 docket = nodemap_data[0]
1557 1557 if (
1558 1558 len(d[0]) > docket.tip_rev
1559 1559 and d[0][docket.tip_rev][7] == docket.tip_node
1560 1560 ):
1561 1561 # no changelog tampering
1562 1562 self._nodemap_docket = docket
1563 1563 index.update_nodemap_data(*nodemap_data)
1564 1564 except (ValueError, IndexError):
1565 1565 raise error.RevlogError(
1566 1566 _(b"index %s is corrupted") % self.display_id
1567 1567 )
1568 1568 self.index = index
1569 1569 # revnum -> (chain-length, sum-delta-length)
1570 1570 self._chaininfocache = util.lrucachedict(500)
1571 1571
1572 1572 return chunkcache
1573 1573
1574 1574 def _load_inner(self, chunk_cache):
1575 1575 if self._docket is None:
1576 1576 default_compression_header = None
1577 1577 else:
1578 1578 default_compression_header = self._docket.default_compression_header
1579 1579
1580 1580 self._inner = _InnerRevlog(
1581 1581 opener=self.opener,
1582 1582 index=self.index,
1583 1583 index_file=self._indexfile,
1584 1584 data_file=self._datafile,
1585 1585 sidedata_file=self._sidedatafile,
1586 1586 inline=self._inline,
1587 1587 data_config=self.data_config,
1588 1588 delta_config=self.delta_config,
1589 1589 feature_config=self.feature_config,
1590 1590 chunk_cache=chunk_cache,
1591 1591 default_compression_header=default_compression_header,
1592 1592 )
1593 1593
1594 1594 def get_revlog(self):
1595 1595 """simple function to mirror API of other not-really-revlog API"""
1596 1596 return self
1597 1597
1598 1598 @util.propertycache
1599 1599 def revlog_kind(self):
1600 1600 return self.target[0]
1601 1601
1602 1602 @util.propertycache
1603 1603 def display_id(self):
1604 1604 """The public facing "ID" of the revlog that we use in message"""
1605 1605 if self.revlog_kind == KIND_FILELOG:
1606 1606 # Reference the file without the "data/" prefix, so it is familiar
1607 1607 # to the user.
1608 1608 return self.target[1]
1609 1609 else:
1610 1610 return self.radix
1611 1611
1612 1612 def _datafp(self, mode=b'r'):
1613 1613 """file object for the revlog's data file"""
1614 1614 return self.opener(self._datafile, mode=mode)
1615 1615
1616 1616 def tiprev(self):
1617 1617 return len(self.index) - 1
1618 1618
1619 1619 def tip(self):
1620 1620 return self.node(self.tiprev())
1621 1621
1622 1622 def __contains__(self, rev):
1623 1623 return 0 <= rev < len(self)
1624 1624
1625 1625 def __len__(self):
1626 1626 return len(self.index)
1627 1627
1628 1628 def __iter__(self):
1629 1629 return iter(range(len(self)))
1630 1630
1631 1631 def revs(self, start=0, stop=None):
1632 1632 """iterate over all rev in this revlog (from start to stop)"""
1633 1633 return storageutil.iterrevs(len(self), start=start, stop=stop)
1634 1634
1635 1635 def hasnode(self, node):
1636 1636 try:
1637 1637 self.rev(node)
1638 1638 return True
1639 1639 except KeyError:
1640 1640 return False
1641 1641
1642 1642 def _candelta(self, baserev, rev):
1643 1643 """whether two revisions (baserev, rev) can be delta-ed or not"""
1644 1644 # Disable delta if either rev requires a content-changing flag
1645 1645 # processor (ex. LFS). This is because such flag processor can alter
1646 1646 # the rawtext content that the delta will be based on, and two clients
1647 1647 # could have a same revlog node with different flags (i.e. different
1648 1648 # rawtext contents) and the delta could be incompatible.
1649 1649 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1650 1650 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1651 1651 ):
1652 1652 return False
1653 1653 return True
1654 1654
1655 1655 def update_caches(self, transaction):
1656 1656 """update on disk cache
1657 1657
1658 1658 If a transaction is passed, the update may be delayed to transaction
1659 1659 commit."""
1660 1660 if self._nodemap_file is not None:
1661 1661 if transaction is None:
1662 1662 nodemaputil.update_persistent_nodemap(self)
1663 1663 else:
1664 1664 nodemaputil.setup_persistent_nodemap(transaction, self)
1665 1665
1666 1666 def clearcaches(self):
1667 1667 """Clear in-memory caches"""
1668 self._revisioncache = None
1668 self._inner._revisioncache = None
1669 1669 self._chainbasecache.clear()
1670 1670 self._inner._segmentfile.clear_cache()
1671 1671 self._inner._segmentfile_sidedata.clear_cache()
1672 1672 self._pcache = {}
1673 1673 self._nodemap_docket = None
1674 1674 self.index.clearcaches()
1675 1675 # The python code is the one responsible for validating the docket, so we
1676 1676 # end up having to refresh it here.
1677 1677 use_nodemap = (
1678 1678 not self._inline
1679 1679 and self._nodemap_file is not None
1680 1680 and hasattr(self.index, 'update_nodemap_data')
1681 1681 )
1682 1682 if use_nodemap:
1683 1683 nodemap_data = nodemaputil.persisted_data(self)
1684 1684 if nodemap_data is not None:
1685 1685 self._nodemap_docket = nodemap_data[0]
1686 1686 self.index.update_nodemap_data(*nodemap_data)
1687 1687
1688 1688 def rev(self, node):
1689 1689 """return the revision number associated with a <nodeid>"""
1690 1690 try:
1691 1691 return self.index.rev(node)
1692 1692 except TypeError:
1693 1693 raise
1694 1694 except error.RevlogError:
1695 1695 # parsers.c radix tree lookup failed
1696 1696 if (
1697 1697 node == self.nodeconstants.wdirid
1698 1698 or node in self.nodeconstants.wdirfilenodeids
1699 1699 ):
1700 1700 raise error.WdirUnsupported
1701 1701 raise error.LookupError(node, self.display_id, _(b'no node'))
1702 1702
1703 1703 # Accessors for index entries.
1704 1704
1705 1705 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1706 1706 # are flags.
1707 1707 def start(self, rev):
1708 1708 return int(self.index[rev][0] >> 16)
1709 1709
1710 1710 def sidedata_cut_off(self, rev):
1711 1711 sd_cut_off = self.index[rev][8]
1712 1712 if sd_cut_off != 0:
1713 1713 return sd_cut_off
1714 1714 # This is some annoying dance, because entries without sidedata
1715 1715 # currently use 0 as their ofsset. (instead of previous-offset +
1716 1716 # previous-size)
1717 1717 #
1718 1718 # We should reconsider this sidedata → 0 sidedata_offset policy.
1719 1719 # In the meantime, we need this.
1720 1720 while 0 <= rev:
1721 1721 e = self.index[rev]
1722 1722 if e[9] != 0:
1723 1723 return e[8] + e[9]
1724 1724 rev -= 1
1725 1725 return 0
1726 1726
1727 1727 def flags(self, rev):
1728 1728 return self.index[rev][0] & 0xFFFF
1729 1729
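# Illustrative note (hypothetical names, not revlog API): the first index
# field packs the data offset and the flag bits together, which is why
# ``start`` shifts right by 16 and ``flags`` masks with 0xFFFF.
#
#   packed = (data_offset << 16) | flag_bits
#   assert packed >> 16 == data_offset
#   assert packed & 0xFFFF == flag_bits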
1730 1730 def length(self, rev):
1731 1731 return self.index[rev][1]
1732 1732
1733 1733 def sidedata_length(self, rev):
1734 1734 if not self.feature_config.has_side_data:
1735 1735 return 0
1736 1736 return self.index[rev][9]
1737 1737
1738 1738 def rawsize(self, rev):
1739 1739 """return the length of the uncompressed text for a given revision"""
1740 1740 l = self.index[rev][2]
1741 1741 if l >= 0:
1742 1742 return l
1743 1743
1744 1744 t = self.rawdata(rev)
1745 1745 return len(t)
1746 1746
1747 1747 def size(self, rev):
1748 1748 """length of non-raw text (processed by a "read" flag processor)"""
1749 1749 # fast path: if no "read" flag processor could change the content,
1750 1750 # size is rawsize. note: ELLIPSIS is known to not change the content.
1751 1751 flags = self.flags(rev)
1752 1752 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1753 1753 return self.rawsize(rev)
1754 1754
1755 1755 return len(self.revision(rev))
1756 1756
1757 1757 def fast_rank(self, rev):
1758 1758 """Return the rank of a revision if already known, or None otherwise.
1759 1759
1760 1760 The rank of a revision is the size of the sub-graph it defines as a
1761 1761 head. Equivalently, the rank of a revision `r` is the size of the set
1762 1762 `ancestors(r)`, `r` included.
1763 1763
1764 1764 This method returns the rank retrieved from the revlog in constant
1765 1765 time. It makes no attempt at computing unknown values for versions of
1766 1766 the revlog which do not persist the rank.
1767 1767 """
1768 1768 rank = self.index[rev][ENTRY_RANK]
1769 1769 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1770 1770 return None
1771 1771 if rev == nullrev:
1772 1772 return 0 # convention
1773 1773 return rank
1774 1774
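# Worked example (illustrative; `linear_revlog` is hypothetical): in a purely
# linear history the rank of revision r is r + 1, because ``ancestors(r)``
# with r included contains every revision from 0 through r.
#
#   assert linear_revlog.fast_rank(5) in (6, None)   # None if rank not persisted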
1775 1775 def chainbase(self, rev):
1776 1776 base = self._chainbasecache.get(rev)
1777 1777 if base is not None:
1778 1778 return base
1779 1779
1780 1780 index = self.index
1781 1781 iterrev = rev
1782 1782 base = index[iterrev][3]
1783 1783 while base != iterrev:
1784 1784 iterrev = base
1785 1785 base = index[iterrev][3]
1786 1786
1787 1787 self._chainbasecache[rev] = base
1788 1788 return base
1789 1789
1790 1790 def linkrev(self, rev):
1791 1791 return self.index[rev][4]
1792 1792
1793 1793 def parentrevs(self, rev):
1794 1794 try:
1795 1795 entry = self.index[rev]
1796 1796 except IndexError:
1797 1797 if rev == wdirrev:
1798 1798 raise error.WdirUnsupported
1799 1799 raise
1800 1800
1801 1801 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1802 1802 return entry[6], entry[5]
1803 1803 else:
1804 1804 return entry[5], entry[6]
1805 1805
1806 1806 # fast parentrevs(rev) where rev isn't filtered
1807 1807 _uncheckedparentrevs = parentrevs
1808 1808
1809 1809 def node(self, rev):
1810 1810 try:
1811 1811 return self.index[rev][7]
1812 1812 except IndexError:
1813 1813 if rev == wdirrev:
1814 1814 raise error.WdirUnsupported
1815 1815 raise
1816 1816
1817 1817 # Derived from index values.
1818 1818
1819 1819 def end(self, rev):
1820 1820 return self.start(rev) + self.length(rev)
1821 1821
1822 1822 def parents(self, node):
1823 1823 i = self.index
1824 1824 d = i[self.rev(node)]
1825 1825 # inline node() to avoid function call overhead
1826 1826 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1827 1827 return i[d[6]][7], i[d[5]][7]
1828 1828 else:
1829 1829 return i[d[5]][7], i[d[6]][7]
1830 1830
1831 1831 def chainlen(self, rev):
1832 1832 return self._chaininfo(rev)[0]
1833 1833
1834 1834 def _chaininfo(self, rev):
1835 1835 chaininfocache = self._chaininfocache
1836 1836 if rev in chaininfocache:
1837 1837 return chaininfocache[rev]
1838 1838 index = self.index
1839 1839 generaldelta = self.delta_config.general_delta
1840 1840 iterrev = rev
1841 1841 e = index[iterrev]
1842 1842 clen = 0
1843 1843 compresseddeltalen = 0
1844 1844 while iterrev != e[3]:
1845 1845 clen += 1
1846 1846 compresseddeltalen += e[1]
1847 1847 if generaldelta:
1848 1848 iterrev = e[3]
1849 1849 else:
1850 1850 iterrev -= 1
1851 1851 if iterrev in chaininfocache:
1852 1852 t = chaininfocache[iterrev]
1853 1853 clen += t[0]
1854 1854 compresseddeltalen += t[1]
1855 1855 break
1856 1856 e = index[iterrev]
1857 1857 else:
1858 1858 # Add text length of base since decompressing that also takes
1859 1859 # work. For cache hits the length is already included.
1860 1860 compresseddeltalen += e[1]
1861 1861 r = (clen, compresseddeltalen)
1862 1862 chaininfocache[rev] = r
1863 1863 return r
1864 1864
1865 1865 def _deltachain(self, rev, stoprev=None):
1866 1866 return self._inner._deltachain(rev, stoprev=stoprev)
1867 1867
1868 1868 def ancestors(self, revs, stoprev=0, inclusive=False):
1869 1869 """Generate the ancestors of 'revs' in reverse revision order.
1870 1870 Does not generate revs lower than stoprev.
1871 1871
1872 1872 See the documentation for ancestor.lazyancestors for more details."""
1873 1873
1874 1874 # first, make sure start revisions aren't filtered
1875 1875 revs = list(revs)
1876 1876 checkrev = self.node
1877 1877 for r in revs:
1878 1878 checkrev(r)
1879 1879 # and we're sure ancestors aren't filtered as well
1880 1880
1881 1881 if rustancestor is not None and self.index.rust_ext_compat:
1882 1882 lazyancestors = rustancestor.LazyAncestors
1883 1883 arg = self.index
1884 1884 else:
1885 1885 lazyancestors = ancestor.lazyancestors
1886 1886 arg = self._uncheckedparentrevs
1887 1887 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1888 1888
1889 1889 def descendants(self, revs):
1890 1890 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1891 1891
1892 1892 def findcommonmissing(self, common=None, heads=None):
1893 1893 """Return a tuple of the ancestors of common and the ancestors of heads
1894 1894 that are not ancestors of common. In revset terminology, we return the
1895 1895 tuple:
1896 1896
1897 1897 ::common, (::heads) - (::common)
1898 1898
1899 1899 The list is sorted by revision number, meaning it is
1900 1900 topologically sorted.
1901 1901
1902 1902 'heads' and 'common' are both lists of node IDs. If heads is
1903 1903 not supplied, uses all of the revlog's heads. If common is not
1904 1904 supplied, uses nullid."""
1905 1905 if common is None:
1906 1906 common = [self.nullid]
1907 1907 if heads is None:
1908 1908 heads = self.heads()
1909 1909
1910 1910 common = [self.rev(n) for n in common]
1911 1911 heads = [self.rev(n) for n in heads]
1912 1912
1913 1913 # we want the ancestors, but inclusive
1914 1914 class lazyset:
1915 1915 def __init__(self, lazyvalues):
1916 1916 self.addedvalues = set()
1917 1917 self.lazyvalues = lazyvalues
1918 1918
1919 1919 def __contains__(self, value):
1920 1920 return value in self.addedvalues or value in self.lazyvalues
1921 1921
1922 1922 def __iter__(self):
1923 1923 added = self.addedvalues
1924 1924 for r in added:
1925 1925 yield r
1926 1926 for r in self.lazyvalues:
1927 1927 if r not in added:
1928 1928 yield r
1929 1929
1930 1930 def add(self, value):
1931 1931 self.addedvalues.add(value)
1932 1932
1933 1933 def update(self, values):
1934 1934 self.addedvalues.update(values)
1935 1935
1936 1936 has = lazyset(self.ancestors(common))
1937 1937 has.add(nullrev)
1938 1938 has.update(common)
1939 1939
1940 1940 # take all ancestors from heads that aren't in has
1941 1941 missing = set()
1942 1942 visit = collections.deque(r for r in heads if r not in has)
1943 1943 while visit:
1944 1944 r = visit.popleft()
1945 1945 if r in missing:
1946 1946 continue
1947 1947 else:
1948 1948 missing.add(r)
1949 1949 for p in self.parentrevs(r):
1950 1950 if p not in has:
1951 1951 visit.append(p)
1952 1952 missing = list(missing)
1953 1953 missing.sort()
1954 1954 return has, [self.node(miss) for miss in missing]
1955 1955
1956 1956 def incrementalmissingrevs(self, common=None):
1957 1957 """Return an object that can be used to incrementally compute the
1958 1958 revision numbers of the ancestors of arbitrary sets that are not
1959 1959 ancestors of common. This is an ancestor.incrementalmissingancestors
1960 1960 object.
1961 1961
1962 1962 'common' is a list of revision numbers. If common is not supplied, uses
1963 1963 nullrev.
1964 1964 """
1965 1965 if common is None:
1966 1966 common = [nullrev]
1967 1967
1968 1968 if rustancestor is not None and self.index.rust_ext_compat:
1969 1969 return rustancestor.MissingAncestors(self.index, common)
1970 1970 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1971 1971
1972 1972 def findmissingrevs(self, common=None, heads=None):
1973 1973 """Return the revision numbers of the ancestors of heads that
1974 1974 are not ancestors of common.
1975 1975
1976 1976 More specifically, return a list of revision numbers corresponding to
1977 1977 nodes N such that every N satisfies the following constraints:
1978 1978
1979 1979 1. N is an ancestor of some node in 'heads'
1980 1980 2. N is not an ancestor of any node in 'common'
1981 1981
1982 1982 The list is sorted by revision number, meaning it is
1983 1983 topologically sorted.
1984 1984
1985 1985 'heads' and 'common' are both lists of revision numbers. If heads is
1986 1986 not supplied, uses all of the revlog's heads. If common is not
1987 1987 supplied, uses nullid."""
1988 1988 if common is None:
1989 1989 common = [nullrev]
1990 1990 if heads is None:
1991 1991 heads = self.headrevs()
1992 1992
1993 1993 inc = self.incrementalmissingrevs(common=common)
1994 1994 return inc.missingancestors(heads)
1995 1995
1996 1996 def findmissing(self, common=None, heads=None):
1997 1997 """Return the ancestors of heads that are not ancestors of common.
1998 1998
1999 1999 More specifically, return a list of nodes N such that every N
2000 2000 satisfies the following constraints:
2001 2001
2002 2002 1. N is an ancestor of some node in 'heads'
2003 2003 2. N is not an ancestor of any node in 'common'
2004 2004
2005 2005 The list is sorted by revision number, meaning it is
2006 2006 topologically sorted.
2007 2007
2008 2008 'heads' and 'common' are both lists of node IDs. If heads is
2009 2009 not supplied, uses all of the revlog's heads. If common is not
2010 2010 supplied, uses nullid."""
2011 2011 if common is None:
2012 2012 common = [self.nullid]
2013 2013 if heads is None:
2014 2014 heads = self.heads()
2015 2015
2016 2016 common = [self.rev(n) for n in common]
2017 2017 heads = [self.rev(n) for n in heads]
2018 2018
2019 2019 inc = self.incrementalmissingrevs(common=common)
2020 2020 return [self.node(r) for r in inc.missingancestors(heads)]
2021 2021
2022 2022 def nodesbetween(self, roots=None, heads=None):
2023 2023 """Return a topological path from 'roots' to 'heads'.
2024 2024
2025 2025 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2026 2026 topologically sorted list of all nodes N that satisfy both of
2027 2027 these constraints:
2028 2028
2029 2029 1. N is a descendant of some node in 'roots'
2030 2030 2. N is an ancestor of some node in 'heads'
2031 2031
2032 2032 Every node is considered to be both a descendant and an ancestor
2033 2033 of itself, so every reachable node in 'roots' and 'heads' will be
2034 2034 included in 'nodes'.
2035 2035
2036 2036 'outroots' is the list of reachable nodes in 'roots', i.e., the
2037 2037 subset of 'roots' that is returned in 'nodes'. Likewise,
2038 2038 'outheads' is the subset of 'heads' that is also in 'nodes'.
2039 2039
2040 2040 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2041 2041 unspecified, uses nullid as the only root. If 'heads' is
2042 2042 unspecified, uses list of all of the revlog's heads."""
2043 2043 nonodes = ([], [], [])
2044 2044 if roots is not None:
2045 2045 roots = list(roots)
2046 2046 if not roots:
2047 2047 return nonodes
2048 2048 lowestrev = min([self.rev(n) for n in roots])
2049 2049 else:
2050 2050 roots = [self.nullid] # Everybody's a descendant of nullid
2051 2051 lowestrev = nullrev
2052 2052 if (lowestrev == nullrev) and (heads is None):
2053 2053 # We want _all_ the nodes!
2054 2054 return (
2055 2055 [self.node(r) for r in self],
2056 2056 [self.nullid],
2057 2057 list(self.heads()),
2058 2058 )
2059 2059 if heads is None:
2060 2060 # All nodes are ancestors, so the latest ancestor is the last
2061 2061 # node.
2062 2062 highestrev = len(self) - 1
2063 2063 # Set ancestors to None to signal that every node is an ancestor.
2064 2064 ancestors = None
2065 2065 # Set heads to an empty dictionary for later discovery of heads
2066 2066 heads = {}
2067 2067 else:
2068 2068 heads = list(heads)
2069 2069 if not heads:
2070 2070 return nonodes
2071 2071 ancestors = set()
2072 2072 # Turn heads into a dictionary so we can remove 'fake' heads.
2073 2073 # Also, later we will be using it to filter out the heads we can't
2074 2074 # find from roots.
2075 2075 heads = dict.fromkeys(heads, False)
2076 2076 # Start at the top and keep marking parents until we're done.
2077 2077 nodestotag = set(heads)
2078 2078 # Remember where the top was so we can use it as a limit later.
2079 2079 highestrev = max([self.rev(n) for n in nodestotag])
2080 2080 while nodestotag:
2081 2081 # grab a node to tag
2082 2082 n = nodestotag.pop()
2083 2083 # Never tag nullid
2084 2084 if n == self.nullid:
2085 2085 continue
2086 2086 # A node's revision number represents its place in a
2087 2087 # topologically sorted list of nodes.
2088 2088 r = self.rev(n)
2089 2089 if r >= lowestrev:
2090 2090 if n not in ancestors:
2091 2091 # If we are possibly a descendant of one of the roots
2092 2092 # and we haven't already been marked as an ancestor
2093 2093 ancestors.add(n) # Mark as ancestor
2094 2094 # Add non-nullid parents to list of nodes to tag.
2095 2095 nodestotag.update(
2096 2096 [p for p in self.parents(n) if p != self.nullid]
2097 2097 )
2098 2098 elif n in heads: # We've seen it before, is it a fake head?
2099 2099 # So it is, real heads should not be the ancestors of
2100 2100 # any other heads.
2101 2101 heads.pop(n)
2102 2102 if not ancestors:
2103 2103 return nonodes
2104 2104 # Now that we have our set of ancestors, we want to remove any
2105 2105 # roots that are not ancestors.
2106 2106
2107 2107 # If one of the roots was nullid, everything is included anyway.
2108 2108 if lowestrev > nullrev:
2109 2109 # But, since we weren't, let's recompute the lowest rev to not
2110 2110 # include roots that aren't ancestors.
2111 2111
2112 2112 # Filter out roots that aren't ancestors of heads
2113 2113 roots = [root for root in roots if root in ancestors]
2114 2114 # Recompute the lowest revision
2115 2115 if roots:
2116 2116 lowestrev = min([self.rev(root) for root in roots])
2117 2117 else:
2118 2118 # No more roots? Return empty list
2119 2119 return nonodes
2120 2120 else:
2121 2121 # We are descending from nullid, and don't need to care about
2122 2122 # any other roots.
2123 2123 lowestrev = nullrev
2124 2124 roots = [self.nullid]
2125 2125 # Transform our roots list into a set.
2126 2126 descendants = set(roots)
2127 2127 # Also, keep the original roots so we can filter out roots that aren't
2128 2128 # 'real' roots (i.e. are descended from other roots).
2129 2129 roots = descendants.copy()
2130 2130 # Our topologically sorted list of output nodes.
2131 2131 orderedout = []
2132 2132 # Don't start at nullid since we don't want nullid in our output list,
2133 2133 # and if nullid shows up in descendants, empty parents will look like
2134 2134 # they're descendants.
2135 2135 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2136 2136 n = self.node(r)
2137 2137 isdescendant = False
2138 2138 if lowestrev == nullrev: # Everybody is a descendant of nullid
2139 2139 isdescendant = True
2140 2140 elif n in descendants:
2141 2141 # n is already a descendant
2142 2142 isdescendant = True
2143 2143 # This check only needs to be done here because all the roots
2144 2144 # will start being marked as descendants before the loop.
2145 2145 if n in roots:
2146 2146 # If n was a root, check if it's a 'real' root.
2147 2147 p = tuple(self.parents(n))
2148 2148 # If any of its parents are descendants, it's not a root.
2149 2149 if (p[0] in descendants) or (p[1] in descendants):
2150 2150 roots.remove(n)
2151 2151 else:
2152 2152 p = tuple(self.parents(n))
2153 2153 # A node is a descendant if either of its parents are
2154 2154 # descendants. (We seeded the descendants set with the roots
2155 2155 # up there, remember?)
2156 2156 if (p[0] in descendants) or (p[1] in descendants):
2157 2157 descendants.add(n)
2158 2158 isdescendant = True
2159 2159 if isdescendant and ((ancestors is None) or (n in ancestors)):
2160 2160 # Only include nodes that are both descendants and ancestors.
2161 2161 orderedout.append(n)
2162 2162 if (ancestors is not None) and (n in heads):
2163 2163 # We're trying to figure out which heads are reachable
2164 2164 # from roots.
2165 2165 # Mark this head as having been reached
2166 2166 heads[n] = True
2167 2167 elif ancestors is None:
2168 2168 # Otherwise, we're trying to discover the heads.
2169 2169 # Assume this is a head because if it isn't, the next step
2170 2170 # will eventually remove it.
2171 2171 heads[n] = True
2172 2172 # But, obviously its parents aren't.
2173 2173 for p in self.parents(n):
2174 2174 heads.pop(p, None)
2175 2175 heads = [head for head, flag in heads.items() if flag]
2176 2176 roots = list(roots)
2177 2177 assert orderedout
2178 2178 assert roots
2179 2179 assert heads
2180 2180 return (orderedout, roots, heads)
2181 2181
2182 2182 def headrevs(self, revs=None):
2183 2183 if revs is None:
2184 2184 try:
2185 2185 return self.index.headrevs()
2186 2186 except AttributeError:
2187 2187 return self._headrevs()
2188 2188 if rustdagop is not None and self.index.rust_ext_compat:
2189 2189 return rustdagop.headrevs(self.index, revs)
2190 2190 return dagop.headrevs(revs, self._uncheckedparentrevs)
2191 2191
2192 2192 def computephases(self, roots):
2193 2193 return self.index.computephasesmapsets(roots)
2194 2194
2195 2195 def _headrevs(self):
2196 2196 count = len(self)
2197 2197 if not count:
2198 2198 return [nullrev]
2199 2199 # we won't iter over filtered rev so nobody is a head at start
2200 2200 ishead = [0] * (count + 1)
2201 2201 index = self.index
2202 2202 for r in self:
2203 2203 ishead[r] = 1 # I may be a head
2204 2204 e = index[r]
2205 2205 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2206 2206 return [r for r, val in enumerate(ishead) if val]
2207 2207
2208 2208 def heads(self, start=None, stop=None):
2209 2209 """return the list of all nodes that have no children
2210 2210
2211 2211 if start is specified, only heads that are descendants of
2212 2212 start will be returned
2213 2213 if stop is specified, it will consider all the revs from stop
2214 2214 as if they had no children
2215 2215 """
2216 2216 if start is None and stop is None:
2217 2217 if not len(self):
2218 2218 return [self.nullid]
2219 2219 return [self.node(r) for r in self.headrevs()]
2220 2220
2221 2221 if start is None:
2222 2222 start = nullrev
2223 2223 else:
2224 2224 start = self.rev(start)
2225 2225
2226 2226 stoprevs = {self.rev(n) for n in stop or []}
2227 2227
2228 2228 revs = dagop.headrevssubset(
2229 2229 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2230 2230 )
2231 2231
2232 2232 return [self.node(rev) for rev in revs]
2233 2233
2234 2234 def children(self, node):
2235 2235 """find the children of a given node"""
2236 2236 c = []
2237 2237 p = self.rev(node)
2238 2238 for r in self.revs(start=p + 1):
2239 2239 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2240 2240 if prevs:
2241 2241 for pr in prevs:
2242 2242 if pr == p:
2243 2243 c.append(self.node(r))
2244 2244 elif p == nullrev:
2245 2245 c.append(self.node(r))
2246 2246 return c
2247 2247
2248 2248 def commonancestorsheads(self, a, b):
2249 2249 """calculate all the heads of the common ancestors of nodes a and b"""
2250 2250 a, b = self.rev(a), self.rev(b)
2251 2251 ancs = self._commonancestorsheads(a, b)
2252 2252 return pycompat.maplist(self.node, ancs)
2253 2253
2254 2254 def _commonancestorsheads(self, *revs):
2255 2255 """calculate all the heads of the common ancestors of revs"""
2256 2256 try:
2257 2257 ancs = self.index.commonancestorsheads(*revs)
2258 2258 except (AttributeError, OverflowError): # C implementation failed
2259 2259 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2260 2260 return ancs
2261 2261
2262 2262 def isancestor(self, a, b):
2263 2263 """return True if node a is an ancestor of node b
2264 2264
2265 2265 A revision is considered an ancestor of itself."""
2266 2266 a, b = self.rev(a), self.rev(b)
2267 2267 return self.isancestorrev(a, b)
2268 2268
2269 2269 def isancestorrev(self, a, b):
2270 2270 """return True if revision a is an ancestor of revision b
2271 2271
2272 2272 A revision is considered an ancestor of itself.
2273 2273
2274 2274 The implementation of this is trivial but the use of
2275 2275 reachableroots is not."""
2276 2276 if a == nullrev:
2277 2277 return True
2278 2278 elif a == b:
2279 2279 return True
2280 2280 elif a > b:
2281 2281 return False
2282 2282 return bool(self.reachableroots(a, [b], [a], includepath=False))
2283 2283
2284 2284 def reachableroots(self, minroot, heads, roots, includepath=False):
2285 2285 """return (heads(::(<roots> and <roots>::<heads>)))
2286 2286
2287 2287 If includepath is True, return (<roots>::<heads>)."""
2288 2288 try:
2289 2289 return self.index.reachableroots2(
2290 2290 minroot, heads, roots, includepath
2291 2291 )
2292 2292 except AttributeError:
2293 2293 return dagop._reachablerootspure(
2294 2294 self.parentrevs, minroot, roots, heads, includepath
2295 2295 )
2296 2296
2297 2297 def ancestor(self, a, b):
2298 2298 """calculate the "best" common ancestor of nodes a and b"""
2299 2299
2300 2300 a, b = self.rev(a), self.rev(b)
2301 2301 try:
2302 2302 ancs = self.index.ancestors(a, b)
2303 2303 except (AttributeError, OverflowError):
2304 2304 ancs = ancestor.ancestors(self.parentrevs, a, b)
2305 2305 if ancs:
2306 2306 # choose a consistent winner when there's a tie
2307 2307 return min(map(self.node, ancs))
2308 2308 return self.nullid
2309 2309
2310 2310 def _match(self, id):
2311 2311 if isinstance(id, int):
2312 2312 # rev
2313 2313 return self.node(id)
2314 2314 if len(id) == self.nodeconstants.nodelen:
2315 2315 # possibly a binary node
2316 2316 # odds of a binary node being all hex in ASCII are 1 in 10**25
2317 2317 try:
2318 2318 node = id
2319 2319 self.rev(node) # quick search the index
2320 2320 return node
2321 2321 except error.LookupError:
2322 2322 pass # may be partial hex id
2323 2323 try:
2324 2324 # str(rev)
2325 2325 rev = int(id)
2326 2326 if b"%d" % rev != id:
2327 2327 raise ValueError
2328 2328 if rev < 0:
2329 2329 rev = len(self) + rev
2330 2330 if rev < 0 or rev >= len(self):
2331 2331 raise ValueError
2332 2332 return self.node(rev)
2333 2333 except (ValueError, OverflowError):
2334 2334 pass
2335 2335 if len(id) == 2 * self.nodeconstants.nodelen:
2336 2336 try:
2337 2337 # a full hex nodeid?
2338 2338 node = bin(id)
2339 2339 self.rev(node)
2340 2340 return node
2341 2341 except (binascii.Error, error.LookupError):
2342 2342 pass
2343 2343
2344 2344 def _partialmatch(self, id):
2345 2345 # we don't care about wdirfilenodeids as they should always be full hashes
2346 2346 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2347 2347 ambiguous = False
2348 2348 try:
2349 2349 partial = self.index.partialmatch(id)
2350 2350 if partial and self.hasnode(partial):
2351 2351 if maybewdir:
2352 2352 # single 'ff...' match in radix tree, ambiguous with wdir
2353 2353 ambiguous = True
2354 2354 else:
2355 2355 return partial
2356 2356 elif maybewdir:
2357 2357 # no 'ff...' match in radix tree, wdir identified
2358 2358 raise error.WdirUnsupported
2359 2359 else:
2360 2360 return None
2361 2361 except error.RevlogError:
2362 2362 # parsers.c radix tree lookup gave multiple matches
2363 2363 # fast path: for unfiltered changelog, radix tree is accurate
2364 2364 if not getattr(self, 'filteredrevs', None):
2365 2365 ambiguous = True
2366 2366 # fall through to slow path that filters hidden revisions
2367 2367 except (AttributeError, ValueError):
2368 2368 # we are pure python, or key is not hex
2369 2369 pass
2370 2370 if ambiguous:
2371 2371 raise error.AmbiguousPrefixLookupError(
2372 2372 id, self.display_id, _(b'ambiguous identifier')
2373 2373 )
2374 2374
2375 2375 if id in self._pcache:
2376 2376 return self._pcache[id]
2377 2377
2378 2378 if len(id) <= 40:
2379 2379 # hex(node)[:...]
2380 2380 l = len(id) // 2 * 2 # grab an even number of digits
2381 2381 try:
2382 2382 # we're dropping the last digit, so let's check that it's hex,
2383 2383 # to avoid the expensive computation below if it's not
2384 2384 if len(id) % 2 > 0:
2385 2385 if not (id[-1] in hexdigits):
2386 2386 return None
2387 2387 prefix = bin(id[:l])
2388 2388 except binascii.Error:
2389 2389 pass
2390 2390 else:
2391 2391 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2392 2392 nl = [
2393 2393 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2394 2394 ]
2395 2395 if self.nodeconstants.nullhex.startswith(id):
2396 2396 nl.append(self.nullid)
2397 2397 if len(nl) > 0:
2398 2398 if len(nl) == 1 and not maybewdir:
2399 2399 self._pcache[id] = nl[0]
2400 2400 return nl[0]
2401 2401 raise error.AmbiguousPrefixLookupError(
2402 2402 id, self.display_id, _(b'ambiguous identifier')
2403 2403 )
2404 2404 if maybewdir:
2405 2405 raise error.WdirUnsupported
2406 2406 return None
2407 2407
2408 2408 def lookup(self, id):
2409 2409 """locate a node based on:
2410 2410 - revision number or str(revision number)
2411 2411 - nodeid or subset of hex nodeid
2412 2412 """
2413 2413 n = self._match(id)
2414 2414 if n is not None:
2415 2415 return n
2416 2416 n = self._partialmatch(id)
2417 2417 if n:
2418 2418 return n
2419 2419
2420 2420 raise error.LookupError(id, self.display_id, _(b'no match found'))
2421 2421
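# Possible lookup() calls (illustrative; `rl` and the identifiers are
# hypothetical): every accepted form resolves to a full binary nodeid or
# raises LookupError.
#
#   node = rl.lookup(b'0')           # revision number as bytes
#   node = rl.lookup(b'1a2b3c4d')    # unambiguous hex prefix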
2422 2422 def shortest(self, node, minlength=1):
2423 2423 """Find the shortest unambiguous prefix that matches node."""
2424 2424
2425 2425 def isvalid(prefix):
2426 2426 try:
2427 2427 matchednode = self._partialmatch(prefix)
2428 2428 except error.AmbiguousPrefixLookupError:
2429 2429 return False
2430 2430 except error.WdirUnsupported:
2431 2431 # single 'ff...' match
2432 2432 return True
2433 2433 if matchednode is None:
2434 2434 raise error.LookupError(node, self.display_id, _(b'no node'))
2435 2435 return True
2436 2436
2437 2437 def maybewdir(prefix):
2438 2438 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2439 2439
2440 2440 hexnode = hex(node)
2441 2441
2442 2442 def disambiguate(hexnode, minlength):
2443 2443 """Disambiguate against wdirid."""
2444 2444 for length in range(minlength, len(hexnode) + 1):
2445 2445 prefix = hexnode[:length]
2446 2446 if not maybewdir(prefix):
2447 2447 return prefix
2448 2448
2449 2449 if not getattr(self, 'filteredrevs', None):
2450 2450 try:
2451 2451 length = max(self.index.shortest(node), minlength)
2452 2452 return disambiguate(hexnode, length)
2453 2453 except error.RevlogError:
2454 2454 if node != self.nodeconstants.wdirid:
2455 2455 raise error.LookupError(
2456 2456 node, self.display_id, _(b'no node')
2457 2457 )
2458 2458 except AttributeError:
2459 2459 # Fall through to pure code
2460 2460 pass
2461 2461
2462 2462 if node == self.nodeconstants.wdirid:
2463 2463 for length in range(minlength, len(hexnode) + 1):
2464 2464 prefix = hexnode[:length]
2465 2465 if isvalid(prefix):
2466 2466 return prefix
2467 2467
2468 2468 for length in range(minlength, len(hexnode) + 1):
2469 2469 prefix = hexnode[:length]
2470 2470 if isvalid(prefix):
2471 2471 return disambiguate(hexnode, length)
2472 2472
2473 2473 def cmp(self, node, text):
2474 2474 """compare text with a given file revision
2475 2475
2476 2476 returns True if text is different than what is stored.
2477 2477 """
2478 2478 p1, p2 = self.parents(node)
2479 2479 return storageutil.hashrevisionsha1(text, p1, p2) != node
2480 2480
2481 2481 def deltaparent(self, rev):
2482 2482 """return deltaparent of the given revision"""
2483 2483 base = self.index[rev][3]
2484 2484 if base == rev:
2485 2485 return nullrev
2486 2486 elif self.delta_config.general_delta:
2487 2487 return base
2488 2488 else:
2489 2489 return rev - 1
2490 2490
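# Illustrative sketch, not part of revlog.py: the three cases deltaparent()
# distinguishes, as a standalone helper.  ``index_base`` stands for
# ``self.index[rev][3]`` and -1 plays the role of nullrev.
def example_deltaparent(index_base, rev, general_delta):
    if index_base == rev:
        # stored as a full snapshot: no delta parent
        return -1
    if general_delta:
        # general-delta revlogs record the delta base explicitly in the index
        return index_base
    # legacy revlogs always delta against the immediately preceding revision
    return rev - 1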
2491 2491 def issnapshot(self, rev):
2492 2492 """tells whether rev is a snapshot"""
2493 2493 ret = self._inner.issnapshot(rev)
2494 2494 self.issnapshot = self._inner.issnapshot
2495 2495 return ret
2496 2496
2497 2497 def snapshotdepth(self, rev):
2498 2498 """number of snapshots in the chain before this one"""
2499 2499 if not self.issnapshot(rev):
2500 2500 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2501 2501 return len(self._inner._deltachain(rev)[0]) - 1
2502 2502
2503 2503 def revdiff(self, rev1, rev2):
2504 2504 """return or calculate a delta between two revisions
2505 2505
2506 2506 The delta calculated is in binary form and is intended to be written to
2507 2507 revlog data directly. So this function needs raw revision data.
2508 2508 """
2509 2509 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2510 2510 return bytes(self._inner._chunk(rev2))
2511 2511
2512 2512 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2513 2513
2514 2514 def revision(self, nodeorrev):
2515 2515 """return an uncompressed revision of a given node or revision
2516 2516 number.
2517 2517 """
2518 2518 return self._revisiondata(nodeorrev)
2519 2519
2520 2520 def sidedata(self, nodeorrev):
2521 2521 """a map of extra data related to the changeset but not part of the hash
2522 2522
2523 2523 This function currently returns a dictionary. However, a more advanced
2524 2524 mapping object will likely be used in the future for more
2525 2525 efficient/lazy code.
2526 2526 """
2527 2527 # deal with <nodeorrev> argument type
2528 2528 if isinstance(nodeorrev, int):
2529 2529 rev = nodeorrev
2530 2530 else:
2531 2531 rev = self.rev(nodeorrev)
2532 2532 return self._sidedata(rev)
2533 2533
2534 2534 def _revisiondata(self, nodeorrev, raw=False):
2535 2535 # deal with <nodeorrev> argument type
2536 2536 if isinstance(nodeorrev, int):
2537 2537 rev = nodeorrev
2538 2538 node = self.node(rev)
2539 2539 else:
2540 2540 node = nodeorrev
2541 2541 rev = None
2542 2542
2543 2543 # fast path the special `nullid` rev
2544 2544 if node == self.nullid:
2545 2545 return b""
2546 2546
2547 2547 # ``rawtext`` is the text as stored inside the revlog. Might be the
2548 2548 # revision or might need to be processed to retrieve the revision.
2549 2549 rev, rawtext, validated = self._rawtext(node, rev)
2550 2550
2551 2551 if raw and validated:
2552 2552 # if we don't want to process the raw text and that raw
2553 2553 # text is cached, we can exit early.
2554 2554 return rawtext
2555 2555 if rev is None:
2556 2556 rev = self.rev(node)
2557 2557 # the revlog's flag for this revision
2558 2558 # (usually alter its state or content)
2559 2559 flags = self.flags(rev)
2560 2560
2561 2561 if validated and flags == REVIDX_DEFAULT_FLAGS:
2562 2562 # no extra flags set, no flag processor runs, text = rawtext
2563 2563 return rawtext
2564 2564
2565 2565 if raw:
2566 2566 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2567 2567 text = rawtext
2568 2568 else:
2569 2569 r = flagutil.processflagsread(self, rawtext, flags)
2570 2570 text, validatehash = r
2571 2571 if validatehash:
2572 2572 self.checkhash(text, node, rev=rev)
2573 2573 if not validated:
2574 self._revisioncache = (node, rev, rawtext)
2574 self._inner._revisioncache = (node, rev, rawtext)
2575 2575
2576 2576 return text
2577 2577
2578 2578 def _rawtext(self, node, rev):
2579 2579 """return the possibly unvalidated rawtext for a revision
2580 2580
2581 2581 returns (rev, rawtext, validated)
2582 2582 """
2583 2583
2584 2584 # revision in the cache (could be useful to apply delta)
2585 2585 cachedrev = None
2586 2586 # An intermediate text to apply deltas to
2587 2587 basetext = None
2588 2588
2589 2589 # Check if we have the entry in cache
2590 2590 # The cache entry looks like (node, rev, rawtext)
2591 if self._revisioncache:
2592 if self._revisioncache[0] == node:
2593 return (rev, self._revisioncache[2], True)
2594 cachedrev = self._revisioncache[1]
2591 if self._inner._revisioncache:
2592 if self._inner._revisioncache[0] == node:
2593 return (rev, self._inner._revisioncache[2], True)
2594 cachedrev = self._inner._revisioncache[1]
2595 2595
2596 2596 if rev is None:
2597 2597 rev = self.rev(node)
2598 2598
2599 2599 chain, stopped = self._inner._deltachain(rev, stoprev=cachedrev)
2600 2600 if stopped:
2601 basetext = self._revisioncache[2]
2601 basetext = self._inner._revisioncache[2]
2602 2602
2603 2603 # drop cache to save memory, the caller is expected to
2604 # update self._revisioncache after validating the text
2605 self._revisioncache = None
2604 # update self._inner._revisioncache after validating the text
2605 self._inner._revisioncache = None
2606 2606
2607 2607 targetsize = None
2608 2608 rawsize = self.index[rev][2]
2609 2609 if 0 <= rawsize:
2610 2610 targetsize = 4 * rawsize
2611 2611
2612 2612 bins = self._inner._chunks(chain, targetsize=targetsize)
2613 2613 if basetext is None:
2614 2614 basetext = bytes(bins[0])
2615 2615 bins = bins[1:]
2616 2616
2617 2617 rawtext = mdiff.patches(basetext, bins)
2618 2618 del basetext # let us have a chance to free memory early
2619 2619 return (rev, rawtext, False)
2620 2620
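# Illustrative sketch, not part of revlog.py: the reconstruction step that
# closes _rawtext(), in isolation.  ``basetext`` is the full text the delta
# chain starts from and ``deltachunks`` the raw delta chunks that follow it;
# mdiff.patches folds the deltas onto the base.
def example_apply_chain(basetext, deltachunks):
    from mercurial import mdiff  # the same module used above
    return mdiff.patches(basetext, list(deltachunks))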
2621 2621 def _sidedata(self, rev):
2622 2622 """Return the sidedata for a given revision number."""
2623 2623 index_entry = self.index[rev]
2624 2624 sidedata_offset = index_entry[8]
2625 2625 sidedata_size = index_entry[9]
2626 2626
2627 2627 if self._inline:
2628 2628 sidedata_offset += self.index.entry_size * (1 + rev)
2629 2629 if sidedata_size == 0:
2630 2630 return {}
2631 2631
2632 2632 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2633 2633 filename = self._sidedatafile
2634 2634 end = self._docket.sidedata_end
2635 2635 offset = sidedata_offset
2636 2636 length = sidedata_size
2637 2637 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2638 2638 raise error.RevlogError(m)
2639 2639
2640 2640 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2641 2641 sidedata_offset, sidedata_size
2642 2642 )
2643 2643
2644 2644 comp = self.index[rev][11]
2645 2645 if comp == COMP_MODE_PLAIN:
2646 2646 segment = comp_segment
2647 2647 elif comp == COMP_MODE_DEFAULT:
2648 2648 segment = self._inner._decompressor(comp_segment)
2649 2649 elif comp == COMP_MODE_INLINE:
2650 2650 segment = self._inner.decompress(comp_segment)
2651 2651 else:
2652 2652 msg = b'unknown compression mode %d'
2653 2653 msg %= comp
2654 2654 raise error.RevlogError(msg)
2655 2655
2656 2656 sidedata = sidedatautil.deserialize_sidedata(segment)
2657 2657 return sidedata
2658 2658
2659 2659 def rawdata(self, nodeorrev):
2660 2660 """return the uncompressed raw data of a given node or revision number."""
2661 2661 return self._revisiondata(nodeorrev, raw=True)
2662 2662
2663 2663 def hash(self, text, p1, p2):
2664 2664 """Compute a node hash.
2665 2665
2666 2666 Available as a function so that subclasses can replace the hash
2667 2667 as needed.
2668 2668 """
2669 2669 return storageutil.hashrevisionsha1(text, p1, p2)
2670 2670
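# Illustrative sketch, not part of revlog.py: the classic revlog node hash
# that storageutil.hashrevisionsha1 computes -- SHA-1 over the two parent
# nodes in sorted order followed by the revision text.  Standalone
# approximation for reference only.
import hashlib

def example_hashrevision(text, p1, p2):
    first, second = sorted((p1, p2))
    s = hashlib.sha1(first)
    s.update(second)
    s.update(text)
    return s.digest()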
2671 2671 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2672 2672 """Check node hash integrity.
2673 2673
2674 2674 Available as a function so that subclasses can extend hash mismatch
2675 2675 behaviors as needed.
2676 2676 """
2677 2677 try:
2678 2678 if p1 is None and p2 is None:
2679 2679 p1, p2 = self.parents(node)
2680 2680 if node != self.hash(text, p1, p2):
2681 2681 # Clear the revision cache on hash failure. The revision cache
2682 2682 # only stores the raw revision and clearing the cache does have
2683 2683 # the side-effect that we won't have a cache hit when the raw
2684 2684 # revision data is accessed. But this case should be rare and
2685 2685 # it is extra work to teach the cache about the hash
2686 2686 # verification state.
2687 if self._revisioncache and self._revisioncache[0] == node:
2688 self._revisioncache = None
2687 if (
2688 self._inner._revisioncache
2689 and self._inner._revisioncache[0] == node
2690 ):
2691 self._inner._revisioncache = None
2689 2692
2690 2693 revornode = rev
2691 2694 if revornode is None:
2692 2695 revornode = templatefilters.short(hex(node))
2693 2696 raise error.RevlogError(
2694 2697 _(b"integrity check failed on %s:%s")
2695 2698 % (self.display_id, pycompat.bytestr(revornode))
2696 2699 )
2697 2700 except error.RevlogError:
2698 2701 if self.feature_config.censorable and storageutil.iscensoredtext(
2699 2702 text
2700 2703 ):
2701 2704 raise error.CensoredNodeError(self.display_id, node, text)
2702 2705 raise
2703 2706
2704 2707 @property
2705 2708 def _split_index_file(self):
2706 2709 """the path at which to expect the index of an ongoing splitting operation
2707 2710
2708 2711 The file will only exist if a splitting operation is in progress, but
2709 2712 it is always expected at the same location."""
2710 2713 parts = self.radix.split(b'/')
2711 2714 if len(parts) > 1:
2712 2715 # turn the `data/` or `meta/` prefix into `data-s/` or `meta-s/`
2713 2716 head = parts[0] + b'-s'
2714 2717 mids = parts[1:-1]
2715 2718 tail = parts[-1] + b'.i'
2716 2719 pieces = [head] + mids + [tail]
2717 2720 return b'/'.join(pieces)
2718 2721 else:
2719 2722 # the revlog is stored at the root of the store (changelog or
2720 2723 # manifest), no risk of collision.
2721 2724 return self.radix + b'.i.s'
2722 2725
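# Illustrative sketch, not part of revlog.py: the same path derivation as the
# property above, written as a standalone helper so both cases are easy to
# check by hand.
def example_split_index_path(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        # stored under data/ or meta/: b'data/foo/bar' -> b'data-s/foo/bar.i'
        return b'/'.join([parts[0] + b'-s'] + parts[1:-1] + [parts[-1] + b'.i'])
    # stored at the store root (changelog, manifest): append '.i.s'
    return radix + b'.i.s'

assert example_split_index_path(b'data/foo/bar') == b'data-s/foo/bar.i'
assert example_split_index_path(b'00manifest') == b'00manifest.i.s'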
2723 2726 def _enforceinlinesize(self, tr, side_write=True):
2724 2727 """Check if the revlog is too big for inline and convert if so.
2725 2728
2726 2729 This should be called after revisions are added to the revlog. If the
2727 2730 revlog has grown too large to be an inline revlog, it will convert it
2728 2731 to use multiple index and data files.
2729 2732 """
2730 2733 tiprev = len(self) - 1
2731 2734 total_size = self.start(tiprev) + self.length(tiprev)
2732 2735 if not self._inline or total_size < _maxinline:
2733 2736 return
2734 2737
2735 2738 if self._docket is not None:
2736 2739 msg = b"inline revlog should not have a docket"
2737 2740 raise error.ProgrammingError(msg)
2738 2741
2739 2742 troffset = tr.findoffset(self._indexfile)
2740 2743 if troffset is None:
2741 2744 raise error.RevlogError(
2742 2745 _(b"%s not found in the transaction") % self._indexfile
2743 2746 )
2744 2747 if troffset:
2745 2748 tr.addbackup(self._indexfile, for_offset=True)
2746 2749 tr.add(self._datafile, 0)
2747 2750
2748 2751 new_index_file_path = None
2749 2752 if side_write:
2750 2753 old_index_file_path = self._indexfile
2751 2754 new_index_file_path = self._split_index_file
2752 2755 opener = self.opener
2753 2756 weak_self = weakref.ref(self)
2754 2757
2755 2758 # the "split" index replaces the real index when the transaction is
2756 2759 # finalized
2757 2760 def finalize_callback(tr):
2758 2761 opener.rename(
2759 2762 new_index_file_path,
2760 2763 old_index_file_path,
2761 2764 checkambig=True,
2762 2765 )
2763 2766 maybe_self = weak_self()
2764 2767 if maybe_self is not None:
2765 2768 maybe_self._indexfile = old_index_file_path
2766 2769 maybe_self._inner.index_file = maybe_self._indexfile
2767 2770
2768 2771 def abort_callback(tr):
2769 2772 maybe_self = weak_self()
2770 2773 if maybe_self is not None:
2771 2774 maybe_self._indexfile = old_index_file_path
2772 2775 maybe_self._inner.inline = True
2773 2776 maybe_self._inner.index_file = old_index_file_path
2774 2777
2775 2778 tr.registertmp(new_index_file_path)
2776 2779 if self.target[1] is not None:
2777 2780 callback_id = b'000-revlog-split-%d-%s' % self.target
2778 2781 else:
2779 2782 callback_id = b'000-revlog-split-%d' % self.target[0]
2780 2783 tr.addfinalize(callback_id, finalize_callback)
2781 2784 tr.addabort(callback_id, abort_callback)
2782 2785
2783 2786 self._format_flags &= ~FLAG_INLINE_DATA
2784 2787 self._inner.split_inline(
2785 2788 tr,
2786 2789 self._format_flags | self._format_version,
2787 2790 new_index_file_path=new_index_file_path,
2788 2791 )
2789 2792
2790 2793 self._inline = False
2791 2794 if new_index_file_path is not None:
2792 2795 self._indexfile = new_index_file_path
2793 2796
2794 2797 nodemaputil.setup_persistent_nodemap(tr, self)
2795 2798
2796 2799 def _nodeduplicatecallback(self, transaction, node):
2797 2800 """called when trying to add a node already stored."""
2798 2801
2799 2802 @contextlib.contextmanager
2800 2803 def reading(self):
2801 2804 with self._inner.reading():
2802 2805 yield
2803 2806
2804 2807 @contextlib.contextmanager
2805 2808 def _writing(self, transaction):
2806 2809 if self._trypending:
2807 2810 msg = b'try to write in a `trypending` revlog: %s'
2808 2811 msg %= self.display_id
2809 2812 raise error.ProgrammingError(msg)
2810 2813 if self._inner.is_writing:
2811 2814 yield
2812 2815 else:
2813 2816 data_end = None
2814 2817 sidedata_end = None
2815 2818 if self._docket is not None:
2816 2819 data_end = self._docket.data_end
2817 2820 sidedata_end = self._docket.sidedata_end
2818 2821 with self._inner.writing(
2819 2822 transaction,
2820 2823 data_end=data_end,
2821 2824 sidedata_end=sidedata_end,
2822 2825 ):
2823 2826 yield
2824 2827 if self._docket is not None:
2825 2828 self._write_docket(transaction)
2826 2829
2827 2830 def _write_docket(self, transaction):
2828 2831 """write the current docket on disk
2829 2832
2830 2833 Exists as a method to help the changelog implement transaction logic
2831 2834
2832 2835 We could also imagine using the same transaction logic for all revlogs
2833 2836 since dockets are cheap."""
2834 2837 self._docket.write(transaction)
2835 2838
2836 2839 def addrevision(
2837 2840 self,
2838 2841 text,
2839 2842 transaction,
2840 2843 link,
2841 2844 p1,
2842 2845 p2,
2843 2846 cachedelta=None,
2844 2847 node=None,
2845 2848 flags=REVIDX_DEFAULT_FLAGS,
2846 2849 deltacomputer=None,
2847 2850 sidedata=None,
2848 2851 ):
2849 2852 """add a revision to the log
2850 2853
2851 2854 text - the revision data to add
2852 2855 transaction - the transaction object used for rollback
2853 2856 link - the linkrev data to add
2854 2857 p1, p2 - the parent nodeids of the revision
2855 2858 cachedelta - an optional precomputed delta
2856 2859 node - nodeid of revision; typically node is not specified, and it is
2857 2860 computed by default as hash(text, p1, p2); however, subclasses might
2858 2861 use a different hashing method (and override checkhash() in that case)
2859 2862 flags - the known flags to set on the revision
2860 2863 deltacomputer - an optional deltacomputer instance shared between
2861 2864 multiple calls
2862 2865 """
2863 2866 if link == nullrev:
2864 2867 raise error.RevlogError(
2865 2868 _(b"attempted to add linkrev -1 to %s") % self.display_id
2866 2869 )
2867 2870
2868 2871 if sidedata is None:
2869 2872 sidedata = {}
2870 2873 elif sidedata and not self.feature_config.has_side_data:
2871 2874 raise error.ProgrammingError(
2872 2875 _(b"trying to add sidedata to a revlog that does not support them")
2873 2876 )
2874 2877
2875 2878 if flags:
2876 2879 node = node or self.hash(text, p1, p2)
2877 2880
2878 2881 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2879 2882
2880 2883 # If the flag processor modifies the revision data, ignore any provided
2881 2884 # cachedelta.
2882 2885 if rawtext != text:
2883 2886 cachedelta = None
2884 2887
2885 2888 if len(rawtext) > _maxentrysize:
2886 2889 raise error.RevlogError(
2887 2890 _(
2888 2891 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2889 2892 )
2890 2893 % (self.display_id, len(rawtext))
2891 2894 )
2892 2895
2893 2896 node = node or self.hash(rawtext, p1, p2)
2894 2897 rev = self.index.get_rev(node)
2895 2898 if rev is not None:
2896 2899 return rev
2897 2900
2898 2901 if validatehash:
2899 2902 self.checkhash(rawtext, node, p1=p1, p2=p2)
2900 2903
2901 2904 return self.addrawrevision(
2902 2905 rawtext,
2903 2906 transaction,
2904 2907 link,
2905 2908 p1,
2906 2909 p2,
2907 2910 node,
2908 2911 flags,
2909 2912 cachedelta=cachedelta,
2910 2913 deltacomputer=deltacomputer,
2911 2914 sidedata=sidedata,
2912 2915 )
2913 2916
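# Illustrative usage sketch, not part of revlog.py: adding one revision inside
# an open transaction.  ``rl``, ``tr``, ``linkrev`` and the parent nodes are
# hypothetical stand-ins; when no explicit node is given, it is computed as
# hash(text, p1, p2) as described in the docstring above.
#
#   node = rl.addrevision(text, tr, linkrev, p1node, p2node)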
2914 2917 def addrawrevision(
2915 2918 self,
2916 2919 rawtext,
2917 2920 transaction,
2918 2921 link,
2919 2922 p1,
2920 2923 p2,
2921 2924 node,
2922 2925 flags,
2923 2926 cachedelta=None,
2924 2927 deltacomputer=None,
2925 2928 sidedata=None,
2926 2929 ):
2927 2930 """add a raw revision with known flags, node and parents
2928 2931 useful when reusing a revision not stored in this revlog (ex: received
2929 2932 over wire, or read from an external bundle).
2930 2933 """
2931 2934 with self._writing(transaction):
2932 2935 return self._addrevision(
2933 2936 node,
2934 2937 rawtext,
2935 2938 transaction,
2936 2939 link,
2937 2940 p1,
2938 2941 p2,
2939 2942 flags,
2940 2943 cachedelta,
2941 2944 deltacomputer=deltacomputer,
2942 2945 sidedata=sidedata,
2943 2946 )
2944 2947
2945 2948 def compress(self, data):
2946 2949 return self._inner.compress(data)
2947 2950
2948 2951 def decompress(self, data):
2949 2952 return self._inner.decompress(data)
2950 2953
2951 2954 def _addrevision(
2952 2955 self,
2953 2956 node,
2954 2957 rawtext,
2955 2958 transaction,
2956 2959 link,
2957 2960 p1,
2958 2961 p2,
2959 2962 flags,
2960 2963 cachedelta,
2961 2964 alwayscache=False,
2962 2965 deltacomputer=None,
2963 2966 sidedata=None,
2964 2967 ):
2965 2968 """internal function to add revisions to the log
2966 2969
2967 2970 see addrevision for argument descriptions.
2968 2971
2969 2972 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2970 2973
2971 2974 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2972 2975 be used.
2973 2976
2974 2977 invariants:
2975 2978 - rawtext is optional (can be None); if not set, cachedelta must be set.
2976 2979 if both are set, they must correspond to each other.
2977 2980 """
2978 2981 if node == self.nullid:
2979 2982 raise error.RevlogError(
2980 2983 _(b"%s: attempt to add null revision") % self.display_id
2981 2984 )
2982 2985 if (
2983 2986 node == self.nodeconstants.wdirid
2984 2987 or node in self.nodeconstants.wdirfilenodeids
2985 2988 ):
2986 2989 raise error.RevlogError(
2987 2990 _(b"%s: attempt to add wdir revision") % self.display_id
2988 2991 )
2989 2992 if self._inner._writinghandles is None:
2990 2993 msg = b'adding revision outside `revlog._writing` context'
2991 2994 raise error.ProgrammingError(msg)
2992 2995
2993 2996 btext = [rawtext]
2994 2997
2995 2998 curr = len(self)
2996 2999 prev = curr - 1
2997 3000
2998 3001 offset = self._get_data_offset(prev)
2999 3002
3000 3003 if self._concurrencychecker:
3001 3004 ifh, dfh, sdfh = self._inner._writinghandles
3002 3005 # XXX no checking for the sidedata file
3003 3006 if self._inline:
3004 3007 # offset is "as if" it were in the .d file, so we need to add on
3005 3008 # the size of the entry metadata.
3006 3009 self._concurrencychecker(
3007 3010 ifh, self._indexfile, offset + curr * self.index.entry_size
3008 3011 )
3009 3012 else:
3010 3013 # Entries in the .i are a consistent size.
3011 3014 self._concurrencychecker(
3012 3015 ifh, self._indexfile, curr * self.index.entry_size
3013 3016 )
3014 3017 self._concurrencychecker(dfh, self._datafile, offset)
3015 3018
3016 3019 p1r, p2r = self.rev(p1), self.rev(p2)
3017 3020
3018 3021 # full versions are inserted when the needed deltas
3019 3022 # become comparable to the uncompressed text
3020 3023 if rawtext is None:
3021 3024 # need rawtext size, before changed by flag processors, which is
3022 3025 # the non-raw size. use revlog explicitly to avoid filelog's extra
3023 3026 # logic that might remove metadata size.
3024 3027 textlen = mdiff.patchedsize(
3025 3028 revlog.size(self, cachedelta[0]), cachedelta[1]
3026 3029 )
3027 3030 else:
3028 3031 textlen = len(rawtext)
3029 3032
3030 3033 if deltacomputer is None:
3031 3034 write_debug = None
3032 3035 if self.delta_config.debug_delta:
3033 3036 write_debug = transaction._report
3034 3037 deltacomputer = deltautil.deltacomputer(
3035 3038 self, write_debug=write_debug
3036 3039 )
3037 3040
3038 3041 if cachedelta is not None and len(cachedelta) == 2:
3039 3042 # If the cached delta has no information about how it should be
3040 3043 # reused, add the default reuse instruction according to the
3041 3044 # revlog's configuration.
3042 3045 if (
3043 3046 self.delta_config.general_delta
3044 3047 and self.delta_config.lazy_delta_base
3045 3048 ):
3046 3049 delta_base_reuse = DELTA_BASE_REUSE_TRY
3047 3050 else:
3048 3051 delta_base_reuse = DELTA_BASE_REUSE_NO
3049 3052 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3050 3053
3051 3054 revinfo = revlogutils.revisioninfo(
3052 3055 node,
3053 3056 p1,
3054 3057 p2,
3055 3058 btext,
3056 3059 textlen,
3057 3060 cachedelta,
3058 3061 flags,
3059 3062 )
3060 3063
3061 3064 deltainfo = deltacomputer.finddeltainfo(revinfo)
3062 3065
3063 3066 compression_mode = COMP_MODE_INLINE
3064 3067 if self._docket is not None:
3065 3068 default_comp = self._docket.default_compression_header
3066 3069 r = deltautil.delta_compression(default_comp, deltainfo)
3067 3070 compression_mode, deltainfo = r
3068 3071
3069 3072 sidedata_compression_mode = COMP_MODE_INLINE
3070 3073 if sidedata and self.feature_config.has_side_data:
3071 3074 sidedata_compression_mode = COMP_MODE_PLAIN
3072 3075 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3073 3076 sidedata_offset = self._docket.sidedata_end
3074 3077 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3075 3078 if (
3076 3079 h != b'u'
3077 3080 and comp_sidedata[0:1] != b'\0'
3078 3081 and len(comp_sidedata) < len(serialized_sidedata)
3079 3082 ):
3080 3083 assert not h
3081 3084 if (
3082 3085 comp_sidedata[0:1]
3083 3086 == self._docket.default_compression_header
3084 3087 ):
3085 3088 sidedata_compression_mode = COMP_MODE_DEFAULT
3086 3089 serialized_sidedata = comp_sidedata
3087 3090 else:
3088 3091 sidedata_compression_mode = COMP_MODE_INLINE
3089 3092 serialized_sidedata = comp_sidedata
3090 3093 else:
3091 3094 serialized_sidedata = b""
3092 3095 # Don't store the offset if the sidedata is empty; that way
3093 3096 # we can easily detect empty sidedata, and it will be no different
3094 3097 # from sidedata we add manually.
3095 3098 sidedata_offset = 0
3096 3099
3097 3100 rank = RANK_UNKNOWN
3098 3101 if self.feature_config.compute_rank:
3099 3102 if (p1r, p2r) == (nullrev, nullrev):
3100 3103 rank = 1
3101 3104 elif p1r != nullrev and p2r == nullrev:
3102 3105 rank = 1 + self.fast_rank(p1r)
3103 3106 elif p1r == nullrev and p2r != nullrev:
3104 3107 rank = 1 + self.fast_rank(p2r)
3105 3108 else: # merge node
3106 3109 if rustdagop is not None and self.index.rust_ext_compat:
3107 3110 rank = rustdagop.rank(self.index, p1r, p2r)
3108 3111 else:
3109 3112 pmin, pmax = sorted((p1r, p2r))
3110 3113 rank = 1 + self.fast_rank(pmax)
3111 3114 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3112 3115
3113 3116 e = revlogutils.entry(
3114 3117 flags=flags,
3115 3118 data_offset=offset,
3116 3119 data_compressed_length=deltainfo.deltalen,
3117 3120 data_uncompressed_length=textlen,
3118 3121 data_compression_mode=compression_mode,
3119 3122 data_delta_base=deltainfo.base,
3120 3123 link_rev=link,
3121 3124 parent_rev_1=p1r,
3122 3125 parent_rev_2=p2r,
3123 3126 node_id=node,
3124 3127 sidedata_offset=sidedata_offset,
3125 3128 sidedata_compressed_length=len(serialized_sidedata),
3126 3129 sidedata_compression_mode=sidedata_compression_mode,
3127 3130 rank=rank,
3128 3131 )
3129 3132
3130 3133 self.index.append(e)
3131 3134 entry = self.index.entry_binary(curr)
3132 3135 if curr == 0 and self._docket is None:
3133 3136 header = self._format_flags | self._format_version
3134 3137 header = self.index.pack_header(header)
3135 3138 entry = header + entry
3136 3139 self._writeentry(
3137 3140 transaction,
3138 3141 entry,
3139 3142 deltainfo.data,
3140 3143 link,
3141 3144 offset,
3142 3145 serialized_sidedata,
3143 3146 sidedata_offset,
3144 3147 )
3145 3148
3146 3149 rawtext = btext[0]
3147 3150
3148 3151 if alwayscache and rawtext is None:
3149 3152 rawtext = deltacomputer.buildtext(revinfo)
3150 3153
3151 3154 if type(rawtext) == bytes: # only accept immutable objects
3152 self._revisioncache = (node, curr, rawtext)
3155 self._inner._revisioncache = (node, curr, rawtext)
3153 3156 self._chainbasecache[curr] = deltainfo.chainbase
3154 3157 return curr
3155 3158
3156 3159 def _get_data_offset(self, prev):
3157 3160 """Returns the current offset in the (in-transaction) data file.
3158 3161 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3159 3162 file to store that information: since sidedata can be rewritten to the
3160 3163 end of the data file within a transaction, you can have cases where, for
3161 3164 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3162 3165 to `n - 1`'s sidedata being written after `n`'s data.
3163 3166
3164 3167 TODO cache this in a docket file before getting out of experimental."""
3165 3168 if self._docket is None:
3166 3169 return self.end(prev)
3167 3170 else:
3168 3171 return self._docket.data_end
3169 3172
3170 3173 def _writeentry(
3171 3174 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3172 3175 ):
3173 3176 # Files opened in a+ mode have inconsistent behavior on various
3174 3177 # platforms. Windows requires that a file positioning call be made
3175 3178 # when the file handle transitions between reads and writes. See
3176 3179 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3177 3180 # platforms, Python or the platform itself can be buggy. Some versions
3178 3181 # of Solaris have been observed to not append at the end of the file
3179 3182 # if the file was seeked to before the end. See issue4943 for more.
3180 3183 #
3181 3184 # We work around this issue by inserting a seek() before writing.
3182 3185 # Note: This is likely not necessary on Python 3. However, because
3183 3186 # the file handle is reused for reads and may be seeked there, we need
3184 3187 # to be careful before changing this.
3185 3188 if self._inner._writinghandles is None:
3186 3189 msg = b'adding revision outside `revlog._writing` context'
3187 3190 raise error.ProgrammingError(msg)
3188 3191 ifh, dfh, sdfh = self._inner._writinghandles
3189 3192 if self._docket is None:
3190 3193 ifh.seek(0, os.SEEK_END)
3191 3194 else:
3192 3195 ifh.seek(self._docket.index_end, os.SEEK_SET)
3193 3196 if dfh:
3194 3197 if self._docket is None:
3195 3198 dfh.seek(0, os.SEEK_END)
3196 3199 else:
3197 3200 dfh.seek(self._docket.data_end, os.SEEK_SET)
3198 3201 if sdfh:
3199 3202 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3200 3203
3201 3204 curr = len(self) - 1
3202 3205 if not self._inline:
3203 3206 transaction.add(self._datafile, offset)
3204 3207 if self._sidedatafile:
3205 3208 transaction.add(self._sidedatafile, sidedata_offset)
3206 3209 transaction.add(self._indexfile, curr * len(entry))
3207 3210 if data[0]:
3208 3211 dfh.write(data[0])
3209 3212 dfh.write(data[1])
3210 3213 if sidedata:
3211 3214 sdfh.write(sidedata)
3212 3215 ifh.write(entry)
3213 3216 else:
3214 3217 offset += curr * self.index.entry_size
3215 3218 transaction.add(self._indexfile, offset)
3216 3219 ifh.write(entry)
3217 3220 ifh.write(data[0])
3218 3221 ifh.write(data[1])
3219 3222 assert not sidedata
3220 3223 self._enforceinlinesize(transaction)
3221 3224 if self._docket is not None:
3222 3225 # revlog-v2 always has 3 writing handles, help Pytype
3223 3226 wh1 = self._inner._writinghandles[0]
3224 3227 wh2 = self._inner._writinghandles[1]
3225 3228 wh3 = self._inner._writinghandles[2]
3226 3229 assert wh1 is not None
3227 3230 assert wh2 is not None
3228 3231 assert wh3 is not None
3229 3232 self._docket.index_end = wh1.tell()
3230 3233 self._docket.data_end = wh2.tell()
3231 3234 self._docket.sidedata_end = wh3.tell()
3232 3235
3233 3236 nodemaputil.setup_persistent_nodemap(transaction, self)
3234 3237
3235 3238 def addgroup(
3236 3239 self,
3237 3240 deltas,
3238 3241 linkmapper,
3239 3242 transaction,
3240 3243 alwayscache=False,
3241 3244 addrevisioncb=None,
3242 3245 duplicaterevisioncb=None,
3243 3246 debug_info=None,
3244 3247 delta_base_reuse_policy=None,
3245 3248 ):
3246 3249 """
3247 3250 add a delta group
3248 3251
3249 3252 given a set of deltas, add them to the revision log. the
3250 3253 first delta is against its parent, which should be in our
3251 3254 log, the rest are against the previous delta.
3252 3255
3253 3256 If ``addrevisioncb`` is defined, it will be called with arguments of
3254 3257 this revlog and the node that was added.
3255 3258 """
3256 3259
3257 3260 if self._adding_group:
3258 3261 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3259 3262
3260 3263 # read the default delta-base reuse policy from revlog config if the
3261 3264 # group did not specify one.
3262 3265 if delta_base_reuse_policy is None:
3263 3266 if (
3264 3267 self.delta_config.general_delta
3265 3268 and self.delta_config.lazy_delta_base
3266 3269 ):
3267 3270 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3268 3271 else:
3269 3272 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3270 3273
3271 3274 self._adding_group = True
3272 3275 empty = True
3273 3276 try:
3274 3277 with self._writing(transaction):
3275 3278 write_debug = None
3276 3279 if self.delta_config.debug_delta:
3277 3280 write_debug = transaction._report
3278 3281 deltacomputer = deltautil.deltacomputer(
3279 3282 self,
3280 3283 write_debug=write_debug,
3281 3284 debug_info=debug_info,
3282 3285 )
3283 3286 # loop through our set of deltas
3284 3287 for data in deltas:
3285 3288 (
3286 3289 node,
3287 3290 p1,
3288 3291 p2,
3289 3292 linknode,
3290 3293 deltabase,
3291 3294 delta,
3292 3295 flags,
3293 3296 sidedata,
3294 3297 ) = data
3295 3298 link = linkmapper(linknode)
3296 3299 flags = flags or REVIDX_DEFAULT_FLAGS
3297 3300
3298 3301 rev = self.index.get_rev(node)
3299 3302 if rev is not None:
3300 3303 # this can happen if two branches make the same change
3301 3304 self._nodeduplicatecallback(transaction, rev)
3302 3305 if duplicaterevisioncb:
3303 3306 duplicaterevisioncb(self, rev)
3304 3307 empty = False
3305 3308 continue
3306 3309
3307 3310 for p in (p1, p2):
3308 3311 if not self.index.has_node(p):
3309 3312 raise error.LookupError(
3310 3313 p, self.radix, _(b'unknown parent')
3311 3314 )
3312 3315
3313 3316 if not self.index.has_node(deltabase):
3314 3317 raise error.LookupError(
3315 3318 deltabase, self.display_id, _(b'unknown delta base')
3316 3319 )
3317 3320
3318 3321 baserev = self.rev(deltabase)
3319 3322
3320 3323 if baserev != nullrev and self.iscensored(baserev):
3321 3324 # if base is censored, delta must be full replacement in a
3322 3325 # single patch operation
3323 3326 hlen = struct.calcsize(b">lll")
3324 3327 oldlen = self.rawsize(baserev)
3325 3328 newlen = len(delta) - hlen
3326 3329 if delta[:hlen] != mdiff.replacediffheader(
3327 3330 oldlen, newlen
3328 3331 ):
3329 3332 raise error.CensoredBaseError(
3330 3333 self.display_id, self.node(baserev)
3331 3334 )
3332 3335
3333 3336 if not flags and self._peek_iscensored(baserev, delta):
3334 3337 flags |= REVIDX_ISCENSORED
3335 3338
3336 3339 # We assume consumers of addrevisioncb will want to retrieve
3337 3340 # the added revision, which will require a call to
3338 3341 # revision(). revision() will fast path if there is a cache
3339 3342 # hit. So, we tell _addrevision() to always cache in this case.
3340 3343 # We're only using addgroup() in the context of changegroup
3341 3344 # generation so the revision data can always be handled as raw
3342 3345 # by the flagprocessor.
3343 3346 rev = self._addrevision(
3344 3347 node,
3345 3348 None,
3346 3349 transaction,
3347 3350 link,
3348 3351 p1,
3349 3352 p2,
3350 3353 flags,
3351 3354 (baserev, delta, delta_base_reuse_policy),
3352 3355 alwayscache=alwayscache,
3353 3356 deltacomputer=deltacomputer,
3354 3357 sidedata=sidedata,
3355 3358 )
3356 3359
3357 3360 if addrevisioncb:
3358 3361 addrevisioncb(self, rev)
3359 3362 empty = False
3360 3363 finally:
3361 3364 self._adding_group = False
3362 3365 return not empty
3363 3366
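# Illustrative usage sketch, not part of revlog.py: the 8-tuples consumed by
# addgroup() are what changegroup unpacking yields, and the linkmapper turns
# each linknode into a link revision.  ``rl``, ``tr``, ``cl`` (a changelog)
# and ``deltas`` are hypothetical stand-ins.
#
#   deltas = iter([
#       (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
#       ...
#   ])
#   added_any = rl.addgroup(deltas, cl.rev, tr)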
3364 3367 def iscensored(self, rev):
3365 3368 """Check if a file revision is censored."""
3366 3369 if not self.feature_config.censorable:
3367 3370 return False
3368 3371
3369 3372 return self.flags(rev) & REVIDX_ISCENSORED
3370 3373
3371 3374 def _peek_iscensored(self, baserev, delta):
3372 3375 """Quickly check if a delta produces a censored revision."""
3373 3376 if not self.feature_config.censorable:
3374 3377 return False
3375 3378
3376 3379 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3377 3380
3378 3381 def getstrippoint(self, minlink):
3379 3382 """find the minimum rev that must be stripped to strip the linkrev
3380 3383
3381 3384 Returns a tuple containing the minimum rev and a set of all revs that
3382 3385 have linkrevs that will be broken by this strip.
3383 3386 """
3384 3387 return storageutil.resolvestripinfo(
3385 3388 minlink,
3386 3389 len(self) - 1,
3387 3390 self.headrevs(),
3388 3391 self.linkrev,
3389 3392 self.parentrevs,
3390 3393 )
3391 3394
3392 3395 def strip(self, minlink, transaction):
3393 3396 """truncate the revlog on the first revision with a linkrev >= minlink
3394 3397
3395 3398 This function is called when we're stripping revision minlink and
3396 3399 its descendants from the repository.
3397 3400
3398 3401 We have to remove all revisions with linkrev >= minlink, because
3399 3402 the equivalent changelog revisions will be renumbered after the
3400 3403 strip.
3401 3404
3402 3405 So we truncate the revlog on the first of these revisions, and
3403 3406 trust that the caller has saved the revisions that shouldn't be
3404 3407 removed and that it'll re-add them after this truncation.
3405 3408 """
3406 3409 if len(self) == 0:
3407 3410 return
3408 3411
3409 3412 rev, _ = self.getstrippoint(minlink)
3410 3413 if rev == len(self):
3411 3414 return
3412 3415
3413 3416 # first truncate the files on disk
3414 3417 data_end = self.start(rev)
3415 3418 if not self._inline:
3416 3419 transaction.add(self._datafile, data_end)
3417 3420 end = rev * self.index.entry_size
3418 3421 else:
3419 3422 end = data_end + (rev * self.index.entry_size)
3420 3423
3421 3424 if self._sidedatafile:
3422 3425 sidedata_end = self.sidedata_cut_off(rev)
3423 3426 transaction.add(self._sidedatafile, sidedata_end)
3424 3427
3425 3428 transaction.add(self._indexfile, end)
3426 3429 if self._docket is not None:
3427 3430 # XXX we could leverage the docket while stripping. However it is
3428 3431 # not powerful enough at the time of this comment
3429 3432 self._docket.index_end = end
3430 3433 self._docket.data_end = data_end
3431 3434 self._docket.sidedata_end = sidedata_end
3432 3435 self._docket.write(transaction, stripping=True)
3433 3436
3434 3437 # then reset internal state in memory to forget those revisions
3435 self._revisioncache = None
3438 self._inner._revisioncache = None
3436 3439 self._chaininfocache = util.lrucachedict(500)
3437 3440 self._inner._segmentfile.clear_cache()
3438 3441 self._inner._segmentfile_sidedata.clear_cache()
3439 3442
3440 3443 del self.index[rev:-1]
3441 3444
3442 3445 def checksize(self):
3443 3446 """Check size of index and data files
3444 3447
3445 3448 return a (dd, di) tuple.
3446 3449 - dd: extra bytes for the "data" file
3447 3450 - di: extra bytes for the "index" file
3448 3451
3449 3452 A healthy revlog will return (0, 0).
3450 3453 """
3451 3454 expected = 0
3452 3455 if len(self):
3453 3456 expected = max(0, self.end(len(self) - 1))
3454 3457
3455 3458 try:
3456 3459 with self._datafp() as f:
3457 3460 f.seek(0, io.SEEK_END)
3458 3461 actual = f.tell()
3459 3462 dd = actual - expected
3460 3463 except FileNotFoundError:
3461 3464 dd = 0
3462 3465
3463 3466 try:
3464 3467 f = self.opener(self._indexfile)
3465 3468 f.seek(0, io.SEEK_END)
3466 3469 actual = f.tell()
3467 3470 f.close()
3468 3471 s = self.index.entry_size
3469 3472 i = max(0, actual // s)
3470 3473 di = actual - (i * s)
3471 3474 if self._inline:
3472 3475 databytes = 0
3473 3476 for r in self:
3474 3477 databytes += max(0, self.length(r))
3475 3478 dd = 0
3476 3479 di = actual - len(self) * s - databytes
3477 3480 except FileNotFoundError:
3478 3481 di = 0
3479 3482
3480 3483 return (dd, di)
3481 3484
3482 3485 def files(self):
3483 3486 """return list of files that compose this revlog"""
3484 3487 res = [self._indexfile]
3485 3488 if self._docket_file is None:
3486 3489 if not self._inline:
3487 3490 res.append(self._datafile)
3488 3491 else:
3489 3492 res.append(self._docket_file)
3490 3493 res.extend(self._docket.old_index_filepaths(include_empty=False))
3491 3494 if self._docket.data_end:
3492 3495 res.append(self._datafile)
3493 3496 res.extend(self._docket.old_data_filepaths(include_empty=False))
3494 3497 if self._docket.sidedata_end:
3495 3498 res.append(self._sidedatafile)
3496 3499 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3497 3500 return res
3498 3501
3499 3502 def emitrevisions(
3500 3503 self,
3501 3504 nodes,
3502 3505 nodesorder=None,
3503 3506 revisiondata=False,
3504 3507 assumehaveparentrevisions=False,
3505 3508 deltamode=repository.CG_DELTAMODE_STD,
3506 3509 sidedata_helpers=None,
3507 3510 debug_info=None,
3508 3511 ):
3509 3512 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3510 3513 raise error.ProgrammingError(
3511 3514 b'unhandled value for nodesorder: %s' % nodesorder
3512 3515 )
3513 3516
3514 3517 if nodesorder is None and not self.delta_config.general_delta:
3515 3518 nodesorder = b'storage'
3516 3519
3517 3520 if (
3518 3521 not self._storedeltachains
3519 3522 and deltamode != repository.CG_DELTAMODE_PREV
3520 3523 ):
3521 3524 deltamode = repository.CG_DELTAMODE_FULL
3522 3525
3523 3526 return storageutil.emitrevisions(
3524 3527 self,
3525 3528 nodes,
3526 3529 nodesorder,
3527 3530 revlogrevisiondelta,
3528 3531 deltaparentfn=self.deltaparent,
3529 3532 candeltafn=self._candelta,
3530 3533 rawsizefn=self.rawsize,
3531 3534 revdifffn=self.revdiff,
3532 3535 flagsfn=self.flags,
3533 3536 deltamode=deltamode,
3534 3537 revisiondata=revisiondata,
3535 3538 assumehaveparentrevisions=assumehaveparentrevisions,
3536 3539 sidedata_helpers=sidedata_helpers,
3537 3540 debug_info=debug_info,
3538 3541 )
3539 3542
3540 3543 DELTAREUSEALWAYS = b'always'
3541 3544 DELTAREUSESAMEREVS = b'samerevs'
3542 3545 DELTAREUSENEVER = b'never'
3543 3546
3544 3547 DELTAREUSEFULLADD = b'fulladd'
3545 3548
3546 3549 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3547 3550
3548 3551 def clone(
3549 3552 self,
3550 3553 tr,
3551 3554 destrevlog,
3552 3555 addrevisioncb=None,
3553 3556 deltareuse=DELTAREUSESAMEREVS,
3554 3557 forcedeltabothparents=None,
3555 3558 sidedata_helpers=None,
3556 3559 ):
3557 3560 """Copy this revlog to another, possibly with format changes.
3558 3561
3559 3562 The destination revlog will contain the same revisions and nodes.
3560 3563 However, it may not be bit-for-bit identical due to e.g. delta encoding
3561 3564 differences.
3562 3565
3563 3566 The ``deltareuse`` argument control how deltas from the existing revlog
3564 3567 are preserved in the destination revlog. The argument can have the
3565 3568 following values:
3566 3569
3567 3570 DELTAREUSEALWAYS
3568 3571 Deltas will always be reused (if possible), even if the destination
3569 3572 revlog would not select the same revisions for the delta. This is the
3570 3573 fastest mode of operation.
3571 3574 DELTAREUSESAMEREVS
3572 3575 Deltas will be reused if the destination revlog would pick the same
3573 3576 revisions for the delta. This mode strikes a balance between speed
3574 3577 and optimization.
3575 3578 DELTAREUSENEVER
3576 3579 Deltas will never be reused. This is the slowest mode of execution.
3577 3580 This mode can be used to recompute deltas (e.g. if the diff/delta
3578 3581 algorithm changes).
3579 3582 DELTAREUSEFULLADD
3580 3583 Revisions will be re-added as if they were new content. This is
3581 3584 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3582 3585 e.g. large file detection and handling.
3583 3586
3584 3587 Delta computation can be slow, so the choice of delta reuse policy can
3585 3588 significantly affect run time.
3586 3589
3587 3590 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3588 3591 two extremes. Deltas will be reused if they are appropriate. But if the
3589 3592 delta could choose a better revision, it will do so. This means if you
3590 3593 are converting a non-generaldelta revlog to a generaldelta revlog,
3591 3594 deltas will be recomputed if the delta's parent isn't a parent of the
3592 3595 revision.
3593 3596
3594 3597 In addition to the delta policy, the ``forcedeltabothparents``
3595 3598 argument controls whether to force computing deltas against both parents
3596 3599 for merges. By default, the destination revlog's existing setting is used.
3597 3600
3598 3601 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3599 3602 `sidedata_helpers`.
3600 3603 """
3601 3604 if deltareuse not in self.DELTAREUSEALL:
3602 3605 raise ValueError(
3603 3606 _(b'value for deltareuse invalid: %s') % deltareuse
3604 3607 )
3605 3608
3606 3609 if len(destrevlog):
3607 3610 raise ValueError(_(b'destination revlog is not empty'))
3608 3611
3609 3612 if getattr(self, 'filteredrevs', None):
3610 3613 raise ValueError(_(b'source revlog has filtered revisions'))
3611 3614 if getattr(destrevlog, 'filteredrevs', None):
3612 3615 raise ValueError(_(b'destination revlog has filtered revisions'))
3613 3616
3614 3617 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3615 3618 # if possible.
3616 3619 old_delta_config = destrevlog.delta_config
3617 3620 destrevlog.delta_config = destrevlog.delta_config.copy()
3618 3621
3619 3622 try:
3620 3623 if deltareuse == self.DELTAREUSEALWAYS:
3621 3624 destrevlog.delta_config.lazy_delta_base = True
3622 3625 destrevlog.delta_config.lazy_delta = True
3623 3626 elif deltareuse == self.DELTAREUSESAMEREVS:
3624 3627 destrevlog.delta_config.lazy_delta_base = False
3625 3628 destrevlog.delta_config.lazy_delta = True
3626 3629 elif deltareuse == self.DELTAREUSENEVER:
3627 3630 destrevlog.delta_config.lazy_delta_base = False
3628 3631 destrevlog.delta_config.lazy_delta = False
3629 3632
3630 3633 delta_both_parents = (
3631 3634 forcedeltabothparents or old_delta_config.delta_both_parents
3632 3635 )
3633 3636 destrevlog.delta_config.delta_both_parents = delta_both_parents
3634 3637
3635 3638 with self.reading(), destrevlog._writing(tr):
3636 3639 self._clone(
3637 3640 tr,
3638 3641 destrevlog,
3639 3642 addrevisioncb,
3640 3643 deltareuse,
3641 3644 forcedeltabothparents,
3642 3645 sidedata_helpers,
3643 3646 )
3644 3647
3645 3648 finally:
3646 3649 destrevlog.delta_config = old_delta_config
3647 3650
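# Illustrative usage sketch, not part of revlog.py: cloning a revlog while
# recomputing every delta, e.g. when converting a repository to general-delta
# storage.  ``src``, ``dst`` and ``tr`` are hypothetical source/destination
# revlogs and an open transaction.
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)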
3648 3651 def _clone(
3649 3652 self,
3650 3653 tr,
3651 3654 destrevlog,
3652 3655 addrevisioncb,
3653 3656 deltareuse,
3654 3657 forcedeltabothparents,
3655 3658 sidedata_helpers,
3656 3659 ):
3657 3660 """perform the core duty of `revlog.clone` after parameter processing"""
3658 3661 write_debug = None
3659 3662 if self.delta_config.debug_delta:
3660 3663 write_debug = tr._report
3661 3664 deltacomputer = deltautil.deltacomputer(
3662 3665 destrevlog,
3663 3666 write_debug=write_debug,
3664 3667 )
3665 3668 index = self.index
3666 3669 for rev in self:
3667 3670 entry = index[rev]
3668 3671
3669 3672 # Some classes override linkrev to take filtered revs into
3670 3673 # account. Use raw entry from index.
3671 3674 flags = entry[0] & 0xFFFF
3672 3675 linkrev = entry[4]
3673 3676 p1 = index[entry[5]][7]
3674 3677 p2 = index[entry[6]][7]
3675 3678 node = entry[7]
3676 3679
3677 3680 # (Possibly) reuse the delta from the revlog if allowed and
3678 3681 # the revlog chunk is a delta.
3679 3682 cachedelta = None
3680 3683 rawtext = None
3681 3684 if deltareuse == self.DELTAREUSEFULLADD:
3682 3685 text = self._revisiondata(rev)
3683 3686 sidedata = self.sidedata(rev)
3684 3687
3685 3688 if sidedata_helpers is not None:
3686 3689 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3687 3690 self, sidedata_helpers, sidedata, rev
3688 3691 )
3689 3692 flags = flags | new_flags[0] & ~new_flags[1]
3690 3693
3691 3694 destrevlog.addrevision(
3692 3695 text,
3693 3696 tr,
3694 3697 linkrev,
3695 3698 p1,
3696 3699 p2,
3697 3700 cachedelta=cachedelta,
3698 3701 node=node,
3699 3702 flags=flags,
3700 3703 deltacomputer=deltacomputer,
3701 3704 sidedata=sidedata,
3702 3705 )
3703 3706 else:
3704 3707 if destrevlog.delta_config.lazy_delta:
3705 3708 dp = self.deltaparent(rev)
3706 3709 if dp != nullrev:
3707 3710 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3708 3711
3709 3712 sidedata = None
3710 3713 if not cachedelta:
3711 3714 try:
3712 3715 rawtext = self._revisiondata(rev)
3713 3716 except error.CensoredNodeError as censored:
3714 3717 assert flags & REVIDX_ISCENSORED
3715 3718 rawtext = censored.tombstone
3716 3719 sidedata = self.sidedata(rev)
3717 3720 if sidedata is None:
3718 3721 sidedata = self.sidedata(rev)
3719 3722
3720 3723 if sidedata_helpers is not None:
3721 3724 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3722 3725 self, sidedata_helpers, sidedata, rev
3723 3726 )
3724 3727 flags = flags | new_flags[0] & ~new_flags[1]
3725 3728
3726 3729 destrevlog._addrevision(
3727 3730 node,
3728 3731 rawtext,
3729 3732 tr,
3730 3733 linkrev,
3731 3734 p1,
3732 3735 p2,
3733 3736 flags,
3734 3737 cachedelta,
3735 3738 deltacomputer=deltacomputer,
3736 3739 sidedata=sidedata,
3737 3740 )
3738 3741
3739 3742 if addrevisioncb:
3740 3743 addrevisioncb(self, rev, node)
3741 3744
3742 3745 def censorrevision(self, tr, censornode, tombstone=b''):
3743 3746 if self._format_version == REVLOGV0:
3744 3747 raise error.RevlogError(
3745 3748 _(b'cannot censor with version %d revlogs')
3746 3749 % self._format_version
3747 3750 )
3748 3751 elif self._format_version == REVLOGV1:
3749 3752 rewrite.v1_censor(self, tr, censornode, tombstone)
3750 3753 else:
3751 3754 rewrite.v2_censor(self, tr, censornode, tombstone)
3752 3755
3753 3756 def verifyintegrity(self, state):
3754 3757 """Verifies the integrity of the revlog.
3755 3758
3756 3759 Yields ``revlogproblem`` instances describing problems that are
3757 3760 found.
3758 3761 """
3759 3762 dd, di = self.checksize()
3760 3763 if dd:
3761 3764 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3762 3765 if di:
3763 3766 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3764 3767
3765 3768 version = self._format_version
3766 3769
3767 3770 # The verifier tells us what version revlog we should be.
3768 3771 if version != state[b'expectedversion']:
3769 3772 yield revlogproblem(
3770 3773 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3771 3774 % (self.display_id, version, state[b'expectedversion'])
3772 3775 )
3773 3776
3774 3777 state[b'skipread'] = set()
3775 3778 state[b'safe_renamed'] = set()
3776 3779
3777 3780 for rev in self:
3778 3781 node = self.node(rev)
3779 3782
3780 3783 # Verify contents. 4 cases to care about:
3781 3784 #
3782 3785 # common: the most common case
3783 3786 # rename: with a rename
3784 3787 # meta: file content starts with b'\1\n', the metadata
3785 3788 # header defined in filelog.py, but without a rename
3786 3789 # ext: content stored externally
3787 3790 #
3788 3791 # More formally, their differences are shown below:
3789 3792 #
3790 3793 # | common | rename | meta | ext
3791 3794 # -------------------------------------------------------
3792 3795 # flags() | 0 | 0 | 0 | not 0
3793 3796 # renamed() | False | True | False | ?
3794 3797 # rawtext[0:2]=='\1\n'| False | True | True | ?
3795 3798 #
3796 3799 # "rawtext" means the raw text stored in revlog data, which
3797 3800 # could be retrieved by "rawdata(rev)". "text"
3798 3801 # mentioned below is "revision(rev)".
3799 3802 #
3800 3803 # There are 3 different lengths stored physically:
3801 3804 # 1. L1: rawsize, stored in revlog index
3802 3805 # 2. L2: len(rawtext), stored in revlog data
3803 3806 # 3. L3: len(text), stored in revlog data if flags==0, or
3804 3807 # possibly somewhere else if flags!=0
3805 3808 #
3806 3809 # L1 should be equal to L2. L3 could be different from them.
3807 3810 # "text" may or may not affect commit hash depending on flag
3808 3811 # processors (see flagutil.addflagprocessor).
3809 3812 #
3810 3813 # | common | rename | meta | ext
3811 3814 # -------------------------------------------------
3812 3815 # rawsize() | L1 | L1 | L1 | L1
3813 3816 # size() | L1 | L2-LM | L1(*) | L1 (?)
3814 3817 # len(rawtext) | L2 | L2 | L2 | L2
3815 3818 # len(text) | L2 | L2 | L2 | L3
3816 3819 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3817 3820 #
3818 3821 # LM: length of metadata, depending on rawtext
3819 3822 # (*): not ideal, see comment in filelog.size
3820 3823 # (?): could be "- len(meta)" if the resolved content has
3821 3824 # rename metadata
3822 3825 #
3823 3826 # Checks needed to be done:
3824 3827 # 1. length check: L1 == L2, in all cases.
3825 3828 # 2. hash check: depending on flag processor, we may need to
3826 3829 # use either "text" (external), or "rawtext" (in revlog).
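# A worked instance of the table above (illustrative, not from revlog.py):
# a renamed file stores
#     rawtext = b'\x01\ncopy: old-path\ncopyrev: <hex>\n\x01\n' + text
# so L1 == L2 == len(rawtext), while read() strips the metadata header and
# returns L2 - LM bytes; the node hash is still computed over rawtext.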
3827 3830
3828 3831 try:
3829 3832 skipflags = state.get(b'skipflags', 0)
3830 3833 if skipflags:
3831 3834 skipflags &= self.flags(rev)
3832 3835
3833 3836 _verify_revision(self, skipflags, state, node)
3834 3837
3835 3838 l1 = self.rawsize(rev)
3836 3839 l2 = len(self.rawdata(node))
3837 3840
3838 3841 if l1 != l2:
3839 3842 yield revlogproblem(
3840 3843 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3841 3844 node=node,
3842 3845 )
3843 3846
3844 3847 except error.CensoredNodeError:
3845 3848 if state[b'erroroncensored']:
3846 3849 yield revlogproblem(
3847 3850 error=_(b'censored file data'), node=node
3848 3851 )
3849 3852 state[b'skipread'].add(node)
3850 3853 except Exception as e:
3851 3854 yield revlogproblem(
3852 3855 error=_(b'unpacking %s: %s')
3853 3856 % (short(node), stringutil.forcebytestr(e)),
3854 3857 node=node,
3855 3858 )
3856 3859 state[b'skipread'].add(node)
3857 3860
3858 3861 def storageinfo(
3859 3862 self,
3860 3863 exclusivefiles=False,
3861 3864 sharedfiles=False,
3862 3865 revisionscount=False,
3863 3866 trackedsize=False,
3864 3867 storedsize=False,
3865 3868 ):
3866 3869 d = {}
3867 3870
3868 3871 if exclusivefiles:
3869 3872 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3870 3873 if not self._inline:
3871 3874 d[b'exclusivefiles'].append((self.opener, self._datafile))
3872 3875
3873 3876 if sharedfiles:
3874 3877 d[b'sharedfiles'] = []
3875 3878
3876 3879 if revisionscount:
3877 3880 d[b'revisionscount'] = len(self)
3878 3881
3879 3882 if trackedsize:
3880 3883 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3881 3884
3882 3885 if storedsize:
3883 3886 d[b'storedsize'] = sum(
3884 3887 self.opener.stat(path).st_size for path in self.files()
3885 3888 )
3886 3889
3887 3890 return d
3888 3891
3889 3892 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3890 3893 if not self.feature_config.has_side_data:
3891 3894 return
3892 3895 # revlog formats with sidedata support do not support inline
3893 3896 assert not self._inline
3894 3897 if not helpers[1] and not helpers[2]:
3895 3898 # Nothing to generate or remove
3896 3899 return
3897 3900
3898 3901 new_entries = []
3899 3902 # append the new sidedata
3900 3903 with self._writing(transaction):
3901 3904 ifh, dfh, sdfh = self._inner._writinghandles
3902 3905 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3903 3906
3904 3907 current_offset = sdfh.tell()
3905 3908 for rev in range(startrev, endrev + 1):
3906 3909 entry = self.index[rev]
3907 3910 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3908 3911 store=self,
3909 3912 sidedata_helpers=helpers,
3910 3913 sidedata={},
3911 3914 rev=rev,
3912 3915 )
3913 3916
3914 3917 serialized_sidedata = sidedatautil.serialize_sidedata(
3915 3918 new_sidedata
3916 3919 )
3917 3920
3918 3921 sidedata_compression_mode = COMP_MODE_INLINE
3919 3922 if serialized_sidedata and self.feature_config.has_side_data:
3920 3923 sidedata_compression_mode = COMP_MODE_PLAIN
3921 3924 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3922 3925 if (
3923 3926 h != b'u'
3924 3927 and comp_sidedata[0] != b'\0'
3925 3928 and len(comp_sidedata) < len(serialized_sidedata)
3926 3929 ):
3927 3930 assert not h
3928 3931 if (
3929 3932 comp_sidedata[0]
3930 3933 == self._docket.default_compression_header
3931 3934 ):
3932 3935 sidedata_compression_mode = COMP_MODE_DEFAULT
3933 3936 serialized_sidedata = comp_sidedata
3934 3937 else:
3935 3938 sidedata_compression_mode = COMP_MODE_INLINE
3936 3939 serialized_sidedata = comp_sidedata
3937 3940 if entry[8] != 0 or entry[9] != 0:
3938 3941 # rewriting entries that already have sidedata is not
3939 3942 # supported yet, because it introduces garbage data in the
3940 3943 # revlog.
3941 3944 msg = b"rewriting existing sidedata is not supported yet"
3942 3945 raise error.Abort(msg)
3943 3946
3944 3947 # Apply (potential) flags to add and to remove after running
3945 3948 # the sidedata helpers
3946 3949 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3947 3950 entry_update = (
3948 3951 current_offset,
3949 3952 len(serialized_sidedata),
3950 3953 new_offset_flags,
3951 3954 sidedata_compression_mode,
3952 3955 )
3953 3956
3954 3957 # the sidedata computation might have moved the file cursors around
3955 3958 sdfh.seek(current_offset, os.SEEK_SET)
3956 3959 sdfh.write(serialized_sidedata)
3957 3960 new_entries.append(entry_update)
3958 3961 current_offset += len(serialized_sidedata)
3959 3962 self._docket.sidedata_end = sdfh.tell()
3960 3963
3961 3964 # rewrite the new index entries
3962 3965 ifh.seek(startrev * self.index.entry_size)
3963 3966 for i, e in enumerate(new_entries):
3964 3967 rev = startrev + i
3965 3968 self.index.replace_sidedata_info(rev, *e)
3966 3969 packed = self.index.entry_binary(rev)
3967 3970 if rev == 0 and self._docket is None:
3968 3971 header = self._format_flags | self._format_version
3969 3972 header = self.index.pack_header(header)
3970 3973 packed = header + packed
3971 3974 ifh.write(packed)