revlog: move `offset_type` to `revlogutils`...
marmoute
r48186:ac60a136 default
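
The helper being relocated packs a revision's data offset and its flag bits into a single integer, which is stored as the first field of an index entry; `gettype()` recovers the flags. Below is a minimal sketch of that packing, mirroring the pure-Python helpers visible in this diff (the new `revlogutils` home of the function is not shown here). Note that the copy removed from revlog.py additionally rejected flags outside REVIDX_KNOWN_FLAGS.

def offset_type(offset, type):
    # upper bits carry the data offset, the low 16 bits carry the flags
    return int(int(offset) << 16 | type)

def gettype(q):
    # recover just the flag bits
    return int(q & 0xFFFF)

packed = offset_type(4096, 1)
assert packed >> 16 == 4096   # offset round-trips
assert gettype(packed) == 1   # flags round-trip
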
@@ -1,716 +1,717 b''
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import os
17 17 import shutil
18 18
19 19 from .i18n import _
20 20 from .node import (
21 21 hex,
22 22 nullrev,
23 23 )
24 24
25 25 from . import (
26 26 bundle2,
27 27 changegroup,
28 28 changelog,
29 29 cmdutil,
30 30 discovery,
31 31 encoding,
32 32 error,
33 33 exchange,
34 34 filelog,
35 35 localrepo,
36 36 manifest,
37 37 mdiff,
38 38 pathutil,
39 39 phases,
40 40 pycompat,
41 41 revlog,
42 revlogutils,
42 43 util,
43 44 vfs as vfsmod,
44 45 )
45 46 from .utils import (
46 47 urlutil,
47 48 )
48 49
49 50 from .revlogutils import (
50 51 constants as revlog_constants,
51 52 )
52 53
53 54
54 55 class bundlerevlog(revlog.revlog):
55 56 def __init__(self, opener, target, radix, cgunpacker, linkmapper):
56 57 # How it works:
57 58 # To retrieve a revision, we need to know the offset of the revision in
58 59 # the bundle (an unbundle object). We store this offset in the index
59 60 # (start). The base of the delta is stored in the base field.
60 61 #
61 62 # To differentiate a rev in the bundle from a rev in the revlog, we
62 63 # check revision against repotiprev.
63 64 opener = vfsmod.readonlyvfs(opener)
64 65 revlog.revlog.__init__(self, opener, target=target, radix=radix)
65 66 self.bundle = cgunpacker
66 67 n = len(self)
67 68 self.repotiprev = n - 1
68 69 self.bundlerevs = set() # used by 'bundle()' revset expression
69 70 for deltadata in cgunpacker.deltaiter():
70 71 node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
71 72
72 73 size = len(delta)
73 74 start = cgunpacker.tell() - size
74 75
75 76 if self.index.has_node(node):
76 77 # this can happen if two branches make the same change
77 78 self.bundlerevs.add(self.index.rev(node))
78 79 continue
79 80 if cs == node:
80 81 linkrev = nullrev
81 82 else:
82 83 linkrev = linkmapper(cs)
83 84
84 85 for p in (p1, p2):
85 86 if not self.index.has_node(p):
86 87 raise error.LookupError(
87 88 p, self.display_id, _(b"unknown parent")
88 89 )
89 90
90 91 if not self.index.has_node(deltabase):
91 92 raise LookupError(
92 93 deltabase, self.display_id, _(b'unknown delta base')
93 94 )
94 95
95 96 baserev = self.rev(deltabase)
96 97 # start, size, full unc. size, base (unused), link, p1, p2, node, sidedata_offset (unused), sidedata_size (unused)
97 98 e = (
98 revlog.offset_type(start, flags),
99 revlogutils.offset_type(start, flags),
99 100 size,
100 101 -1,
101 102 baserev,
102 103 linkrev,
103 104 self.rev(p1),
104 105 self.rev(p2),
105 106 node,
106 107 0,
107 108 0,
108 109 revlog_constants.COMP_MODE_INLINE,
109 110 revlog_constants.COMP_MODE_INLINE,
110 111 )
111 112 self.index.append(e)
112 113 self.bundlerevs.add(n)
113 114 n += 1
114 115
115 116 def _chunk(self, rev, df=None):
116 117 # Warning: in case of bundle, the diff is against what we stored as
117 118 # delta base, not against rev - 1
118 119 # XXX: could use some caching
119 120 if rev <= self.repotiprev:
120 121 return revlog.revlog._chunk(self, rev)
121 122 self.bundle.seek(self.start(rev))
122 123 return self.bundle.read(self.length(rev))
123 124
124 125 def revdiff(self, rev1, rev2):
125 126 """return or calculate a delta between two revisions"""
126 127 if rev1 > self.repotiprev and rev2 > self.repotiprev:
127 128 # hot path for bundle
128 129 revb = self.index[rev2][3]
129 130 if revb == rev1:
130 131 return self._chunk(rev2)
131 132 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
132 133 return revlog.revlog.revdiff(self, rev1, rev2)
133 134
134 135 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
135 136
136 137 def _rawtext(self, node, rev, _df=None):
137 138 if rev is None:
138 139 rev = self.rev(node)
139 140 validated = False
140 141 rawtext = None
141 142 chain = []
142 143 iterrev = rev
143 144 # reconstruct the revision if it is from a changegroup
144 145 while iterrev > self.repotiprev:
145 146 if self._revisioncache and self._revisioncache[1] == iterrev:
146 147 rawtext = self._revisioncache[2]
147 148 break
148 149 chain.append(iterrev)
149 150 iterrev = self.index[iterrev][3]
150 151 if iterrev == nullrev:
151 152 rawtext = b''
152 153 elif rawtext is None:
153 154 r = super(bundlerevlog, self)._rawtext(
154 155 self.node(iterrev), iterrev, _df=_df
155 156 )
156 157 __, rawtext, validated = r
157 158 if chain:
158 159 validated = False
159 160 while chain:
160 161 delta = self._chunk(chain.pop())
161 162 rawtext = mdiff.patches(rawtext, [delta])
162 163 return rev, rawtext, validated
163 164
164 165 def addrevision(self, *args, **kwargs):
165 166 raise NotImplementedError
166 167
167 168 def addgroup(self, *args, **kwargs):
168 169 raise NotImplementedError
169 170
170 171 def strip(self, *args, **kwargs):
171 172 raise NotImplementedError
172 173
173 174 def checksize(self):
174 175 raise NotImplementedError
175 176
176 177
177 178 class bundlechangelog(bundlerevlog, changelog.changelog):
178 179 def __init__(self, opener, cgunpacker):
179 180 changelog.changelog.__init__(self, opener)
180 181 linkmapper = lambda x: x
181 182 bundlerevlog.__init__(
182 183 self,
183 184 opener,
184 185 (revlog_constants.KIND_CHANGELOG, None),
185 186 self.radix,
186 187 cgunpacker,
187 188 linkmapper,
188 189 )
189 190
190 191
191 192 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
192 193 def __init__(
193 194 self,
194 195 nodeconstants,
195 196 opener,
196 197 cgunpacker,
197 198 linkmapper,
198 199 dirlogstarts=None,
199 200 dir=b'',
200 201 ):
201 202 manifest.manifestrevlog.__init__(self, nodeconstants, opener, tree=dir)
202 203 bundlerevlog.__init__(
203 204 self,
204 205 opener,
205 206 (revlog_constants.KIND_MANIFESTLOG, dir),
206 207 self._revlog.radix,
207 208 cgunpacker,
208 209 linkmapper,
209 210 )
210 211 if dirlogstarts is None:
211 212 dirlogstarts = {}
212 213 if self.bundle.version == b"03":
213 214 dirlogstarts = _getfilestarts(self.bundle)
214 215 self._dirlogstarts = dirlogstarts
215 216 self._linkmapper = linkmapper
216 217
217 218 def dirlog(self, d):
218 219 if d in self._dirlogstarts:
219 220 self.bundle.seek(self._dirlogstarts[d])
220 221 return bundlemanifest(
221 222 self.nodeconstants,
222 223 self.opener,
223 224 self.bundle,
224 225 self._linkmapper,
225 226 self._dirlogstarts,
226 227 dir=d,
227 228 )
228 229 return super(bundlemanifest, self).dirlog(d)
229 230
230 231
231 232 class bundlefilelog(filelog.filelog):
232 233 def __init__(self, opener, path, cgunpacker, linkmapper):
233 234 filelog.filelog.__init__(self, opener, path)
234 235 self._revlog = bundlerevlog(
235 236 opener,
236 237 # XXX should use the unencoded path
237 238 target=(revlog_constants.KIND_FILELOG, path),
238 239 radix=self._revlog.radix,
239 240 cgunpacker=cgunpacker,
240 241 linkmapper=linkmapper,
241 242 )
242 243
243 244
244 245 class bundlepeer(localrepo.localpeer):
245 246 def canpush(self):
246 247 return False
247 248
248 249
249 250 class bundlephasecache(phases.phasecache):
250 251 def __init__(self, *args, **kwargs):
251 252 super(bundlephasecache, self).__init__(*args, **kwargs)
252 253 if util.safehasattr(self, 'opener'):
253 254 self.opener = vfsmod.readonlyvfs(self.opener)
254 255
255 256 def write(self):
256 257 raise NotImplementedError
257 258
258 259 def _write(self, fp):
259 260 raise NotImplementedError
260 261
261 262 def _updateroots(self, phase, newroots, tr):
262 263 self.phaseroots[phase] = newroots
263 264 self.invalidate()
264 265 self.dirty = True
265 266
266 267
267 268 def _getfilestarts(cgunpacker):
268 269 filespos = {}
269 270 for chunkdata in iter(cgunpacker.filelogheader, {}):
270 271 fname = chunkdata[b'filename']
271 272 filespos[fname] = cgunpacker.tell()
272 273 for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
273 274 pass
274 275 return filespos
275 276
276 277
277 278 class bundlerepository(object):
278 279 """A repository instance that is a union of a local repo and a bundle.
279 280
280 281 Instances represent a read-only repository composed of a local repository
281 282 with the contents of a bundle file applied. The repository instance is
282 283 conceptually similar to the state of a repository after an
283 284 ``hg unbundle`` operation. However, the contents of the bundle are never
284 285 applied to the actual base repository.
285 286
286 287 Instances constructed directly are not usable as repository objects.
287 288 Use instance() or makebundlerepository() to create instances.
288 289 """
289 290
290 291 def __init__(self, bundlepath, url, tempparent):
291 292 self._tempparent = tempparent
292 293 self._url = url
293 294
294 295 self.ui.setconfig(b'phases', b'publish', False, b'bundlerepo')
295 296
296 297 self.tempfile = None
297 298 f = util.posixfile(bundlepath, b"rb")
298 299 bundle = exchange.readbundle(self.ui, f, bundlepath)
299 300
300 301 if isinstance(bundle, bundle2.unbundle20):
301 302 self._bundlefile = bundle
302 303 self._cgunpacker = None
303 304
304 305 cgpart = None
305 306 for part in bundle.iterparts(seekable=True):
306 307 if part.type == b'changegroup':
307 308 if cgpart:
308 309 raise NotImplementedError(
309 310 b"can't process multiple changegroups"
310 311 )
311 312 cgpart = part
312 313
313 314 self._handlebundle2part(bundle, part)
314 315
315 316 if not cgpart:
316 317 raise error.Abort(_(b"No changegroups found"))
317 318
318 319 # This is required to placate a later consumer, which expects
319 320 # the payload offset to be at the beginning of the changegroup.
320 321 # We need to do this after the iterparts() generator advances
321 322 # because iterparts() will seek to end of payload after the
322 323 # generator returns control to iterparts().
323 324 cgpart.seek(0, os.SEEK_SET)
324 325
325 326 elif isinstance(bundle, changegroup.cg1unpacker):
326 327 if bundle.compressed():
327 328 f = self._writetempbundle(
328 329 bundle.read, b'.hg10un', header=b'HG10UN'
329 330 )
330 331 bundle = exchange.readbundle(self.ui, f, bundlepath, self.vfs)
331 332
332 333 self._bundlefile = bundle
333 334 self._cgunpacker = bundle
334 335 else:
335 336 raise error.Abort(
336 337 _(b'bundle type %s cannot be read') % type(bundle)
337 338 )
338 339
339 340 # dict with the mapping 'filename' -> position in the changegroup.
340 341 self._cgfilespos = {}
341 342
342 343 self.firstnewrev = self.changelog.repotiprev + 1
343 344 phases.retractboundary(
344 345 self,
345 346 None,
346 347 phases.draft,
347 348 [ctx.node() for ctx in self[self.firstnewrev :]],
348 349 )
349 350
350 351 def _handlebundle2part(self, bundle, part):
351 352 if part.type != b'changegroup':
352 353 return
353 354
354 355 cgstream = part
355 356 version = part.params.get(b'version', b'01')
356 357 legalcgvers = changegroup.supportedincomingversions(self)
357 358 if version not in legalcgvers:
358 359 msg = _(b'Unsupported changegroup version: %s')
359 360 raise error.Abort(msg % version)
360 361 if bundle.compressed():
361 362 cgstream = self._writetempbundle(part.read, b'.cg%sun' % version)
362 363
363 364 self._cgunpacker = changegroup.getunbundler(version, cgstream, b'UN')
364 365
365 366 def _writetempbundle(self, readfn, suffix, header=b''):
366 367 """Write a temporary file to disk"""
367 368 fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix)
368 369 self.tempfile = temp
369 370
370 371 with os.fdopen(fdtemp, 'wb') as fptemp:
371 372 fptemp.write(header)
372 373 while True:
373 374 chunk = readfn(2 ** 18)
374 375 if not chunk:
375 376 break
376 377 fptemp.write(chunk)
377 378
378 379 return self.vfs.open(self.tempfile, mode=b"rb")
379 380
380 381 @localrepo.unfilteredpropertycache
381 382 def _phasecache(self):
382 383 return bundlephasecache(self, self._phasedefaults)
383 384
384 385 @localrepo.unfilteredpropertycache
385 386 def changelog(self):
386 387 # consume the header if it exists
387 388 self._cgunpacker.changelogheader()
388 389 c = bundlechangelog(self.svfs, self._cgunpacker)
389 390 self.manstart = self._cgunpacker.tell()
390 391 return c
391 392
392 393 def _refreshchangelog(self):
393 394 # the changelog for a bundle repo is not a filecache, so this method is
394 395 # not applicable.
395 396 pass
396 397
397 398 @localrepo.unfilteredpropertycache
398 399 def manifestlog(self):
399 400 self._cgunpacker.seek(self.manstart)
400 401 # consume the header if it exists
401 402 self._cgunpacker.manifestheader()
402 403 linkmapper = self.unfiltered().changelog.rev
403 404 rootstore = bundlemanifest(
404 405 self.nodeconstants, self.svfs, self._cgunpacker, linkmapper
405 406 )
406 407 self.filestart = self._cgunpacker.tell()
407 408
408 409 return manifest.manifestlog(
409 410 self.svfs, self, rootstore, self.narrowmatch()
410 411 )
411 412
412 413 def _consumemanifest(self):
413 414 """Consumes the manifest portion of the bundle, setting filestart so the
414 415 file portion can be read."""
415 416 self._cgunpacker.seek(self.manstart)
416 417 self._cgunpacker.manifestheader()
417 418 for delta in self._cgunpacker.deltaiter():
418 419 pass
419 420 self.filestart = self._cgunpacker.tell()
420 421
421 422 @localrepo.unfilteredpropertycache
422 423 def manstart(self):
423 424 self.changelog
424 425 return self.manstart
425 426
426 427 @localrepo.unfilteredpropertycache
427 428 def filestart(self):
428 429 self.manifestlog
429 430
430 431 # If filestart was not set by self.manifestlog, that means the
431 432 # manifestlog implementation did not consume the manifests from the
432 433 # changegroup (ex: it might be consuming trees from a separate bundle2
433 434 # part instead). So we need to manually consume it.
434 435 if 'filestart' not in self.__dict__:
435 436 self._consumemanifest()
436 437
437 438 return self.filestart
438 439
439 440 def url(self):
440 441 return self._url
441 442
442 443 def file(self, f):
443 444 if not self._cgfilespos:
444 445 self._cgunpacker.seek(self.filestart)
445 446 self._cgfilespos = _getfilestarts(self._cgunpacker)
446 447
447 448 if f in self._cgfilespos:
448 449 self._cgunpacker.seek(self._cgfilespos[f])
449 450 linkmapper = self.unfiltered().changelog.rev
450 451 return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
451 452 else:
452 453 return super(bundlerepository, self).file(f)
453 454
454 455 def close(self):
455 456 """Close assigned bundle file immediately."""
456 457 self._bundlefile.close()
457 458 if self.tempfile is not None:
458 459 self.vfs.unlink(self.tempfile)
459 460 if self._tempparent:
460 461 shutil.rmtree(self._tempparent, True)
461 462
462 463 def cancopy(self):
463 464 return False
464 465
465 466 def peer(self):
466 467 return bundlepeer(self)
467 468
468 469 def getcwd(self):
469 470 return encoding.getcwd() # always outside the repo
470 471
471 472 # Check if parents exist in localrepo before setting
472 473 def setparents(self, p1, p2=None):
473 474 if p2 is None:
474 475 p2 = self.nullid
475 476 p1rev = self.changelog.rev(p1)
476 477 p2rev = self.changelog.rev(p2)
477 478 msg = _(b"setting parent to node %s that only exists in the bundle\n")
478 479 if self.changelog.repotiprev < p1rev:
479 480 self.ui.warn(msg % hex(p1))
480 481 if self.changelog.repotiprev < p2rev:
481 482 self.ui.warn(msg % hex(p2))
482 483 return super(bundlerepository, self).setparents(p1, p2)
483 484
484 485
485 486 def instance(ui, path, create, intents=None, createopts=None):
486 487 if create:
487 488 raise error.Abort(_(b'cannot create new bundle repository'))
488 489 # internal config: bundle.mainreporoot
489 490 parentpath = ui.config(b"bundle", b"mainreporoot")
490 491 if not parentpath:
491 492 # try to find the correct path to the working directory repo
492 493 parentpath = cmdutil.findrepo(encoding.getcwd())
493 494 if parentpath is None:
494 495 parentpath = b''
495 496 if parentpath:
496 497 # Try to make the full path relative so we get a nice, short URL.
497 498 # In particular, we don't want temp dir names in test outputs.
498 499 cwd = encoding.getcwd()
499 500 if parentpath == cwd:
500 501 parentpath = b''
501 502 else:
502 503 cwd = pathutil.normasprefix(cwd)
503 504 if parentpath.startswith(cwd):
504 505 parentpath = parentpath[len(cwd) :]
505 506 u = urlutil.url(path)
506 507 path = u.localpath()
507 508 if u.scheme == b'bundle':
508 509 s = path.split(b"+", 1)
509 510 if len(s) == 1:
510 511 repopath, bundlename = parentpath, s[0]
511 512 else:
512 513 repopath, bundlename = s
513 514 else:
514 515 repopath, bundlename = parentpath, path
515 516
516 517 return makebundlerepository(ui, repopath, bundlename)
517 518
518 519
519 520 def makebundlerepository(ui, repopath, bundlepath):
520 521 """Make a bundle repository object based on repo and bundle paths."""
521 522 if repopath:
522 523 url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
523 524 else:
524 525 url = b'bundle:%s' % bundlepath
525 526
526 527 # Because we can't make any guarantees about the type of the base
527 528 # repository, we can't have a static class representing the bundle
528 529 # repository. We also can't make any guarantees about how to even
529 530 # call the base repository's constructor!
530 531 #
531 532 # So, our strategy is to go through ``localrepo.instance()`` to construct
532 533 # a repo instance. Then, we dynamically create a new type derived from
533 534 # both it and our ``bundlerepository`` class which overrides some
534 535 # functionality. We then change the type of the constructed repository
535 536 # to this new type and initialize the bundle-specific bits of it.
536 537
537 538 try:
538 539 repo = localrepo.instance(ui, repopath, create=False)
539 540 tempparent = None
540 541 except error.RepoError:
541 542 tempparent = pycompat.mkdtemp()
542 543 try:
543 544 repo = localrepo.instance(ui, tempparent, create=True)
544 545 except Exception:
545 546 shutil.rmtree(tempparent)
546 547 raise
547 548
548 549 class derivedbundlerepository(bundlerepository, repo.__class__):
549 550 pass
550 551
551 552 repo.__class__ = derivedbundlerepository
552 553 bundlerepository.__init__(repo, bundlepath, url, tempparent)
553 554
554 555 return repo
555 556
556 557
557 558 class bundletransactionmanager(object):
558 559 def transaction(self):
559 560 return None
560 561
561 562 def close(self):
562 563 raise NotImplementedError
563 564
564 565 def release(self):
565 566 raise NotImplementedError
566 567
567 568
568 569 def getremotechanges(
569 570 ui, repo, peer, onlyheads=None, bundlename=None, force=False
570 571 ):
571 572 """obtains a bundle of changes incoming from peer
572 573
573 574 "onlyheads" restricts the returned changes to those reachable from the
574 575 specified heads.
575 576 "bundlename", if given, stores the bundle to this file path permanently;
576 577 otherwise it's stored to a temp file and gets deleted again when you call
577 578 the returned "cleanupfn".
578 579 "force" indicates whether to proceed on unrelated repos.
579 580
580 581 Returns a tuple (local, csets, cleanupfn):
581 582
582 583 "local" is a local repo from which to obtain the actual incoming
583 584 changesets; it is a bundlerepo for the obtained bundle when the
584 585 original "peer" is remote.
585 586 "csets" lists the incoming changeset node ids.
586 587 "cleanupfn" must be called without arguments when you're done processing
587 588 the changes; it closes both the original "peer" and the one returned
588 589 here.
589 590 """
590 591 tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
591 592 common, incoming, rheads = tmp
592 593 if not incoming:
593 594 try:
594 595 if bundlename:
595 596 os.unlink(bundlename)
596 597 except OSError:
597 598 pass
598 599 return repo, [], peer.close
599 600
600 601 commonset = set(common)
601 602 rheads = [x for x in rheads if x not in commonset]
602 603
603 604 bundle = None
604 605 bundlerepo = None
605 606 localrepo = peer.local()
606 607 if bundlename or not localrepo:
607 608 # create a bundle (uncompressed if peer repo is not local)
608 609
609 610 # developer config: devel.legacy.exchange
610 611 legexc = ui.configlist(b'devel', b'legacy.exchange')
611 612 forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
612 613 canbundle2 = (
613 614 not forcebundle1
614 615 and peer.capable(b'getbundle')
615 616 and peer.capable(b'bundle2')
616 617 )
617 618 if canbundle2:
618 619 with peer.commandexecutor() as e:
619 620 b2 = e.callcommand(
620 621 b'getbundle',
621 622 {
622 623 b'source': b'incoming',
623 624 b'common': common,
624 625 b'heads': rheads,
625 626 b'bundlecaps': exchange.caps20to10(
626 627 repo, role=b'client'
627 628 ),
628 629 b'cg': True,
629 630 },
630 631 ).result()
631 632
632 633 fname = bundle = changegroup.writechunks(
633 634 ui, b2._forwardchunks(), bundlename
634 635 )
635 636 else:
636 637 if peer.capable(b'getbundle'):
637 638 with peer.commandexecutor() as e:
638 639 cg = e.callcommand(
639 640 b'getbundle',
640 641 {
641 642 b'source': b'incoming',
642 643 b'common': common,
643 644 b'heads': rheads,
644 645 },
645 646 ).result()
646 647 elif onlyheads is None and not peer.capable(b'changegroupsubset'):
647 648 # compat with older servers when pulling all remote heads
648 649
649 650 with peer.commandexecutor() as e:
650 651 cg = e.callcommand(
651 652 b'changegroup',
652 653 {
653 654 b'nodes': incoming,
654 655 b'source': b'incoming',
655 656 },
656 657 ).result()
657 658
658 659 rheads = None
659 660 else:
660 661 with peer.commandexecutor() as e:
661 662 cg = e.callcommand(
662 663 b'changegroupsubset',
663 664 {
664 665 b'bases': incoming,
665 666 b'heads': rheads,
666 667 b'source': b'incoming',
667 668 },
668 669 ).result()
669 670
670 671 if localrepo:
671 672 bundletype = b"HG10BZ"
672 673 else:
673 674 bundletype = b"HG10UN"
674 675 fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
675 676 # keep written bundle?
676 677 if bundlename:
677 678 bundle = None
678 679 if not localrepo:
679 680 # use the created uncompressed bundlerepo
680 681 localrepo = bundlerepo = makebundlerepository(
681 682 repo.baseui, repo.root, fname
682 683 )
683 684
684 685 # this repo contains local and peer now, so filter out local again
685 686 common = repo.heads()
686 687 if localrepo:
687 688 # Part of common may be remotely filtered
688 689 # So use an unfiltered version
689 690 # The discovery process probably needs cleanup to avoid that
690 691 localrepo = localrepo.unfiltered()
691 692
692 693 csets = localrepo.changelog.findmissing(common, rheads)
693 694
694 695 if bundlerepo:
695 696 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
696 697
697 698 with peer.commandexecutor() as e:
698 699 remotephases = e.callcommand(
699 700 b'listkeys',
700 701 {
701 702 b'namespace': b'phases',
702 703 },
703 704 ).result()
704 705
705 706 pullop = exchange.pulloperation(bundlerepo, peer, heads=reponodes)
706 707 pullop.trmanager = bundletransactionmanager()
707 708 exchange._pullapplyphases(pullop, remotephases)
708 709
709 710 def cleanup():
710 711 if bundlerepo:
711 712 bundlerepo.close()
712 713 if bundle:
713 714 os.unlink(bundle)
714 715 peer.close()
715 716
716 717 return (localrepo, csets, cleanup)
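
For orientation, `makebundlerepository()` above is the entry point for viewing a bundle file as a read-only repository overlaid on a local repo. A hedged usage sketch follows, with hypothetical paths that are not part of this changeset:

from mercurial import ui as uimod
from mercurial import bundlerepo

ui = uimod.ui.load()
# both arguments are bytes paths (hypothetical here)
repo = bundlerepo.makebundlerepository(ui, b'/path/to/repo', b'/path/to/incoming.hg')
try:
    # revisions contributed by the bundle start at firstnewrev
    for ctx in repo[repo.firstnewrev:]:
        ui.write(b'%s\n' % ctx.hex())
finally:
    repo.close()
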
@@ -1,432 +1,430 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 error,
19 19 pycompat,
20 revlogutils,
20 21 util,
21 22 )
22 23
23 24 from ..revlogutils import nodemap as nodemaputil
24 25 from ..revlogutils import constants as revlog_constants
25 26
26 27 stringio = pycompat.bytesio
27 28
28 29
29 30 _pack = struct.pack
30 31 _unpack = struct.unpack
31 32 _compress = zlib.compress
32 33 _decompress = zlib.decompress
33 34
34 35 # Some code below makes tuples directly because it's more convenient. However,
35 36 # code outside this module should always use dirstatetuple.
36 37 def dirstatetuple(*x):
37 38 # x is a tuple
38 39 return x
39 40
40 41
41 42 def gettype(q):
42 43 return int(q & 0xFFFF)
43 44
44 45
45 def offset_type(offset, type):
46 return int(int(offset) << 16 | type)
47
48
49 46 class BaseIndexObject(object):
50 47 # Can I be passed to an algorithm implemented in Rust?
51 48 rust_ext_compat = 0
52 49 # Format of an index entry according to Python's `struct` language
53 50 index_format = revlog_constants.INDEX_ENTRY_V1
54 51 # Size of a C unsigned long long int, platform independent
55 52 big_int_size = struct.calcsize(b'>Q')
56 53 # Size of a C long int, platform independent
57 54 int_size = struct.calcsize(b'>i')
58 55 # An empty index entry, used as a default value to be overridden, or nullrev
59 56 null_item = (
60 57 0,
61 58 0,
62 59 0,
63 60 -1,
64 61 -1,
65 62 -1,
66 63 -1,
67 64 sha1nodeconstants.nullid,
68 65 0,
69 66 0,
70 67 revlog_constants.COMP_MODE_INLINE,
71 68 revlog_constants.COMP_MODE_INLINE,
72 69 )
73 70
74 71 @util.propertycache
75 72 def entry_size(self):
76 73 return self.index_format.size
77 74
78 75 @property
79 76 def nodemap(self):
80 77 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
81 78 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
82 79 return self._nodemap
83 80
84 81 @util.propertycache
85 82 def _nodemap(self):
86 83 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
87 84 for r in range(0, len(self)):
88 85 n = self[r][7]
89 86 nodemap[n] = r
90 87 return nodemap
91 88
92 89 def has_node(self, node):
93 90 """return True if the node exist in the index"""
94 91 return node in self._nodemap
95 92
96 93 def rev(self, node):
97 94 """return a revision for a node
98 95
99 96 If the node is unknown, raise a RevlogError"""
100 97 return self._nodemap[node]
101 98
102 99 def get_rev(self, node):
103 100 """return a revision for a node
104 101
105 102 If the node is unknown, return None"""
106 103 return self._nodemap.get(node)
107 104
108 105 def _stripnodes(self, start):
109 106 if '_nodemap' in vars(self):
110 107 for r in range(start, len(self)):
111 108 n = self[r][7]
112 109 del self._nodemap[n]
113 110
114 111 def clearcaches(self):
115 112 self.__dict__.pop('_nodemap', None)
116 113
117 114 def __len__(self):
118 115 return self._lgt + len(self._extra)
119 116
120 117 def append(self, tup):
121 118 if '_nodemap' in vars(self):
122 119 self._nodemap[tup[7]] = len(self)
123 120 data = self._pack_entry(len(self), tup)
124 121 self._extra.append(data)
125 122
126 123 def _pack_entry(self, rev, entry):
127 124 assert entry[8] == 0
128 125 assert entry[9] == 0
129 126 return self.index_format.pack(*entry[:8])
130 127
131 128 def _check_index(self, i):
132 129 if not isinstance(i, int):
133 130 raise TypeError(b"expecting int indexes")
134 131 if i < 0 or i >= len(self):
135 132 raise IndexError
136 133
137 134 def __getitem__(self, i):
138 135 if i == -1:
139 136 return self.null_item
140 137 self._check_index(i)
141 138 if i >= self._lgt:
142 139 data = self._extra[i - self._lgt]
143 140 else:
144 141 index = self._calculate_index(i)
145 142 data = self._data[index : index + self.entry_size]
146 143 r = self._unpack_entry(i, data)
147 144 if self._lgt and i == 0:
148 r = (offset_type(0, gettype(r[0])),) + r[1:]
145 offset = revlogutils.offset_type(0, gettype(r[0]))
146 r = (offset,) + r[1:]
149 147 return r
150 148
151 149 def _unpack_entry(self, rev, data):
152 150 r = self.index_format.unpack(data)
153 151 r = r + (
154 152 0,
155 153 0,
156 154 revlog_constants.COMP_MODE_INLINE,
157 155 revlog_constants.COMP_MODE_INLINE,
158 156 )
159 157 return r
160 158
161 159 def pack_header(self, header):
162 160 """pack header information as binary"""
163 161 v_fmt = revlog_constants.INDEX_HEADER
164 162 return v_fmt.pack(header)
165 163
166 164 def entry_binary(self, rev):
167 165 """return the raw binary string representing a revision"""
168 166 entry = self[rev]
169 167 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
170 168 if rev == 0:
171 169 p = p[revlog_constants.INDEX_HEADER.size :]
172 170 return p
173 171
174 172
175 173 class IndexObject(BaseIndexObject):
176 174 def __init__(self, data):
177 175 assert len(data) % self.entry_size == 0, (
178 176 len(data),
179 177 self.entry_size,
180 178 len(data) % self.entry_size,
181 179 )
182 180 self._data = data
183 181 self._lgt = len(data) // self.entry_size
184 182 self._extra = []
185 183
186 184 def _calculate_index(self, i):
187 185 return i * self.entry_size
188 186
189 187 def __delitem__(self, i):
190 188 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
191 189 raise ValueError(b"deleting slices only supports a:-1 with step 1")
192 190 i = i.start
193 191 self._check_index(i)
194 192 self._stripnodes(i)
195 193 if i < self._lgt:
196 194 self._data = self._data[: i * self.entry_size]
197 195 self._lgt = i
198 196 self._extra = []
199 197 else:
200 198 self._extra = self._extra[: i - self._lgt]
201 199
202 200
203 201 class PersistentNodeMapIndexObject(IndexObject):
204 202 """a Debug oriented class to test persistent nodemap
205 203
206 204 We need a simple python object to test API and higher level behavior. See
207 205 the Rust implementation for more serious usage. This should be used only
208 206 through the dedicated `devel.persistent-nodemap` config.
209 207 """
210 208
211 209 def nodemap_data_all(self):
212 210 """Return bytes containing a full serialization of a nodemap
213 211
214 212 The nodemap should be valid for the full set of revisions in the
215 213 index."""
216 214 return nodemaputil.persistent_data(self)
217 215
218 216 def nodemap_data_incremental(self):
219 217 """Return bytes containing a incremental update to persistent nodemap
220 218
221 219 This contains the data for an append-only update of the data provided
222 220 in the last call to `update_nodemap_data`.
223 221 """
224 222 if self._nm_root is None:
225 223 return None
226 224 docket = self._nm_docket
227 225 changed, data = nodemaputil.update_persistent_data(
228 226 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
229 227 )
230 228
231 229 self._nm_root = self._nm_max_idx = self._nm_docket = None
232 230 return docket, changed, data
233 231
234 232 def update_nodemap_data(self, docket, nm_data):
235 233 """provide full block of persisted binary data for a nodemap
236 234
237 235 The data are expected to come from disk. See `nodemap_data_all` for a
238 236 producer of such data."""
239 237 if nm_data is not None:
240 238 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
241 239 if self._nm_root:
242 240 self._nm_docket = docket
243 241 else:
244 242 self._nm_root = self._nm_max_idx = self._nm_docket = None
245 243
246 244
247 245 class InlinedIndexObject(BaseIndexObject):
248 246 def __init__(self, data, inline=0):
249 247 self._data = data
250 248 self._lgt = self._inline_scan(None)
251 249 self._inline_scan(self._lgt)
252 250 self._extra = []
253 251
254 252 def _inline_scan(self, lgt):
255 253 off = 0
256 254 if lgt is not None:
257 255 self._offsets = [0] * lgt
258 256 count = 0
259 257 while off <= len(self._data) - self.entry_size:
260 258 start = off + self.big_int_size
261 259 (s,) = struct.unpack(
262 260 b'>i',
263 261 self._data[start : start + self.int_size],
264 262 )
265 263 if lgt is not None:
266 264 self._offsets[count] = off
267 265 count += 1
268 266 off += self.entry_size + s
269 267 if off != len(self._data):
270 268 raise ValueError(b"corrupted data")
271 269 return count
272 270
273 271 def __delitem__(self, i):
274 272 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
275 273 raise ValueError(b"deleting slices only supports a:-1 with step 1")
276 274 i = i.start
277 275 self._check_index(i)
278 276 self._stripnodes(i)
279 277 if i < self._lgt:
280 278 self._offsets = self._offsets[:i]
281 279 self._lgt = i
282 280 self._extra = []
283 281 else:
284 282 self._extra = self._extra[: i - self._lgt]
285 283
286 284 def _calculate_index(self, i):
287 285 return self._offsets[i]
288 286
289 287
290 288 def parse_index2(data, inline, revlogv2=False):
291 289 if not inline:
292 290 cls = IndexObject2 if revlogv2 else IndexObject
293 291 return cls(data), None
294 292 cls = InlinedIndexObject
295 293 return cls(data, inline), (0, data)
296 294
297 295
298 296 def parse_index_cl_v2(data):
299 297 return IndexChangelogV2(data), None
300 298
301 299
302 300 class IndexObject2(IndexObject):
303 301 index_format = revlog_constants.INDEX_ENTRY_V2
304 302
305 303 def replace_sidedata_info(
306 304 self,
307 305 rev,
308 306 sidedata_offset,
309 307 sidedata_length,
310 308 offset_flags,
311 309 compression_mode,
312 310 ):
313 311 """
314 312 Replace an existing index entry's sidedata offset and length with new
315 313 ones.
316 314 This cannot be used outside of the context of sidedata rewriting,
317 315 inside the transaction that creates the revision `rev`.
318 316 """
319 317 if rev < 0:
320 318 raise KeyError
321 319 self._check_index(rev)
322 320 if rev < self._lgt:
323 321 msg = b"cannot rewrite entries outside of this transaction"
324 322 raise KeyError(msg)
325 323 else:
326 324 entry = list(self[rev])
327 325 entry[0] = offset_flags
328 326 entry[8] = sidedata_offset
329 327 entry[9] = sidedata_length
330 328 entry[11] = compression_mode
331 329 entry = tuple(entry)
332 330 new = self._pack_entry(rev, entry)
333 331 self._extra[rev - self._lgt] = new
334 332
335 333 def _unpack_entry(self, rev, data):
336 334 data = self.index_format.unpack(data)
337 335 entry = data[:10]
338 336 data_comp = data[10] & 3
339 337 sidedata_comp = (data[10] & (3 << 2)) >> 2
340 338 return entry + (data_comp, sidedata_comp)
341 339
342 340 def _pack_entry(self, rev, entry):
343 341 data = entry[:10]
344 342 data_comp = entry[10] & 3
345 343 sidedata_comp = (entry[11] & 3) << 2
346 344 data += (data_comp | sidedata_comp,)
347 345
348 346 return self.index_format.pack(*data)
349 347
350 348 def entry_binary(self, rev):
351 349 """return the raw binary string representing a revision"""
352 350 entry = self[rev]
353 351 return self._pack_entry(rev, entry)
354 352
355 353 def pack_header(self, header):
356 354 """pack header information as binary"""
357 355 msg = 'version header should go in the docket, not the index: %d'
358 356 msg %= header
359 357 raise error.ProgrammingError(msg)
360 358
361 359
362 360 class IndexChangelogV2(IndexObject2):
363 361 index_format = revlog_constants.INDEX_ENTRY_CL_V2
364 362
365 363 def _unpack_entry(self, rev, data, r=True):
366 364 items = self.index_format.unpack(data)
367 365 entry = items[:3] + (rev, rev) + items[3:8]
368 366 data_comp = items[8] & 3
369 367 sidedata_comp = (items[8] >> 2) & 3
370 368 return entry + (data_comp, sidedata_comp)
371 369
372 370 def _pack_entry(self, rev, entry):
373 371 assert entry[3] == rev, entry[3]
374 372 assert entry[4] == rev, entry[4]
375 373 data = entry[:3] + entry[5:10]
376 374 data_comp = entry[10] & 3
377 375 sidedata_comp = (entry[11] & 3) << 2
378 376 data += (data_comp | sidedata_comp,)
379 377 return self.index_format.pack(*data)
380 378
381 379
382 380 def parse_index_devel_nodemap(data, inline):
383 381 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
384 382 return PersistentNodeMapIndexObject(data), None
385 383
386 384
387 385 def parse_dirstate(dmap, copymap, st):
388 386 parents = [st[:20], st[20:40]]
389 387 # dereference fields so they will be local in loop
390 388 format = b">cllll"
391 389 e_size = struct.calcsize(format)
392 390 pos1 = 40
393 391 l = len(st)
394 392
395 393 # the inner loop
396 394 while pos1 < l:
397 395 pos2 = pos1 + e_size
398 396 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
399 397 pos1 = pos2 + e[4]
400 398 f = st[pos2:pos1]
401 399 if b'\0' in f:
402 400 f, c = f.split(b'\0')
403 401 copymap[f] = c
404 402 dmap[f] = e[:4]
405 403 return parents
406 404
407 405
408 406 def pack_dirstate(dmap, copymap, pl, now):
409 407 now = int(now)
410 408 cs = stringio()
411 409 write = cs.write
412 410 write(b"".join(pl))
413 411 for f, e in pycompat.iteritems(dmap):
414 412 if e[0] == b'n' and e[3] == now:
415 413 # The file was last modified "simultaneously" with the current
416 414 # write to dirstate (i.e. within the same second for file-
417 415 # systems with a granularity of 1 sec). This commonly happens
418 416 # for at least a couple of files on 'update'.
419 417 # The user could change the file without changing its size
420 418 # within the same second. Invalidate the file's mtime in
421 419 # dirstate, forcing future 'status' calls to compare the
422 420 # contents of the file if the size is the same. This prevents
423 421 # mistakenly treating such files as clean.
424 422 e = dirstatetuple(e[0], e[1], e[2], -1)
425 423 dmap[f] = e
426 424
427 425 if f in copymap:
428 426 f = b"%s\0%s" % (f, copymap[f])
429 427 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
430 428 write(e)
431 429 write(f)
432 430 return cs.getvalue()
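
The index classes above all trade in the same 12-item entry tuple; the field order matches the comment in bundlerevlog earlier in this diff and the `null_item` default. A hedged sketch of reading a few fields from such an entry (`index` and `rev` are hypothetical stand-ins):

entry = index[rev]
offset_and_flags = entry[0]   # packed by offset_type()
compressed_size  = entry[1]
uncompressed_len = entry[2]
delta_base_rev   = entry[3]
linkrev          = entry[4]
p1_rev, p2_rev   = entry[5], entry[6]
node             = entry[7]   # items 8-11 hold sidedata offset/size and compression modes
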
@@ -1,3405 +1,3400 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import absolute_import
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import errno
21 21 import io
22 22 import os
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullrev,
31 31 sha1nodeconstants,
32 32 short,
33 33 wdirrev,
34 34 )
35 35 from .i18n import _
36 36 from .pycompat import getattr
37 37 from .revlogutils.constants import (
38 38 ALL_KINDS,
39 39 CHANGELOGV2,
40 40 COMP_MODE_DEFAULT,
41 41 COMP_MODE_INLINE,
42 42 COMP_MODE_PLAIN,
43 43 FEATURES_BY_VERSION,
44 44 FLAG_GENERALDELTA,
45 45 FLAG_INLINE_DATA,
46 46 INDEX_HEADER,
47 47 KIND_CHANGELOG,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 revlogutils,
75 76 templatefilters,
76 77 util,
77 78 )
78 79 from .interfaces import (
79 80 repository,
80 81 util as interfaceutil,
81 82 )
82 83 from .revlogutils import (
83 84 censor,
84 85 deltas as deltautil,
85 86 docket as docketutil,
86 87 flagutil,
87 88 nodemap as nodemaputil,
88 89 revlogv0,
89 90 sidedata as sidedatautil,
90 91 )
91 92 from .utils import (
92 93 storageutil,
93 94 stringutil,
94 95 )
95 96
96 97 # blanked usage of all the names to prevent pyflakes constraints
97 98 # We need these names available in the module for extensions.
98 99
99 100 REVLOGV0
100 101 REVLOGV1
101 102 REVLOGV2
102 103 FLAG_INLINE_DATA
103 104 FLAG_GENERALDELTA
104 105 REVLOG_DEFAULT_FLAGS
105 106 REVLOG_DEFAULT_FORMAT
106 107 REVLOG_DEFAULT_VERSION
107 108 REVLOGV1_FLAGS
108 109 REVLOGV2_FLAGS
109 110 REVIDX_ISCENSORED
110 111 REVIDX_ELLIPSIS
111 112 REVIDX_HASCOPIESINFO
112 113 REVIDX_EXTSTORED
113 114 REVIDX_DEFAULT_FLAGS
114 115 REVIDX_FLAGS_ORDER
115 116 REVIDX_RAWTEXT_CHANGING_FLAGS
116 117
117 118 parsers = policy.importmod('parsers')
118 119 rustancestor = policy.importrust('ancestor')
119 120 rustdagop = policy.importrust('dagop')
120 121 rustrevlog = policy.importrust('revlog')
121 122
122 123 # Aliased for performance.
123 124 _zlibdecompress = zlib.decompress
124 125
125 126 # max size of revlog with inline data
126 127 _maxinline = 131072
127 128 _chunksize = 1048576
128 129
129 130 # Flag processors for REVIDX_ELLIPSIS.
130 131 def ellipsisreadprocessor(rl, text):
131 132 return text, False
132 133
133 134
134 135 def ellipsiswriteprocessor(rl, text):
135 136 return text, False
136 137
137 138
138 139 def ellipsisrawprocessor(rl, text):
139 140 return False
140 141
141 142
142 143 ellipsisprocessor = (
143 144 ellipsisreadprocessor,
144 145 ellipsiswriteprocessor,
145 146 ellipsisrawprocessor,
146 147 )
147 148
148 149
149 def offset_type(offset, type):
150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151 raise ValueError(b'unknown revlog index flags')
152 return int(int(offset) << 16 | type)
153
154
155 150 def _verify_revision(rl, skipflags, state, node):
156 151 """Verify the integrity of the given revlog ``node`` while providing a hook
157 152 point for extensions to influence the operation."""
158 153 if skipflags:
159 154 state[b'skipread'].add(node)
160 155 else:
161 156 # Side-effect: read content and verify hash.
162 157 rl.revision(node)
163 158
164 159
165 160 # True if a fast implementation for persistent-nodemap is available
166 161 #
167 162 # We also consider we have a "fast" implementation in "pure" python because
168 163 # people using pure don't really have performance considerations (and a
169 164 # wheelbarrow of other slowness sources)
170 165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 166 parsers, 'BaseIndexObject'
172 167 )
173 168
174 169
175 170 @attr.s(slots=True, frozen=True)
176 171 class _revisioninfo(object):
177 172 """Information about a revision that allows building its fulltext
178 173 node: expected hash of the revision
179 174 p1, p2: parent revs of the revision
180 175 btext: built text cache consisting of a one-element list
181 176 cachedelta: (baserev, uncompressed_delta) or None
182 177 flags: flags associated to the revision storage
183 178
184 179 One of btext[0] or cachedelta must be set.
185 180 """
186 181
187 182 node = attr.ib()
188 183 p1 = attr.ib()
189 184 p2 = attr.ib()
190 185 btext = attr.ib()
191 186 textlen = attr.ib()
192 187 cachedelta = attr.ib()
193 188 flags = attr.ib()
194 189
195 190
196 191 @interfaceutil.implementer(repository.irevisiondelta)
197 192 @attr.s(slots=True)
198 193 class revlogrevisiondelta(object):
199 194 node = attr.ib()
200 195 p1node = attr.ib()
201 196 p2node = attr.ib()
202 197 basenode = attr.ib()
203 198 flags = attr.ib()
204 199 baserevisionsize = attr.ib()
205 200 revision = attr.ib()
206 201 delta = attr.ib()
207 202 sidedata = attr.ib()
208 203 protocol_flags = attr.ib()
209 204 linknode = attr.ib(default=None)
210 205
211 206
212 207 @interfaceutil.implementer(repository.iverifyproblem)
213 208 @attr.s(frozen=True)
214 209 class revlogproblem(object):
215 210 warning = attr.ib(default=None)
216 211 error = attr.ib(default=None)
217 212 node = attr.ib(default=None)
218 213
219 214
220 215 def parse_index_v1(data, inline):
221 216 # call the C implementation to parse the index data
222 217 index, cache = parsers.parse_index2(data, inline)
223 218 return index, cache
224 219
225 220
226 221 def parse_index_v2(data, inline):
227 222 # call the C implementation to parse the index data
228 223 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
229 224 return index, cache
230 225
231 226
232 227 def parse_index_cl_v2(data, inline):
233 228 # call the C implementation to parse the index data
234 229 assert not inline
235 230 from .pure.parsers import parse_index_cl_v2
236 231
237 232 index, cache = parse_index_cl_v2(data)
238 233 return index, cache
239 234
240 235
241 236 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
242 237
243 238 def parse_index_v1_nodemap(data, inline):
244 239 index, cache = parsers.parse_index_devel_nodemap(data, inline)
245 240 return index, cache
246 241
247 242
248 243 else:
249 244 parse_index_v1_nodemap = None
250 245
251 246
252 247 def parse_index_v1_mixed(data, inline):
253 248 index, cache = parse_index_v1(data, inline)
254 249 return rustrevlog.MixedIndex(index), cache
255 250
256 251
257 252 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
258 253 # signed integer)
259 254 _maxentrysize = 0x7FFFFFFF
260 255
261 256 PARTIAL_READ_MSG = _(
262 257 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
263 258 )
264 259
265 260 FILE_TOO_SHORT_MSG = _(
266 261 b'cannot read from revlog %s;'
267 262 b' expected %d bytes from offset %d, data size is %d'
268 263 )
269 264
270 265
271 266 class revlog(object):
272 267 """
273 268 the underlying revision storage object
274 269
275 270 A revlog consists of two parts, an index and the revision data.
276 271
277 272 The index is a file with a fixed record size containing
278 273 information on each revision, including its nodeid (hash), the
279 274 nodeids of its parents, the position and offset of its data within
280 275 the data file, and the revision it's based on. Finally, each entry
281 276 contains a linkrev entry that can serve as a pointer to external
282 277 data.
283 278
284 279 The revision data itself is a linear collection of data chunks.
285 280 Each chunk represents a revision and is usually represented as a
286 281 delta against the previous chunk. To bound lookup time, runs of
287 282 deltas are limited to about 2 times the length of the original
288 283 version data. This makes retrieval of a version proportional to
289 284 its size, or O(1) relative to the number of revisions.
290 285
291 286 Both pieces of the revlog are written to in an append-only
292 287 fashion, which means we never need to rewrite a file to insert or
293 288 remove data, and can use some simple techniques to avoid the need
294 289 for locking while reading.
295 290
296 291 If checkambig, indexfile is opened with checkambig=True at
297 292 writing, to avoid file stat ambiguity.
298 293
299 294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
300 295 index will be mmapped rather than read if it is larger than the
301 296 configured threshold.
302 297
303 298 If censorable is True, the revlog can have censored revisions.
304 299
305 300 If `upperboundcomp` is not None, this is the expected maximal gain from
306 301 compression for the data content.
307 302
308 303 `concurrencychecker` is an optional function that receives 3 arguments: a
309 304 file handle, a filename, and an expected position. It should check whether
310 305 the current position in the file handle is valid, and log/warn/fail (by
311 306 raising).
312 307
313 308 See mercurial/revlogutils/constants.py for details about the content of an
314 309 index entry.
315 310 """
316 311
317 312 _flagserrorclass = error.RevlogError
318 313
319 314 def __init__(
320 315 self,
321 316 opener,
322 317 target,
323 318 radix,
324 319 postfix=None, # only exist for `tmpcensored` now
325 320 checkambig=False,
326 321 mmaplargeindex=False,
327 322 censorable=False,
328 323 upperboundcomp=None,
329 324 persistentnodemap=False,
330 325 concurrencychecker=None,
331 326 trypending=False,
332 327 ):
333 328 """
334 329 create a revlog object
335 330
336 331 opener is a function that abstracts the file opening operation
337 332 and can be used to implement COW semantics or the like.
338 333
339 334 `target`: a (KIND, ID) tuple that identify the content stored in
340 335 this revlog. It help the rest of the code to understand what the revlog
341 336 is about without having to resort to heuristic and index filename
342 337 analysis. Note: that this must be reliably be set by normal code, but
343 338 that test, debug, or performance measurement code might not set this to
344 339 accurate value.
345 340 """
346 341 self.upperboundcomp = upperboundcomp
347 342
348 343 self.radix = radix
349 344
350 345 self._docket_file = None
351 346 self._indexfile = None
352 347 self._datafile = None
353 348 self._sidedatafile = None
354 349 self._nodemap_file = None
355 350 self.postfix = postfix
356 351 self._trypending = trypending
357 352 self.opener = opener
358 353 if persistentnodemap:
359 354 self._nodemap_file = nodemaputil.get_nodemap_file(self)
360 355
361 356 assert target[0] in ALL_KINDS
362 357 assert len(target) == 2
363 358 self.target = target
364 359 # When True, indexfile is opened with checkambig=True at writing, to
365 360 # avoid file stat ambiguity.
366 361 self._checkambig = checkambig
367 362 self._mmaplargeindex = mmaplargeindex
368 363 self._censorable = censorable
369 364 # 3-tuple of (node, rev, text) for a raw revision.
370 365 self._revisioncache = None
371 366 # Maps rev to chain base rev.
372 367 self._chainbasecache = util.lrucachedict(100)
373 368 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
374 369 self._chunkcache = (0, b'')
375 370 # How much data to read and cache into the raw revlog data cache.
376 371 self._chunkcachesize = 65536
377 372 self._maxchainlen = None
378 373 self._deltabothparents = True
379 374 self.index = None
380 375 self._docket = None
381 376 self._nodemap_docket = None
382 377 # Mapping of partial identifiers to full nodes.
383 378 self._pcache = {}
384 379 # Mapping of revision integer to full node.
385 380 self._compengine = b'zlib'
386 381 self._compengineopts = {}
387 382 self._maxdeltachainspan = -1
388 383 self._withsparseread = False
389 384 self._sparserevlog = False
390 385 self.hassidedata = False
391 386 self._srdensitythreshold = 0.50
392 387 self._srmingapsize = 262144
393 388
394 389 # Make copy of flag processors so each revlog instance can support
395 390 # custom flags.
396 391 self._flagprocessors = dict(flagutil.flagprocessors)
397 392
398 393 # 3-tuple of file handles being used for active writing.
399 394 self._writinghandles = None
400 395 # prevent nesting of addgroup
401 396 self._adding_group = None
402 397
403 398 self._loadindex()
404 399
405 400 self._concurrencychecker = concurrencychecker
406 401
407 402 def _init_opts(self):
408 403 """process options (from above/config) to setup associated default revlog mode
409 404
410 405 These values might be affected when actually reading on disk information.
411 406
412 407 The relevant values are returned for use in _loadindex().
413 408
414 409 * newversionflags:
415 410 version header to use if we need to create a new revlog
416 411
417 412 * mmapindexthreshold:
418 413 minimal index size for start to use mmap
419 414
420 415 * force_nodemap:
421 416 force the usage of a "development" version of the nodemap code
422 417 """
423 418 mmapindexthreshold = None
424 419 opts = self.opener.options
425 420
426 421 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
427 422 new_header = CHANGELOGV2
428 423 elif b'revlogv2' in opts:
429 424 new_header = REVLOGV2
430 425 elif b'revlogv1' in opts:
431 426 new_header = REVLOGV1 | FLAG_INLINE_DATA
432 427 if b'generaldelta' in opts:
433 428 new_header |= FLAG_GENERALDELTA
434 429 elif b'revlogv0' in self.opener.options:
435 430 new_header = REVLOGV0
436 431 else:
437 432 new_header = REVLOG_DEFAULT_VERSION
438 433
439 434 if b'chunkcachesize' in opts:
440 435 self._chunkcachesize = opts[b'chunkcachesize']
441 436 if b'maxchainlen' in opts:
442 437 self._maxchainlen = opts[b'maxchainlen']
443 438 if b'deltabothparents' in opts:
444 439 self._deltabothparents = opts[b'deltabothparents']
445 440 self._lazydelta = bool(opts.get(b'lazydelta', True))
446 441 self._lazydeltabase = False
447 442 if self._lazydelta:
448 443 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
449 444 if b'compengine' in opts:
450 445 self._compengine = opts[b'compengine']
451 446 if b'zlib.level' in opts:
452 447 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
453 448 if b'zstd.level' in opts:
454 449 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
455 450 if b'maxdeltachainspan' in opts:
456 451 self._maxdeltachainspan = opts[b'maxdeltachainspan']
457 452 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
458 453 mmapindexthreshold = opts[b'mmapindexthreshold']
459 454 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
460 455 withsparseread = bool(opts.get(b'with-sparse-read', False))
461 456 # sparse-revlog forces sparse-read
462 457 self._withsparseread = self._sparserevlog or withsparseread
463 458 if b'sparse-read-density-threshold' in opts:
464 459 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
465 460 if b'sparse-read-min-gap-size' in opts:
466 461 self._srmingapsize = opts[b'sparse-read-min-gap-size']
467 462 if opts.get(b'enableellipsis'):
468 463 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
469 464
470 465 # revlog v0 doesn't have flag processors
471 466 for flag, processor in pycompat.iteritems(
472 467 opts.get(b'flagprocessors', {})
473 468 ):
474 469 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
475 470
476 471 if self._chunkcachesize <= 0:
477 472 raise error.RevlogError(
478 473 _(b'revlog chunk cache size %r is not greater than 0')
479 474 % self._chunkcachesize
480 475 )
481 476 elif self._chunkcachesize & (self._chunkcachesize - 1):
482 477 raise error.RevlogError(
483 478 _(b'revlog chunk cache size %r is not a power of 2')
484 479 % self._chunkcachesize
485 480 )
486 481 force_nodemap = opts.get(b'devel-force-nodemap', False)
487 482 return new_header, mmapindexthreshold, force_nodemap
488 483
489 484 def _get_data(self, filepath, mmap_threshold, size=None):
490 485 """return a file content with or without mmap
491 486
492 487 If the file is missing return the empty string"""
493 488 try:
494 489 with self.opener(filepath) as fp:
495 490 if mmap_threshold is not None:
496 491 file_size = self.opener.fstat(fp).st_size
497 492 if file_size >= mmap_threshold:
498 493 if size is not None:
499 494 # avoid potential mmap crash
500 495 size = min(file_size, size)
501 496 # TODO: should call .close() to release resources without
502 497 # relying on Python GC
503 498 if size is None:
504 499 return util.buffer(util.mmapread(fp))
505 500 else:
506 501 return util.buffer(util.mmapread(fp, size))
507 502 if size is None:
508 503 return fp.read()
509 504 else:
510 505 return fp.read(size)
511 506 except IOError as inst:
512 507 if inst.errno != errno.ENOENT:
513 508 raise
514 509 return b''
515 510
516 511 def _loadindex(self):
517 512
518 513 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
519 514
520 515 if self.postfix is not None:
521 516 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
522 517 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
523 518 entry_point = b'%s.i.a' % self.radix
524 519 else:
525 520 entry_point = b'%s.i' % self.radix
526 521
527 522 entry_data = b''
528 523 self._initempty = True
529 524 entry_data = self._get_data(entry_point, mmapindexthreshold)
530 525 if len(entry_data) > 0:
531 526 header = INDEX_HEADER.unpack(entry_data[:4])[0]
532 527 self._initempty = False
533 528 else:
534 529 header = new_header
535 530
536 531 self._format_flags = header & ~0xFFFF
537 532 self._format_version = header & 0xFFFF
538 533
539 534 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
540 535 if supported_flags is None:
541 536 msg = _(b'unknown version (%d) in revlog %s')
542 537 msg %= (self._format_version, self.display_id)
543 538 raise error.RevlogError(msg)
544 539 elif self._format_flags & ~supported_flags:
545 540 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
546 541 display_flag = self._format_flags >> 16
547 542 msg %= (display_flag, self._format_version, self.display_id)
548 543 raise error.RevlogError(msg)
549 544
550 545 features = FEATURES_BY_VERSION[self._format_version]
551 546 self._inline = features[b'inline'](self._format_flags)
552 547 self._generaldelta = features[b'generaldelta'](self._format_flags)
553 548 self.hassidedata = features[b'sidedata']
554 549
555 550 if not features[b'docket']:
556 551 self._indexfile = entry_point
557 552 index_data = entry_data
558 553 else:
559 554 self._docket_file = entry_point
560 555 if self._initempty:
561 556 self._docket = docketutil.default_docket(self, header)
562 557 else:
563 558 self._docket = docketutil.parse_docket(
564 559 self, entry_data, use_pending=self._trypending
565 560 )
566 561 self._indexfile = self._docket.index_filepath()
567 562 index_data = b''
568 563 index_size = self._docket.index_end
569 564 if index_size > 0:
570 565 index_data = self._get_data(
571 566 self._indexfile, mmapindexthreshold, size=index_size
572 567 )
573 568 if len(index_data) < index_size:
574 569 msg = _(b'too few index data for %s: got %d, expected %d')
575 570 msg %= (self.display_id, len(index_data), index_size)
576 571 raise error.RevlogError(msg)
577 572
578 573 self._inline = False
579 574 # generaldelta implied by version 2 revlogs.
580 575 self._generaldelta = True
581 576 # the logic for persistent nodemap will be dealt with within the
582 577 # main docket, so disable it for now.
583 578 self._nodemap_file = None
584 579
585 580 if self._docket is not None:
586 581 self._datafile = self._docket.data_filepath()
587 582 self._sidedatafile = self._docket.sidedata_filepath()
588 583 elif self.postfix is None:
589 584 self._datafile = b'%s.d' % self.radix
590 585 else:
591 586 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
592 587
593 588 self.nodeconstants = sha1nodeconstants
594 589 self.nullid = self.nodeconstants.nullid
595 590
596 591 # sparse-revlog can't be on without general-delta (issue6056)
597 592 if not self._generaldelta:
598 593 self._sparserevlog = False
599 594
600 595 self._storedeltachains = True
601 596
602 597 devel_nodemap = (
603 598 self._nodemap_file
604 599 and force_nodemap
605 600 and parse_index_v1_nodemap is not None
606 601 )
607 602
608 603 use_rust_index = False
609 604 if rustrevlog is not None:
610 605 if self._nodemap_file is not None:
611 606 use_rust_index = True
612 607 else:
613 608 use_rust_index = self.opener.options.get(b'rust.index')
614 609
615 610 self._parse_index = parse_index_v1
616 611 if self._format_version == REVLOGV0:
617 612 self._parse_index = revlogv0.parse_index_v0
618 613 elif self._format_version == REVLOGV2:
619 614 self._parse_index = parse_index_v2
620 615 elif self._format_version == CHANGELOGV2:
621 616 self._parse_index = parse_index_cl_v2
622 617 elif devel_nodemap:
623 618 self._parse_index = parse_index_v1_nodemap
624 619 elif use_rust_index:
625 620 self._parse_index = parse_index_v1_mixed
626 621 try:
627 622 d = self._parse_index(index_data, self._inline)
628 623 index, _chunkcache = d
629 624 use_nodemap = (
630 625 not self._inline
631 626 and self._nodemap_file is not None
632 627 and util.safehasattr(index, 'update_nodemap_data')
633 628 )
634 629 if use_nodemap:
635 630 nodemap_data = nodemaputil.persisted_data(self)
636 631 if nodemap_data is not None:
637 632 docket = nodemap_data[0]
638 633 if (
639 634 len(d[0]) > docket.tip_rev
640 635 and d[0][docket.tip_rev][7] == docket.tip_node
641 636 ):
642 637 # no changelog tampering
643 638 self._nodemap_docket = docket
644 639 index.update_nodemap_data(*nodemap_data)
645 640 except (ValueError, IndexError):
646 641 raise error.RevlogError(
647 642 _(b"index %s is corrupted") % self.display_id
648 643 )
649 644 self.index, self._chunkcache = d
650 645 if not self._chunkcache:
651 646 self._chunkclear()
652 647 # revnum -> (chain-length, sum-delta-length)
653 648 self._chaininfocache = util.lrucachedict(500)
654 649 # revlog header -> revlog compressor
655 650 self._decompressors = {}
656 651
657 652 @util.propertycache
658 653 def revlog_kind(self):
659 654 return self.target[0]
660 655
661 656 @util.propertycache
662 657 def display_id(self):
663 658 """The public facing "ID" of the revlog that we use in message"""
664 659 # Maybe we should build a user facing representation of
665 660 # revlog.target instead of using `self.radix`
666 661 return self.radix
667 662
668 663 def _get_decompressor(self, t):
669 664 try:
670 665 compressor = self._decompressors[t]
671 666 except KeyError:
672 667 try:
673 668 engine = util.compengines.forrevlogheader(t)
674 669 compressor = engine.revlogcompressor(self._compengineopts)
675 670 self._decompressors[t] = compressor
676 671 except KeyError:
677 672 raise error.RevlogError(
678 673 _(b'unknown compression type %s') % binascii.hexlify(t)
679 674 )
680 675 return compressor
681 676
682 677 @util.propertycache
683 678 def _compressor(self):
684 679 engine = util.compengines[self._compengine]
685 680 return engine.revlogcompressor(self._compengineopts)
686 681
687 682 @util.propertycache
688 683 def _decompressor(self):
689 684 """the default decompressor"""
690 685 if self._docket is None:
691 686 return None
692 687 t = self._docket.default_compression_header
693 688 c = self._get_decompressor(t)
694 689 return c.decompress
695 690
696 691 def _indexfp(self):
697 692 """file object for the revlog's index file"""
698 693 return self.opener(self._indexfile, mode=b"r")
699 694
700 695 def __index_write_fp(self):
701 696 # You should not use this directly; use `_writing` instead
702 697 try:
703 698 f = self.opener(
704 699 self._indexfile, mode=b"r+", checkambig=self._checkambig
705 700 )
706 701 if self._docket is None:
707 702 f.seek(0, os.SEEK_END)
708 703 else:
709 704 f.seek(self._docket.index_end, os.SEEK_SET)
710 705 return f
711 706 except IOError as inst:
712 707 if inst.errno != errno.ENOENT:
713 708 raise
714 709 return self.opener(
715 710 self._indexfile, mode=b"w+", checkambig=self._checkambig
716 711 )
717 712
718 713 def __index_new_fp(self):
719 714 # You should not use this unless you are upgrading from an inline revlog
720 715 return self.opener(
721 716 self._indexfile,
722 717 mode=b"w",
723 718 checkambig=self._checkambig,
724 719 atomictemp=True,
725 720 )
726 721
727 722 def _datafp(self, mode=b'r'):
728 723 """file object for the revlog's data file"""
729 724 return self.opener(self._datafile, mode=mode)
730 725
731 726 @contextlib.contextmanager
732 727 def _datareadfp(self, existingfp=None):
733 728 """file object suitable to read data"""
734 729 # Use explicit file handle, if given.
735 730 if existingfp is not None:
736 731 yield existingfp
737 732
738 733 # Use a file handle being actively used for writes, if available.
739 734 # There is some danger to doing this because reads will seek the
740 735 # file. However, _writeentry() performs a SEEK_END before all writes,
741 736 # so we should be safe.
742 737 elif self._writinghandles:
743 738 if self._inline:
744 739 yield self._writinghandles[0]
745 740 else:
746 741 yield self._writinghandles[1]
747 742
748 743 # Otherwise open a new file handle.
749 744 else:
750 745 if self._inline:
751 746 func = self._indexfp
752 747 else:
753 748 func = self._datafp
754 749 with func() as fp:
755 750 yield fp
756 751
757 752 @contextlib.contextmanager
758 753 def _sidedatareadfp(self):
759 754 """file object suitable to read sidedata"""
760 755 if self._writinghandles:
761 756 yield self._writinghandles[2]
762 757 else:
763 758 with self.opener(self._sidedatafile) as fp:
764 759 yield fp
765 760
766 761 def tiprev(self):
767 762 return len(self.index) - 1
768 763
769 764 def tip(self):
770 765 return self.node(self.tiprev())
771 766
772 767 def __contains__(self, rev):
773 768 return 0 <= rev < len(self)
774 769
775 770 def __len__(self):
776 771 return len(self.index)
777 772
778 773 def __iter__(self):
779 774 return iter(pycompat.xrange(len(self)))
780 775
781 776 def revs(self, start=0, stop=None):
782 777 """iterate over all rev in this revlog (from start to stop)"""
783 778 return storageutil.iterrevs(len(self), start=start, stop=stop)
784 779
785 780 @property
786 781 def nodemap(self):
787 782 msg = (
788 783 b"revlog.nodemap is deprecated, "
789 784 b"use revlog.index.[has_node|rev|get_rev]"
790 785 )
791 786 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
792 787 return self.index.nodemap
793 788
794 789 @property
795 790 def _nodecache(self):
796 791 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
797 792 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
798 793 return self.index.nodemap
799 794
800 795 def hasnode(self, node):
801 796 try:
802 797 self.rev(node)
803 798 return True
804 799 except KeyError:
805 800 return False
806 801
807 802 def candelta(self, baserev, rev):
808 803 """whether two revisions (baserev, rev) can be delta-ed or not"""
809 804 # Disable delta if either rev requires a content-changing flag
810 805 # processor (ex. LFS). This is because such flag processor can alter
811 806 # the rawtext content that the delta will be based on, and two clients
812 807 # could have a same revlog node with different flags (i.e. different
813 808 # rawtext contents) and the delta could be incompatible.
814 809 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
815 810 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
816 811 ):
817 812 return False
818 813 return True
819 814
820 815 def update_caches(self, transaction):
821 816 if self._nodemap_file is not None:
822 817 if transaction is None:
823 818 nodemaputil.update_persistent_nodemap(self)
824 819 else:
825 820 nodemaputil.setup_persistent_nodemap(transaction, self)
826 821
827 822 def clearcaches(self):
828 823 self._revisioncache = None
829 824 self._chainbasecache.clear()
830 825 self._chunkcache = (0, b'')
831 826 self._pcache = {}
832 827 self._nodemap_docket = None
833 828 self.index.clearcaches()
834 829 # The python code is the one responsible for validating the docket, so we
835 830 # end up having to refresh it here.
836 831 use_nodemap = (
837 832 not self._inline
838 833 and self._nodemap_file is not None
839 834 and util.safehasattr(self.index, 'update_nodemap_data')
840 835 )
841 836 if use_nodemap:
842 837 nodemap_data = nodemaputil.persisted_data(self)
843 838 if nodemap_data is not None:
844 839 self._nodemap_docket = nodemap_data[0]
845 840 self.index.update_nodemap_data(*nodemap_data)
846 841
847 842 def rev(self, node):
848 843 try:
849 844 return self.index.rev(node)
850 845 except TypeError:
851 846 raise
852 847 except error.RevlogError:
853 848 # parsers.c radix tree lookup failed
854 849 if (
855 850 node == self.nodeconstants.wdirid
856 851 or node in self.nodeconstants.wdirfilenodeids
857 852 ):
858 853 raise error.WdirUnsupported
859 854 raise error.LookupError(node, self.display_id, _(b'no node'))
860 855
861 856 # Accessors for index entries.
862 857
863 858 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
864 859 # are flags.
865 860 def start(self, rev):
866 861 return int(self.index[rev][0] >> 16)
867 862
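# An illustration of how the first index field is packed, consistent with
# start() above and flags() below: the upper 48 bits hold the byte offset of
# the revision data, the lower 16 bits hold the revision flags. The helper
# name below is only a sketch of the packing scheme.
#
#     def _pack_offset_flags(offset, flags):
#         assert 0 <= flags < (1 << 16)
#         return (offset << 16) | flags
#
#     packed = _pack_offset_flags(1024, 0x2)
#     assert packed >> 16 == 1024     # what start() extracts
#     assert packed & 0xFFFF == 0x2   # what flags() extracts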
868 863 def sidedata_cut_off(self, rev):
869 864 sd_cut_off = self.index[rev][8]
870 865 if sd_cut_off != 0:
871 866 return sd_cut_off
872 867 # This is some annoying dance, because entries without sidedata
873 868 # currently use 0 as their offset (instead of previous-offset +
874 869 # previous-size).
875 870 #
876 871 # We should reconsider this "sidedata → 0 sidedata_offset" policy.
877 872 # In the meantime, we need this.
878 873 while 0 <= rev:
879 874 e = self.index[rev]
880 875 if e[9] != 0:
881 876 return e[8] + e[9]
882 877 rev -= 1
883 878 return 0
884 879
885 880 def flags(self, rev):
886 881 return self.index[rev][0] & 0xFFFF
887 882
888 883 def length(self, rev):
889 884 return self.index[rev][1]
890 885
891 886 def sidedata_length(self, rev):
892 887 if not self.hassidedata:
893 888 return 0
894 889 return self.index[rev][9]
895 890
896 891 def rawsize(self, rev):
897 892 """return the length of the uncompressed text for a given revision"""
898 893 l = self.index[rev][2]
899 894 if l >= 0:
900 895 return l
901 896
902 897 t = self.rawdata(rev)
903 898 return len(t)
904 899
905 900 def size(self, rev):
906 901 """length of non-raw text (processed by a "read" flag processor)"""
907 902 # fast path: if no "read" flag processor could change the content,
908 903 # size is rawsize. note: ELLIPSIS is known to not change the content.
909 904 flags = self.flags(rev)
910 905 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
911 906 return self.rawsize(rev)
912 907
913 908 return len(self.revision(rev, raw=False))
914 909
915 910 def chainbase(self, rev):
916 911 base = self._chainbasecache.get(rev)
917 912 if base is not None:
918 913 return base
919 914
920 915 index = self.index
921 916 iterrev = rev
922 917 base = index[iterrev][3]
923 918 while base != iterrev:
924 919 iterrev = base
925 920 base = index[iterrev][3]
926 921
927 922 self._chainbasecache[rev] = base
928 923 return base
929 924
930 925 def linkrev(self, rev):
931 926 return self.index[rev][4]
932 927
933 928 def parentrevs(self, rev):
934 929 try:
935 930 entry = self.index[rev]
936 931 except IndexError:
937 932 if rev == wdirrev:
938 933 raise error.WdirUnsupported
939 934 raise
940 935 if entry[5] == nullrev:
941 936 return entry[6], entry[5]
942 937 else:
943 938 return entry[5], entry[6]
944 939
945 940 # fast parentrevs(rev) where rev isn't filtered
946 941 _uncheckedparentrevs = parentrevs
947 942
948 943 def node(self, rev):
949 944 try:
950 945 return self.index[rev][7]
951 946 except IndexError:
952 947 if rev == wdirrev:
953 948 raise error.WdirUnsupported
954 949 raise
955 950
956 951 # Derived from index values.
957 952
958 953 def end(self, rev):
959 954 return self.start(rev) + self.length(rev)
960 955
961 956 def parents(self, node):
962 957 i = self.index
963 958 d = i[self.rev(node)]
964 959 # inline node() to avoid function call overhead
965 960 if d[5] == self.nullid:
966 961 return i[d[6]][7], i[d[5]][7]
967 962 else:
968 963 return i[d[5]][7], i[d[6]][7]
969 964
970 965 def chainlen(self, rev):
971 966 return self._chaininfo(rev)[0]
972 967
973 968 def _chaininfo(self, rev):
974 969 chaininfocache = self._chaininfocache
975 970 if rev in chaininfocache:
976 971 return chaininfocache[rev]
977 972 index = self.index
978 973 generaldelta = self._generaldelta
979 974 iterrev = rev
980 975 e = index[iterrev]
981 976 clen = 0
982 977 compresseddeltalen = 0
983 978 while iterrev != e[3]:
984 979 clen += 1
985 980 compresseddeltalen += e[1]
986 981 if generaldelta:
987 982 iterrev = e[3]
988 983 else:
989 984 iterrev -= 1
990 985 if iterrev in chaininfocache:
991 986 t = chaininfocache[iterrev]
992 987 clen += t[0]
993 988 compresseddeltalen += t[1]
994 989 break
995 990 e = index[iterrev]
996 991 else:
997 992 # Add text length of base since decompressing that also takes
998 993 # work. For cache hits the length is already included.
999 994 compresseddeltalen += e[1]
1000 995 r = (clen, compresseddeltalen)
1001 996 chaininfocache[rev] = r
1002 997 return r
1003 998
1004 999 def _deltachain(self, rev, stoprev=None):
1005 1000 """Obtain the delta chain for a revision.
1006 1001
1007 1002 ``stoprev`` specifies a revision to stop at. If not specified, we
1008 1003 stop at the base of the chain.
1009 1004
1010 1005 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1011 1006 revs in ascending order and ``stopped`` is a bool indicating whether
1012 1007 ``stoprev`` was hit.
1013 1008 """
1014 1009 # Try C implementation.
1015 1010 try:
1016 1011 return self.index.deltachain(rev, stoprev, self._generaldelta)
1017 1012 except AttributeError:
1018 1013 pass
1019 1014
1020 1015 chain = []
1021 1016
1022 1017 # Alias to prevent attribute lookup in tight loop.
1023 1018 index = self.index
1024 1019 generaldelta = self._generaldelta
1025 1020
1026 1021 iterrev = rev
1027 1022 e = index[iterrev]
1028 1023 while iterrev != e[3] and iterrev != stoprev:
1029 1024 chain.append(iterrev)
1030 1025 if generaldelta:
1031 1026 iterrev = e[3]
1032 1027 else:
1033 1028 iterrev -= 1
1034 1029 e = index[iterrev]
1035 1030
1036 1031 if iterrev == stoprev:
1037 1032 stopped = True
1038 1033 else:
1039 1034 chain.append(iterrev)
1040 1035 stopped = False
1041 1036
1042 1037 chain.reverse()
1043 1038 return chain, stopped
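# A pure-Python sketch of what the fallback loop above computes, using a toy
# list where bases[r] is the delta base of rev r and a self-base marks a full
# snapshot. The names are illustrative only.
#
#     def toy_deltachain(bases, rev):
#         chain = []
#         while bases[rev] != rev:
#             chain.append(rev)
#             rev = bases[rev]  # generaldelta: follow the stored base
#         chain.append(rev)
#         chain.reverse()
#         return chain
#
#     #      rev:            0  1  2  3
#     assert toy_deltachain([0, 0, 1, 2], 3) == [0, 1, 2, 3]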
1044 1039
1045 1040 def ancestors(self, revs, stoprev=0, inclusive=False):
1046 1041 """Generate the ancestors of 'revs' in reverse revision order.
1047 1042 Does not generate revs lower than stoprev.
1048 1043
1049 1044 See the documentation for ancestor.lazyancestors for more details."""
1050 1045
1051 1046 # first, make sure start revisions aren't filtered
1052 1047 revs = list(revs)
1053 1048 checkrev = self.node
1054 1049 for r in revs:
1055 1050 checkrev(r)
1056 1051 # and we're sure ancestors aren't filtered as well
1057 1052
1058 1053 if rustancestor is not None and self.index.rust_ext_compat:
1059 1054 lazyancestors = rustancestor.LazyAncestors
1060 1055 arg = self.index
1061 1056 else:
1062 1057 lazyancestors = ancestor.lazyancestors
1063 1058 arg = self._uncheckedparentrevs
1064 1059 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1065 1060
1066 1061 def descendants(self, revs):
1067 1062 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1068 1063
1069 1064 def findcommonmissing(self, common=None, heads=None):
1070 1065 """Return a tuple of the ancestors of common and the ancestors of heads
1071 1066 that are not ancestors of common. In revset terminology, we return the
1072 1067 tuple:
1073 1068
1074 1069 ::common, (::heads) - (::common)
1075 1070
1076 1071 The list is sorted by revision number, meaning it is
1077 1072 topologically sorted.
1078 1073
1079 1074 'heads' and 'common' are both lists of node IDs. If heads is
1080 1075 not supplied, uses all of the revlog's heads. If common is not
1081 1076 supplied, uses nullid."""
1082 1077 if common is None:
1083 1078 common = [self.nullid]
1084 1079 if heads is None:
1085 1080 heads = self.heads()
1086 1081
1087 1082 common = [self.rev(n) for n in common]
1088 1083 heads = [self.rev(n) for n in heads]
1089 1084
1090 1085 # we want the ancestors, but inclusive
1091 1086 class lazyset(object):
1092 1087 def __init__(self, lazyvalues):
1093 1088 self.addedvalues = set()
1094 1089 self.lazyvalues = lazyvalues
1095 1090
1096 1091 def __contains__(self, value):
1097 1092 return value in self.addedvalues or value in self.lazyvalues
1098 1093
1099 1094 def __iter__(self):
1100 1095 added = self.addedvalues
1101 1096 for r in added:
1102 1097 yield r
1103 1098 for r in self.lazyvalues:
1104 1099 if not r in added:
1105 1100 yield r
1106 1101
1107 1102 def add(self, value):
1108 1103 self.addedvalues.add(value)
1109 1104
1110 1105 def update(self, values):
1111 1106 self.addedvalues.update(values)
1112 1107
1113 1108 has = lazyset(self.ancestors(common))
1114 1109 has.add(nullrev)
1115 1110 has.update(common)
1116 1111
1117 1112 # take all ancestors from heads that aren't in has
1118 1113 missing = set()
1119 1114 visit = collections.deque(r for r in heads if r not in has)
1120 1115 while visit:
1121 1116 r = visit.popleft()
1122 1117 if r in missing:
1123 1118 continue
1124 1119 else:
1125 1120 missing.add(r)
1126 1121 for p in self.parentrevs(r):
1127 1122 if p not in has:
1128 1123 visit.append(p)
1129 1124 missing = list(missing)
1130 1125 missing.sort()
1131 1126 return has, [self.node(miss) for miss in missing]
1132 1127
1133 1128 def incrementalmissingrevs(self, common=None):
1134 1129 """Return an object that can be used to incrementally compute the
1135 1130 revision numbers of the ancestors of arbitrary sets that are not
1136 1131 ancestors of common. This is an ancestor.incrementalmissingancestors
1137 1132 object.
1138 1133
1139 1134 'common' is a list of revision numbers. If common is not supplied, uses
1140 1135 nullrev.
1141 1136 """
1142 1137 if common is None:
1143 1138 common = [nullrev]
1144 1139
1145 1140 if rustancestor is not None and self.index.rust_ext_compat:
1146 1141 return rustancestor.MissingAncestors(self.index, common)
1147 1142 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1148 1143
1149 1144 def findmissingrevs(self, common=None, heads=None):
1150 1145 """Return the revision numbers of the ancestors of heads that
1151 1146 are not ancestors of common.
1152 1147
1153 1148 More specifically, return a list of revision numbers corresponding to
1154 1149 nodes N such that every N satisfies the following constraints:
1155 1150
1156 1151 1. N is an ancestor of some node in 'heads'
1157 1152 2. N is not an ancestor of any node in 'common'
1158 1153
1159 1154 The list is sorted by revision number, meaning it is
1160 1155 topologically sorted.
1161 1156
1162 1157 'heads' and 'common' are both lists of revision numbers. If heads is
1163 1158 not supplied, uses all of the revlog's heads. If common is not
1164 1159 supplied, uses nullid."""
1165 1160 if common is None:
1166 1161 common = [nullrev]
1167 1162 if heads is None:
1168 1163 heads = self.headrevs()
1169 1164
1170 1165 inc = self.incrementalmissingrevs(common=common)
1171 1166 return inc.missingancestors(heads)
1172 1167
1173 1168 def findmissing(self, common=None, heads=None):
1174 1169 """Return the ancestors of heads that are not ancestors of common.
1175 1170
1176 1171 More specifically, return a list of nodes N such that every N
1177 1172 satisfies the following constraints:
1178 1173
1179 1174 1. N is an ancestor of some node in 'heads'
1180 1175 2. N is not an ancestor of any node in 'common'
1181 1176
1182 1177 The list is sorted by revision number, meaning it is
1183 1178 topologically sorted.
1184 1179
1185 1180 'heads' and 'common' are both lists of node IDs. If heads is
1186 1181 not supplied, uses all of the revlog's heads. If common is not
1187 1182 supplied, uses nullid."""
1188 1183 if common is None:
1189 1184 common = [self.nullid]
1190 1185 if heads is None:
1191 1186 heads = self.heads()
1192 1187
1193 1188 common = [self.rev(n) for n in common]
1194 1189 heads = [self.rev(n) for n in heads]
1195 1190
1196 1191 inc = self.incrementalmissingrevs(common=common)
1197 1192 return [self.node(r) for r in inc.missingancestors(heads)]
1198 1193
1199 1194 def nodesbetween(self, roots=None, heads=None):
1200 1195 """Return a topological path from 'roots' to 'heads'.
1201 1196
1202 1197 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1203 1198 topologically sorted list of all nodes N that satisfy both of
1204 1199 these constraints:
1205 1200
1206 1201 1. N is a descendant of some node in 'roots'
1207 1202 2. N is an ancestor of some node in 'heads'
1208 1203
1209 1204 Every node is considered to be both a descendant and an ancestor
1210 1205 of itself, so every reachable node in 'roots' and 'heads' will be
1211 1206 included in 'nodes'.
1212 1207
1213 1208 'outroots' is the list of reachable nodes in 'roots', i.e., the
1214 1209 subset of 'roots' that is returned in 'nodes'. Likewise,
1215 1210 'outheads' is the subset of 'heads' that is also in 'nodes'.
1216 1211
1217 1212 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1218 1213 unspecified, uses nullid as the only root. If 'heads' is
1219 1214 unspecified, uses list of all of the revlog's heads."""
1220 1215 nonodes = ([], [], [])
1221 1216 if roots is not None:
1222 1217 roots = list(roots)
1223 1218 if not roots:
1224 1219 return nonodes
1225 1220 lowestrev = min([self.rev(n) for n in roots])
1226 1221 else:
1227 1222 roots = [self.nullid] # Everybody's a descendant of nullid
1228 1223 lowestrev = nullrev
1229 1224 if (lowestrev == nullrev) and (heads is None):
1230 1225 # We want _all_ the nodes!
1231 1226 return (
1232 1227 [self.node(r) for r in self],
1233 1228 [self.nullid],
1234 1229 list(self.heads()),
1235 1230 )
1236 1231 if heads is None:
1237 1232 # All nodes are ancestors, so the latest ancestor is the last
1238 1233 # node.
1239 1234 highestrev = len(self) - 1
1240 1235 # Set ancestors to None to signal that every node is an ancestor.
1241 1236 ancestors = None
1242 1237 # Set heads to an empty dictionary for later discovery of heads
1243 1238 heads = {}
1244 1239 else:
1245 1240 heads = list(heads)
1246 1241 if not heads:
1247 1242 return nonodes
1248 1243 ancestors = set()
1249 1244 # Turn heads into a dictionary so we can remove 'fake' heads.
1250 1245 # Also, later we will be using it to filter out the heads we can't
1251 1246 # find from roots.
1252 1247 heads = dict.fromkeys(heads, False)
1253 1248 # Start at the top and keep marking parents until we're done.
1254 1249 nodestotag = set(heads)
1255 1250 # Remember where the top was so we can use it as a limit later.
1256 1251 highestrev = max([self.rev(n) for n in nodestotag])
1257 1252 while nodestotag:
1258 1253 # grab a node to tag
1259 1254 n = nodestotag.pop()
1260 1255 # Never tag nullid
1261 1256 if n == self.nullid:
1262 1257 continue
1263 1258 # A node's revision number represents its place in a
1264 1259 # topologically sorted list of nodes.
1265 1260 r = self.rev(n)
1266 1261 if r >= lowestrev:
1267 1262 if n not in ancestors:
1268 1263 # If we are possibly a descendant of one of the roots
1269 1264 # and we haven't already been marked as an ancestor
1270 1265 ancestors.add(n) # Mark as ancestor
1271 1266 # Add non-nullid parents to list of nodes to tag.
1272 1267 nodestotag.update(
1273 1268 [p for p in self.parents(n) if p != self.nullid]
1274 1269 )
1275 1270 elif n in heads: # We've seen it before, is it a fake head?
1276 1271 # So it is, real heads should not be the ancestors of
1277 1272 # any other heads.
1278 1273 heads.pop(n)
1279 1274 if not ancestors:
1280 1275 return nonodes
1281 1276 # Now that we have our set of ancestors, we want to remove any
1282 1277 # roots that are not ancestors.
1283 1278
1284 1279 # If one of the roots was nullid, everything is included anyway.
1285 1280 if lowestrev > nullrev:
1286 1281 # But, since we weren't, let's recompute the lowest rev to not
1287 1282 # include roots that aren't ancestors.
1288 1283
1289 1284 # Filter out roots that aren't ancestors of heads
1290 1285 roots = [root for root in roots if root in ancestors]
1291 1286 # Recompute the lowest revision
1292 1287 if roots:
1293 1288 lowestrev = min([self.rev(root) for root in roots])
1294 1289 else:
1295 1290 # No more roots? Return empty list
1296 1291 return nonodes
1297 1292 else:
1298 1293 # We are descending from nullid, and don't need to care about
1299 1294 # any other roots.
1300 1295 lowestrev = nullrev
1301 1296 roots = [self.nullid]
1302 1297 # Transform our roots list into a set.
1303 1298 descendants = set(roots)
1304 1299 # Also, keep the original roots so we can filter out roots that aren't
1305 1300 # 'real' roots (i.e. are descended from other roots).
1306 1301 roots = descendants.copy()
1307 1302 # Our topologically sorted list of output nodes.
1308 1303 orderedout = []
1309 1304 # Don't start at nullid since we don't want nullid in our output list,
1310 1305 # and if nullid shows up in descendants, empty parents will look like
1311 1306 # they're descendants.
1312 1307 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1313 1308 n = self.node(r)
1314 1309 isdescendant = False
1315 1310 if lowestrev == nullrev: # Everybody is a descendant of nullid
1316 1311 isdescendant = True
1317 1312 elif n in descendants:
1318 1313 # n is already a descendant
1319 1314 isdescendant = True
1320 1315 # This check only needs to be done here because all the roots
1321 1316 # will start being marked as descendants before the loop.
1322 1317 if n in roots:
1323 1318 # If n was a root, check if it's a 'real' root.
1324 1319 p = tuple(self.parents(n))
1325 1320 # If any of its parents are descendants, it's not a root.
1326 1321 if (p[0] in descendants) or (p[1] in descendants):
1327 1322 roots.remove(n)
1328 1323 else:
1329 1324 p = tuple(self.parents(n))
1330 1325 # A node is a descendant if either of its parents is a
1331 1326 # descendant. (We seeded the descendants set with the roots
1332 1327 # up there, remember?)
1333 1328 if (p[0] in descendants) or (p[1] in descendants):
1334 1329 descendants.add(n)
1335 1330 isdescendant = True
1336 1331 if isdescendant and ((ancestors is None) or (n in ancestors)):
1337 1332 # Only include nodes that are both descendants and ancestors.
1338 1333 orderedout.append(n)
1339 1334 if (ancestors is not None) and (n in heads):
1340 1335 # We're trying to figure out which heads are reachable
1341 1336 # from roots.
1342 1337 # Mark this head as having been reached
1343 1338 heads[n] = True
1344 1339 elif ancestors is None:
1345 1340 # Otherwise, we're trying to discover the heads.
1346 1341 # Assume this is a head because if it isn't, the next step
1347 1342 # will eventually remove it.
1348 1343 heads[n] = True
1349 1344 # But, obviously its parents aren't.
1350 1345 for p in self.parents(n):
1351 1346 heads.pop(p, None)
1352 1347 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1353 1348 roots = list(roots)
1354 1349 assert orderedout
1355 1350 assert roots
1356 1351 assert heads
1357 1352 return (orderedout, roots, heads)
1358 1353
1359 1354 def headrevs(self, revs=None):
1360 1355 if revs is None:
1361 1356 try:
1362 1357 return self.index.headrevs()
1363 1358 except AttributeError:
1364 1359 return self._headrevs()
1365 1360 if rustdagop is not None and self.index.rust_ext_compat:
1366 1361 return rustdagop.headrevs(self.index, revs)
1367 1362 return dagop.headrevs(revs, self._uncheckedparentrevs)
1368 1363
1369 1364 def computephases(self, roots):
1370 1365 return self.index.computephasesmapsets(roots)
1371 1366
1372 1367 def _headrevs(self):
1373 1368 count = len(self)
1374 1369 if not count:
1375 1370 return [nullrev]
1376 1371 # we won't iter over filtered rev so nobody is a head at start
1377 1372 ishead = [0] * (count + 1)
1378 1373 index = self.index
1379 1374 for r in self:
1380 1375 ishead[r] = 1 # I may be a head
1381 1376 e = index[r]
1382 1377 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1383 1378 return [r for r, val in enumerate(ishead) if val]
1384 1379
1385 1380 def heads(self, start=None, stop=None):
1386 1381 """return the list of all nodes that have no children
1387 1382
1388 1383 if start is specified, only heads that are descendants of
1389 1384 start will be returned
1390 1385 if stop is specified, it will consider all the revs from stop
1391 1386 as if they had no children
1392 1387 """
1393 1388 if start is None and stop is None:
1394 1389 if not len(self):
1395 1390 return [self.nullid]
1396 1391 return [self.node(r) for r in self.headrevs()]
1397 1392
1398 1393 if start is None:
1399 1394 start = nullrev
1400 1395 else:
1401 1396 start = self.rev(start)
1402 1397
1403 1398 stoprevs = {self.rev(n) for n in stop or []}
1404 1399
1405 1400 revs = dagop.headrevssubset(
1406 1401 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1407 1402 )
1408 1403
1409 1404 return [self.node(rev) for rev in revs]
1410 1405
1411 1406 def children(self, node):
1412 1407 """find the children of a given node"""
1413 1408 c = []
1414 1409 p = self.rev(node)
1415 1410 for r in self.revs(start=p + 1):
1416 1411 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1417 1412 if prevs:
1418 1413 for pr in prevs:
1419 1414 if pr == p:
1420 1415 c.append(self.node(r))
1421 1416 elif p == nullrev:
1422 1417 c.append(self.node(r))
1423 1418 return c
1424 1419
1425 1420 def commonancestorsheads(self, a, b):
1426 1421 """calculate all the heads of the common ancestors of nodes a and b"""
1427 1422 a, b = self.rev(a), self.rev(b)
1428 1423 ancs = self._commonancestorsheads(a, b)
1429 1424 return pycompat.maplist(self.node, ancs)
1430 1425
1431 1426 def _commonancestorsheads(self, *revs):
1432 1427 """calculate all the heads of the common ancestors of revs"""
1433 1428 try:
1434 1429 ancs = self.index.commonancestorsheads(*revs)
1435 1430 except (AttributeError, OverflowError): # C implementation failed
1436 1431 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1437 1432 return ancs
1438 1433
1439 1434 def isancestor(self, a, b):
1440 1435 """return True if node a is an ancestor of node b
1441 1436
1442 1437 A revision is considered an ancestor of itself."""
1443 1438 a, b = self.rev(a), self.rev(b)
1444 1439 return self.isancestorrev(a, b)
1445 1440
1446 1441 def isancestorrev(self, a, b):
1447 1442 """return True if revision a is an ancestor of revision b
1448 1443
1449 1444 A revision is considered an ancestor of itself.
1450 1445
1451 1446 The implementation of this is trivial but the use of
1452 1447 reachableroots is not."""
1453 1448 if a == nullrev:
1454 1449 return True
1455 1450 elif a == b:
1456 1451 return True
1457 1452 elif a > b:
1458 1453 return False
1459 1454 return bool(self.reachableroots(a, [b], [a], includepath=False))
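# A pure-Python illustration of the same ancestry test, assuming (as revlogs
# guarantee) that every parent has a smaller revision number than its child.
# The real fast path above goes through reachableroots; this sketch only
# spells out the semantics.
#
#     def _isancestorrev_pure(parentrevs, a, b, nullrev=-1):
#         if a == nullrev or a == b:
#             return True
#         if a > b:
#             return False
#         reachable = {b}
#         for rev in range(b, a, -1):
#             if rev in reachable:
#                 for p in parentrevs(rev):
#                     if p != nullrev:
#                         reachable.add(p)
#         return a in reachable
#
#     # linear history 0 <- 1 <- 2
#     parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
#     assert _isancestorrev_pure(parents.__getitem__, 0, 2)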
1460 1455
1461 1456 def reachableroots(self, minroot, heads, roots, includepath=False):
1462 1457 """return (heads(::(<roots> and <roots>::<heads>)))
1463 1458
1464 1459 If includepath is True, return (<roots>::<heads>)."""
1465 1460 try:
1466 1461 return self.index.reachableroots2(
1467 1462 minroot, heads, roots, includepath
1468 1463 )
1469 1464 except AttributeError:
1470 1465 return dagop._reachablerootspure(
1471 1466 self.parentrevs, minroot, roots, heads, includepath
1472 1467 )
1473 1468
1474 1469 def ancestor(self, a, b):
1475 1470 """calculate the "best" common ancestor of nodes a and b"""
1476 1471
1477 1472 a, b = self.rev(a), self.rev(b)
1478 1473 try:
1479 1474 ancs = self.index.ancestors(a, b)
1480 1475 except (AttributeError, OverflowError):
1481 1476 ancs = ancestor.ancestors(self.parentrevs, a, b)
1482 1477 if ancs:
1483 1478 # choose a consistent winner when there's a tie
1484 1479 return min(map(self.node, ancs))
1485 1480 return self.nullid
1486 1481
1487 1482 def _match(self, id):
1488 1483 if isinstance(id, int):
1489 1484 # rev
1490 1485 return self.node(id)
1491 1486 if len(id) == self.nodeconstants.nodelen:
1492 1487 # possibly a binary node
1493 1488 # odds of a binary node being all hex in ASCII are 1 in 10**25
1494 1489 try:
1495 1490 node = id
1496 1491 self.rev(node) # quick search the index
1497 1492 return node
1498 1493 except error.LookupError:
1499 1494 pass # may be partial hex id
1500 1495 try:
1501 1496 # str(rev)
1502 1497 rev = int(id)
1503 1498 if b"%d" % rev != id:
1504 1499 raise ValueError
1505 1500 if rev < 0:
1506 1501 rev = len(self) + rev
1507 1502 if rev < 0 or rev >= len(self):
1508 1503 raise ValueError
1509 1504 return self.node(rev)
1510 1505 except (ValueError, OverflowError):
1511 1506 pass
1512 1507 if len(id) == 2 * self.nodeconstants.nodelen:
1513 1508 try:
1514 1509 # a full hex nodeid?
1515 1510 node = bin(id)
1516 1511 self.rev(node)
1517 1512 return node
1518 1513 except (TypeError, error.LookupError):
1519 1514 pass
1520 1515
1521 1516 def _partialmatch(self, id):
1522 1517 # we don't care about wdirfilenodeids as they should always be full hashes
1523 1518 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1524 1519 ambiguous = False
1525 1520 try:
1526 1521 partial = self.index.partialmatch(id)
1527 1522 if partial and self.hasnode(partial):
1528 1523 if maybewdir:
1529 1524 # single 'ff...' match in radix tree, ambiguous with wdir
1530 1525 ambiguous = True
1531 1526 else:
1532 1527 return partial
1533 1528 elif maybewdir:
1534 1529 # no 'ff...' match in radix tree, wdir identified
1535 1530 raise error.WdirUnsupported
1536 1531 else:
1537 1532 return None
1538 1533 except error.RevlogError:
1539 1534 # parsers.c radix tree lookup gave multiple matches
1540 1535 # fast path: for unfiltered changelog, radix tree is accurate
1541 1536 if not getattr(self, 'filteredrevs', None):
1542 1537 ambiguous = True
1543 1538 # fall through to slow path that filters hidden revisions
1544 1539 except (AttributeError, ValueError):
1545 1540 # we are pure python, or key was too short to search radix tree
1546 1541 pass
1547 1542 if ambiguous:
1548 1543 raise error.AmbiguousPrefixLookupError(
1549 1544 id, self.display_id, _(b'ambiguous identifier')
1550 1545 )
1551 1546
1552 1547 if id in self._pcache:
1553 1548 return self._pcache[id]
1554 1549
1555 1550 if len(id) <= 40:
1556 1551 try:
1557 1552 # hex(node)[:...]
1558 1553 l = len(id) // 2 # grab an even number of digits
1559 1554 prefix = bin(id[: l * 2])
1560 1555 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1561 1556 nl = [
1562 1557 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1563 1558 ]
1564 1559 if self.nodeconstants.nullhex.startswith(id):
1565 1560 nl.append(self.nullid)
1566 1561 if len(nl) > 0:
1567 1562 if len(nl) == 1 and not maybewdir:
1568 1563 self._pcache[id] = nl[0]
1569 1564 return nl[0]
1570 1565 raise error.AmbiguousPrefixLookupError(
1571 1566 id, self.display_id, _(b'ambiguous identifier')
1572 1567 )
1573 1568 if maybewdir:
1574 1569 raise error.WdirUnsupported
1575 1570 return None
1576 1571 except TypeError:
1577 1572 pass
1578 1573
1579 1574 def lookup(self, id):
1580 1575 """locate a node based on:
1581 1576 - revision number or str(revision number)
1582 1577 - nodeid or subset of hex nodeid
1583 1578 """
1584 1579 n = self._match(id)
1585 1580 if n is not None:
1586 1581 return n
1587 1582 n = self._partialmatch(id)
1588 1583 if n:
1589 1584 return n
1590 1585
1591 1586 raise error.LookupError(id, self.display_id, _(b'no match found'))
1592 1587
1593 1588 def shortest(self, node, minlength=1):
1594 1589 """Find the shortest unambiguous prefix that matches node."""
1595 1590
1596 1591 def isvalid(prefix):
1597 1592 try:
1598 1593 matchednode = self._partialmatch(prefix)
1599 1594 except error.AmbiguousPrefixLookupError:
1600 1595 return False
1601 1596 except error.WdirUnsupported:
1602 1597 # single 'ff...' match
1603 1598 return True
1604 1599 if matchednode is None:
1605 1600 raise error.LookupError(node, self.display_id, _(b'no node'))
1606 1601 return True
1607 1602
1608 1603 def maybewdir(prefix):
1609 1604 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1610 1605
1611 1606 hexnode = hex(node)
1612 1607
1613 1608 def disambiguate(hexnode, minlength):
1614 1609 """Disambiguate against wdirid."""
1615 1610 for length in range(minlength, len(hexnode) + 1):
1616 1611 prefix = hexnode[:length]
1617 1612 if not maybewdir(prefix):
1618 1613 return prefix
1619 1614
1620 1615 if not getattr(self, 'filteredrevs', None):
1621 1616 try:
1622 1617 length = max(self.index.shortest(node), minlength)
1623 1618 return disambiguate(hexnode, length)
1624 1619 except error.RevlogError:
1625 1620 if node != self.nodeconstants.wdirid:
1626 1621 raise error.LookupError(
1627 1622 node, self.display_id, _(b'no node')
1628 1623 )
1629 1624 except AttributeError:
1630 1625 # Fall through to pure code
1631 1626 pass
1632 1627
1633 1628 if node == self.nodeconstants.wdirid:
1634 1629 for length in range(minlength, len(hexnode) + 1):
1635 1630 prefix = hexnode[:length]
1636 1631 if isvalid(prefix):
1637 1632 return prefix
1638 1633
1639 1634 for length in range(minlength, len(hexnode) + 1):
1640 1635 prefix = hexnode[:length]
1641 1636 if isvalid(prefix):
1642 1637 return disambiguate(hexnode, length)
1643 1638
1644 1639 def cmp(self, node, text):
1645 1640 """compare text with a given file revision
1646 1641
1647 1642 returns True if text is different than what is stored.
1648 1643 """
1649 1644 p1, p2 = self.parents(node)
1650 1645 return storageutil.hashrevisionsha1(text, p1, p2) != node
1651 1646
1652 1647 def _cachesegment(self, offset, data):
1653 1648 """Add a segment to the revlog cache.
1654 1649
1655 1650 Accepts an absolute offset and the data that is at that location.
1656 1651 """
1657 1652 o, d = self._chunkcache
1658 1653 # try to add to existing cache
1659 1654 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1660 1655 self._chunkcache = o, d + data
1661 1656 else:
1662 1657 self._chunkcache = offset, data
1663 1658
1664 1659 def _readsegment(self, offset, length, df=None):
1665 1660 """Load a segment of raw data from the revlog.
1666 1661
1667 1662 Accepts an absolute offset, length to read, and an optional existing
1668 1663 file handle to read from.
1669 1664
1670 1665 If an existing file handle is passed, it will be seeked and the
1671 1666 original seek position will NOT be restored.
1672 1667
1673 1668 Returns a str or buffer of raw byte data.
1674 1669
1675 1670 Raises if the requested number of bytes could not be read.
1676 1671 """
1677 1672 # Cache data both forward and backward around the requested
1678 1673 # data, in a fixed size window. This helps speed up operations
1679 1674 # involving reading the revlog backwards.
1680 1675 cachesize = self._chunkcachesize
1681 1676 realoffset = offset & ~(cachesize - 1)
1682 1677 reallength = (
1683 1678 (offset + length + cachesize) & ~(cachesize - 1)
1684 1679 ) - realoffset
1685 1680 with self._datareadfp(df) as df:
1686 1681 df.seek(realoffset)
1687 1682 d = df.read(reallength)
1688 1683
1689 1684 self._cachesegment(realoffset, d)
1690 1685 if offset != realoffset or reallength != length:
1691 1686 startoffset = offset - realoffset
1692 1687 if len(d) - startoffset < length:
1693 1688 filename = self._indexfile if self._inline else self._datafile
1694 1689 got = len(d) - startoffset
1695 1690 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1696 1691 raise error.RevlogError(m)
1697 1692 return util.buffer(d, startoffset, length)
1698 1693
1699 1694 if len(d) < length:
1700 1695 filename = self._indexfile if self._inline else self._datafile
1701 1696 got = len(d)
1702 1697 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1703 1698 raise error.RevlogError(m)
1704 1699
1705 1700 return d
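# A worked example of the window arithmetic above, with a 64 KiB cache size
# (the size is guaranteed to be a power of two by _init_opts):
#
#     cachesize = 65536
#     offset, length = 70000, 1000
#     realoffset = offset & ~(cachesize - 1)
#     reallength = (
#         (offset + length + cachesize) & ~(cachesize - 1)
#     ) - realoffset
#     assert realoffset == 65536   # rounded down to a 64 KiB boundary
#     assert reallength == 65536   # the window [65536, 131072) covers the read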
1706 1701
1707 1702 def _getsegment(self, offset, length, df=None):
1708 1703 """Obtain a segment of raw data from the revlog.
1709 1704
1710 1705 Accepts an absolute offset, length of bytes to obtain, and an
1711 1706 optional file handle to the already-opened revlog. If the file
1712 1707 handle is used, its original seek position will not be preserved.
1713 1708
1714 1709 Requests for data may be returned from a cache.
1715 1710
1716 1711 Returns a str or a buffer instance of raw byte data.
1717 1712 """
1718 1713 o, d = self._chunkcache
1719 1714 l = len(d)
1720 1715
1721 1716 # is it in the cache?
1722 1717 cachestart = offset - o
1723 1718 cacheend = cachestart + length
1724 1719 if cachestart >= 0 and cacheend <= l:
1725 1720 if cachestart == 0 and cacheend == l:
1726 1721 return d # avoid a copy
1727 1722 return util.buffer(d, cachestart, cacheend - cachestart)
1728 1723
1729 1724 return self._readsegment(offset, length, df=df)
1730 1725
1731 1726 def _getsegmentforrevs(self, startrev, endrev, df=None):
1732 1727 """Obtain a segment of raw data corresponding to a range of revisions.
1733 1728
1734 1729 Accepts the start and end revisions and an optional already-open
1735 1730 file handle to be used for reading. If the file handle is read, its
1736 1731 seek position will not be preserved.
1737 1732
1738 1733 Requests for data may be satisfied by a cache.
1739 1734
1740 1735 Returns a 2-tuple of (offset, data) for the requested range of
1741 1736 revisions. Offset is the integer offset from the beginning of the
1742 1737 revlog and data is a str or buffer of the raw byte data.
1743 1738
1744 1739 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1745 1740 to determine where each revision's data begins and ends.
1746 1741 """
1747 1742 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1748 1743 # (functions are expensive).
1749 1744 index = self.index
1750 1745 istart = index[startrev]
1751 1746 start = int(istart[0] >> 16)
1752 1747 if startrev == endrev:
1753 1748 end = start + istart[1]
1754 1749 else:
1755 1750 iend = index[endrev]
1756 1751 end = int(iend[0] >> 16) + iend[1]
1757 1752
1758 1753 if self._inline:
1759 1754 start += (startrev + 1) * self.index.entry_size
1760 1755 end += (endrev + 1) * self.index.entry_size
1761 1756 length = end - start
1762 1757
1763 1758 return start, self._getsegment(start, length, df=df)
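# Why the inline adjustment above is needed: in an inline revlog the index
# entries and the revision data share a single file, so the physical position
# of rev N's data is its logical data offset plus the size of the N + 1 index
# entries written before it. A worked example with made-up numbers, assuming
# a 64-byte index entry:
#
#     entry_size = 64
#     startrev, logical_start = 3, 500
#     physical_start = logical_start + (startrev + 1) * entry_size
#     assert physical_start == 756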
1764 1759
1765 1760 def _chunk(self, rev, df=None):
1766 1761 """Obtain a single decompressed chunk for a revision.
1767 1762
1768 1763 Accepts an integer revision and an optional already-open file handle
1769 1764 to be used for reading. If used, the seek position of the file will not
1770 1765 be preserved.
1771 1766
1772 1767 Returns a str holding uncompressed data for the requested revision.
1773 1768 """
1774 1769 compression_mode = self.index[rev][10]
1775 1770 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1776 1771 if compression_mode == COMP_MODE_PLAIN:
1777 1772 return data
1778 1773 elif compression_mode == COMP_MODE_DEFAULT:
1779 1774 return self._decompressor(data)
1780 1775 elif compression_mode == COMP_MODE_INLINE:
1781 1776 return self.decompress(data)
1782 1777 else:
1783 1778 msg = 'unknown compression mode %d'
1784 1779 msg %= compression_mode
1785 1780 raise error.RevlogError(msg)
1786 1781
1787 1782 def _chunks(self, revs, df=None, targetsize=None):
1788 1783 """Obtain decompressed chunks for the specified revisions.
1789 1784
1790 1785 Accepts an iterable of numeric revisions that are assumed to be in
1791 1786 ascending order. Also accepts an optional already-open file handle
1792 1787 to be used for reading. If used, the seek position of the file will
1793 1788 not be preserved.
1794 1789
1795 1790 This function is similar to calling ``self._chunk()`` multiple times,
1796 1791 but is faster.
1797 1792
1798 1793 Returns a list with decompressed data for each requested revision.
1799 1794 """
1800 1795 if not revs:
1801 1796 return []
1802 1797 start = self.start
1803 1798 length = self.length
1804 1799 inline = self._inline
1805 1800 iosize = self.index.entry_size
1806 1801 buffer = util.buffer
1807 1802
1808 1803 l = []
1809 1804 ladd = l.append
1810 1805
1811 1806 if not self._withsparseread:
1812 1807 slicedchunks = (revs,)
1813 1808 else:
1814 1809 slicedchunks = deltautil.slicechunk(
1815 1810 self, revs, targetsize=targetsize
1816 1811 )
1817 1812
1818 1813 for revschunk in slicedchunks:
1819 1814 firstrev = revschunk[0]
1820 1815 # Skip trailing revisions with empty diff
1821 1816 for lastrev in revschunk[::-1]:
1822 1817 if length(lastrev) != 0:
1823 1818 break
1824 1819
1825 1820 try:
1826 1821 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1827 1822 except OverflowError:
1828 1823 # issue4215 - we can't cache a run of chunks greater than
1829 1824 # 2G on Windows
1830 1825 return [self._chunk(rev, df=df) for rev in revschunk]
1831 1826
1832 1827 decomp = self.decompress
1833 1828 # self._decompressor might be None, but will not be used in that case
1834 1829 def_decomp = self._decompressor
1835 1830 for rev in revschunk:
1836 1831 chunkstart = start(rev)
1837 1832 if inline:
1838 1833 chunkstart += (rev + 1) * iosize
1839 1834 chunklength = length(rev)
1840 1835 comp_mode = self.index[rev][10]
1841 1836 c = buffer(data, chunkstart - offset, chunklength)
1842 1837 if comp_mode == COMP_MODE_PLAIN:
1843 1838 ladd(c)
1844 1839 elif comp_mode == COMP_MODE_INLINE:
1845 1840 ladd(decomp(c))
1846 1841 elif comp_mode == COMP_MODE_DEFAULT:
1847 1842 ladd(def_decomp(c))
1848 1843 else:
1849 1844 msg = 'unknown compression mode %d'
1850 1845 msg %= comp_mode
1851 1846 raise error.RevlogError(msg)
1852 1847
1853 1848 return l
1854 1849
1855 1850 def _chunkclear(self):
1856 1851 """Clear the raw chunk cache."""
1857 1852 self._chunkcache = (0, b'')
1858 1853
1859 1854 def deltaparent(self, rev):
1860 1855 """return deltaparent of the given revision"""
1861 1856 base = self.index[rev][3]
1862 1857 if base == rev:
1863 1858 return nullrev
1864 1859 elif self._generaldelta:
1865 1860 return base
1866 1861 else:
1867 1862 return rev - 1
1868 1863
1869 1864 def issnapshot(self, rev):
1870 1865 """tells whether rev is a snapshot"""
1871 1866 if not self._sparserevlog:
1872 1867 return self.deltaparent(rev) == nullrev
1873 1868 elif util.safehasattr(self.index, b'issnapshot'):
1874 1869 # directly assign the method to cache both the attribute test and the lookup
1875 1870 self.issnapshot = self.index.issnapshot
1876 1871 return self.issnapshot(rev)
1877 1872 if rev == nullrev:
1878 1873 return True
1879 1874 entry = self.index[rev]
1880 1875 base = entry[3]
1881 1876 if base == rev:
1882 1877 return True
1883 1878 if base == nullrev:
1884 1879 return True
1885 1880 p1 = entry[5]
1886 1881 p2 = entry[6]
1887 1882 if base == p1 or base == p2:
1888 1883 return False
1889 1884 return self.issnapshot(base)
1890 1885
1891 1886 def snapshotdepth(self, rev):
1892 1887 """number of snapshot in the chain before this one"""
1893 1888 if not self.issnapshot(rev):
1894 1889 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1895 1890 return len(self._deltachain(rev)[0]) - 1
1896 1891
1897 1892 def revdiff(self, rev1, rev2):
1898 1893 """return or calculate a delta between two revisions
1899 1894
1900 1895 The delta calculated is in binary form and is intended to be written to
1901 1896 revlog data directly. So this function needs raw revision data.
1902 1897 """
1903 1898 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1904 1899 return bytes(self._chunk(rev2))
1905 1900
1906 1901 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1907 1902
1908 1903 def _processflags(self, text, flags, operation, raw=False):
1909 1904 """deprecated entry point to access flag processors"""
1910 1905 msg = b'_processflag(...) use the specialized variant'
1911 1906 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1912 1907 if raw:
1913 1908 return text, flagutil.processflagsraw(self, text, flags)
1914 1909 elif operation == b'read':
1915 1910 return flagutil.processflagsread(self, text, flags)
1916 1911 else: # write operation
1917 1912 return flagutil.processflagswrite(self, text, flags)
1918 1913
1919 1914 def revision(self, nodeorrev, _df=None, raw=False):
1920 1915 """return an uncompressed revision of a given node or revision
1921 1916 number.
1922 1917
1923 1918 _df - an existing file handle to read from. (internal-only)
1924 1919 raw - an optional argument specifying if the revision data is to be
1925 1920 treated as raw data when applying flag transforms. 'raw' should be set
1926 1921 to True when generating changegroups or in debug commands.
1927 1922 """
1928 1923 if raw:
1929 1924 msg = (
1930 1925 b'revlog.revision(..., raw=True) is deprecated, '
1931 1926 b'use revlog.rawdata(...)'
1932 1927 )
1933 1928 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1934 1929 return self._revisiondata(nodeorrev, _df, raw=raw)
1935 1930
1936 1931 def sidedata(self, nodeorrev, _df=None):
1937 1932 """a map of extra data related to the changeset but not part of the hash
1938 1933
1939 1934 This function currently returns a dictionary. However, a more advanced
1940 1935 mapping object will likely be used in the future to allow for more
1941 1936 efficient/lazy code.
1942 1937 """
1943 1938 # deal with <nodeorrev> argument type
1944 1939 if isinstance(nodeorrev, int):
1945 1940 rev = nodeorrev
1946 1941 else:
1947 1942 rev = self.rev(nodeorrev)
1948 1943 return self._sidedata(rev)
1949 1944
1950 1945 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1951 1946 # deal with <nodeorrev> argument type
1952 1947 if isinstance(nodeorrev, int):
1953 1948 rev = nodeorrev
1954 1949 node = self.node(rev)
1955 1950 else:
1956 1951 node = nodeorrev
1957 1952 rev = None
1958 1953
1959 1954 # fast path the special `nullid` rev
1960 1955 if node == self.nullid:
1961 1956 return b""
1962 1957
1963 1958 # ``rawtext`` is the text as stored inside the revlog. Might be the
1964 1959 # revision or might need to be processed to retrieve the revision.
1965 1960 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1966 1961
1967 1962 if raw and validated:
1968 1963 # if we don't want to process the raw text and that raw
1969 1964 # text is cached, we can exit early.
1970 1965 return rawtext
1971 1966 if rev is None:
1972 1967 rev = self.rev(node)
1973 1968 # the revlog's flag for this revision
1974 1969 # (usually alter its state or content)
1975 1970 flags = self.flags(rev)
1976 1971
1977 1972 if validated and flags == REVIDX_DEFAULT_FLAGS:
1978 1973 # no extra flags set, no flag processor runs, text = rawtext
1979 1974 return rawtext
1980 1975
1981 1976 if raw:
1982 1977 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1983 1978 text = rawtext
1984 1979 else:
1985 1980 r = flagutil.processflagsread(self, rawtext, flags)
1986 1981 text, validatehash = r
1987 1982 if validatehash:
1988 1983 self.checkhash(text, node, rev=rev)
1989 1984 if not validated:
1990 1985 self._revisioncache = (node, rev, rawtext)
1991 1986
1992 1987 return text
1993 1988
1994 1989 def _rawtext(self, node, rev, _df=None):
1995 1990 """return the possibly unvalidated rawtext for a revision
1996 1991
1997 1992 returns (rev, rawtext, validated)
1998 1993 """
1999 1994
2000 1995 # revision in the cache (could be useful to apply delta)
2001 1996 cachedrev = None
2002 1997 # An intermediate text to apply deltas to
2003 1998 basetext = None
2004 1999
2005 2000 # Check if we have the entry in cache
2006 2001 # The cache entry looks like (node, rev, rawtext)
2007 2002 if self._revisioncache:
2008 2003 if self._revisioncache[0] == node:
2009 2004 return (rev, self._revisioncache[2], True)
2010 2005 cachedrev = self._revisioncache[1]
2011 2006
2012 2007 if rev is None:
2013 2008 rev = self.rev(node)
2014 2009
2015 2010 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2016 2011 if stopped:
2017 2012 basetext = self._revisioncache[2]
2018 2013
2019 2014 # drop cache to save memory, the caller is expected to
2020 2015 # update self._revisioncache after validating the text
2021 2016 self._revisioncache = None
2022 2017
2023 2018 targetsize = None
2024 2019 rawsize = self.index[rev][2]
2025 2020 if 0 <= rawsize:
2026 2021 targetsize = 4 * rawsize
2027 2022
2028 2023 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2029 2024 if basetext is None:
2030 2025 basetext = bytes(bins[0])
2031 2026 bins = bins[1:]
2032 2027
2033 2028 rawtext = mdiff.patches(basetext, bins)
2034 2029 del basetext # let us have a chance to free memory early
2035 2030 return (rev, rawtext, False)
2036 2031
2037 2032 def _sidedata(self, rev):
2038 2033 """Return the sidedata for a given revision number."""
2039 2034 index_entry = self.index[rev]
2040 2035 sidedata_offset = index_entry[8]
2041 2036 sidedata_size = index_entry[9]
2042 2037
2043 2038 if self._inline:
2044 2039 sidedata_offset += self.index.entry_size * (1 + rev)
2045 2040 if sidedata_size == 0:
2046 2041 return {}
2047 2042
2048 2043 # XXX this needs caching, as we do for data
2049 2044 with self._sidedatareadfp() as sdf:
2050 2045 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2051 2046 filename = self._sidedatafile
2052 2047 end = self._docket.sidedata_end
2053 2048 offset = sidedata_offset
2054 2049 length = sidedata_size
2055 2050 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2056 2051 raise error.RevlogError(m)
2057 2052
2058 2053 sdf.seek(sidedata_offset, os.SEEK_SET)
2059 2054 comp_segment = sdf.read(sidedata_size)
2060 2055
2061 2056 if len(comp_segment) < sidedata_size:
2062 2057 filename = self._sidedatafile
2063 2058 length = sidedata_size
2064 2059 offset = sidedata_offset
2065 2060 got = len(comp_segment)
2066 2061 m = PARTIAL_READ_MSG % (filename, length, offset, got)
2067 2062 raise error.RevlogError(m)
2068 2063
2069 2064 comp = self.index[rev][11]
2070 2065 if comp == COMP_MODE_PLAIN:
2071 2066 segment = comp_segment
2072 2067 elif comp == COMP_MODE_DEFAULT:
2073 2068 segment = self._decompressor(comp_segment)
2074 2069 elif comp == COMP_MODE_INLINE:
2075 2070 segment = self.decompress(comp_segment)
2076 2071 else:
2077 2072 msg = 'unknown compression mode %d'
2078 2073 msg %= comp
2079 2074 raise error.RevlogError(msg)
2080 2075
2081 2076 sidedata = sidedatautil.deserialize_sidedata(segment)
2082 2077 return sidedata
2083 2078
2084 2079 def rawdata(self, nodeorrev, _df=None):
2085 2080 """return an uncompressed raw data of a given node or revision number.
2086 2081
2087 2082 _df - an existing file handle to read from. (internal-only)
2088 2083 """
2089 2084 return self._revisiondata(nodeorrev, _df, raw=True)
2090 2085
2091 2086 def hash(self, text, p1, p2):
2092 2087 """Compute a node hash.
2093 2088
2094 2089 Available as a function so that subclasses can replace the hash
2095 2090 as needed.
2096 2091 """
2097 2092 return storageutil.hashrevisionsha1(text, p1, p2)
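# A sketch of the conventional node hash that hashrevisionsha1 is expected to
# compute: SHA-1 over the two parent nodes in sorted order followed by the
# text. This is an illustration of the scheme, not the authoritative
# implementation.
#
#     import hashlib
#
#     def _toy_hashrevision(text, p1, p2):
#         lo, hi = sorted((p1, p2))
#         s = hashlib.sha1(lo)
#         s.update(hi)
#         s.update(text)
#         return s.digest()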
2098 2093
2099 2094 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2100 2095 """Check node hash integrity.
2101 2096
2102 2097 Available as a function so that subclasses can extend hash mismatch
2103 2098 behaviors as needed.
2104 2099 """
2105 2100 try:
2106 2101 if p1 is None and p2 is None:
2107 2102 p1, p2 = self.parents(node)
2108 2103 if node != self.hash(text, p1, p2):
2109 2104 # Clear the revision cache on hash failure. The revision cache
2110 2105 # only stores the raw revision and clearing the cache does have
2111 2106 # the side-effect that we won't have a cache hit when the raw
2112 2107 # revision data is accessed. But this case should be rare and
2113 2108 # it is extra work to teach the cache about the hash
2114 2109 # verification state.
2115 2110 if self._revisioncache and self._revisioncache[0] == node:
2116 2111 self._revisioncache = None
2117 2112
2118 2113 revornode = rev
2119 2114 if revornode is None:
2120 2115 revornode = templatefilters.short(hex(node))
2121 2116 raise error.RevlogError(
2122 2117 _(b"integrity check failed on %s:%s")
2123 2118 % (self.display_id, pycompat.bytestr(revornode))
2124 2119 )
2125 2120 except error.RevlogError:
2126 2121 if self._censorable and storageutil.iscensoredtext(text):
2127 2122 raise error.CensoredNodeError(self.display_id, node, text)
2128 2123 raise
2129 2124
2130 2125 def _enforceinlinesize(self, tr):
2131 2126 """Check if the revlog is too big for inline and convert if so.
2132 2127
2133 2128 This should be called after revisions are added to the revlog. If the
2134 2129 revlog has grown too large to be an inline revlog, it will convert it
2135 2130 to use multiple index and data files.
2136 2131 """
2137 2132 tiprev = len(self) - 1
2138 2133 total_size = self.start(tiprev) + self.length(tiprev)
2139 2134 if not self._inline or total_size < _maxinline:
2140 2135 return
2141 2136
2142 2137 troffset = tr.findoffset(self._indexfile)
2143 2138 if troffset is None:
2144 2139 raise error.RevlogError(
2145 2140 _(b"%s not found in the transaction") % self._indexfile
2146 2141 )
2147 2142 trindex = 0
2148 2143 tr.add(self._datafile, 0)
2149 2144
2150 2145 existing_handles = False
2151 2146 if self._writinghandles is not None:
2152 2147 existing_handles = True
2153 2148 fp = self._writinghandles[0]
2154 2149 fp.flush()
2155 2150 fp.close()
2156 2151 # We can't use the cached file handle after close(). So prevent
2157 2152 # its usage.
2158 2153 self._writinghandles = None
2159 2154
2160 2155 new_dfh = self._datafp(b'w+')
2161 2156 new_dfh.truncate(0) # drop any potentially existing data
2162 2157 try:
2163 2158 with self._indexfp() as read_ifh:
2164 2159 for r in self:
2165 2160 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2166 2161 if troffset <= self.start(r) + r * self.index.entry_size:
2167 2162 trindex = r
2168 2163 new_dfh.flush()
2169 2164
2170 2165 with self.__index_new_fp() as fp:
2171 2166 self._format_flags &= ~FLAG_INLINE_DATA
2172 2167 self._inline = False
2173 2168 for i in self:
2174 2169 e = self.index.entry_binary(i)
2175 2170 if i == 0 and self._docket is None:
2176 2171 header = self._format_flags | self._format_version
2177 2172 header = self.index.pack_header(header)
2178 2173 e = header + e
2179 2174 fp.write(e)
2180 2175 if self._docket is not None:
2181 2176 self._docket.index_end = fp.tell()
2182 2177
2183 2178 # There is a small transactional race here. If the rename of
2184 2179 # the index fails, we should remove the datafile. It is more
2185 2180 # important to ensure that the data file is not truncated
2186 2181 # when the index is replaced as otherwise data is lost.
2187 2182 tr.replace(self._datafile, self.start(trindex))
2188 2183
2189 2184             # the temp file replaces the real index when we exit the context
2190 2185 # manager
2191 2186
2192 2187 tr.replace(self._indexfile, trindex * self.index.entry_size)
2193 2188 nodemaputil.setup_persistent_nodemap(tr, self)
2194 2189 self._chunkclear()
2195 2190
2196 2191 if existing_handles:
2197 2192             # switched from inline to conventional; reopen the index
2198 2193 ifh = self.__index_write_fp()
2199 2194 self._writinghandles = (ifh, new_dfh, None)
2200 2195 new_dfh = None
2201 2196 finally:
2202 2197 if new_dfh is not None:
2203 2198 new_dfh.close()
2204 2199
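
A small sketch of the trigger condition used at the top of `_enforceinlinesize`: an inline revlog is split into separate index and data files once the combined index-plus-data size reaches the inline ceiling (`_maxinline`). The helper name and the ceiling value below are illustrative, not taken from this patch.

    def needs_inline_split(tip_start, tip_length, inline, maxinline=131072):
        # total size of the interleaved index+data, derived from the tip revision
        total_size = tip_start + tip_length
        return inline and total_size >= maxinline

    assert needs_inline_split(130000, 2000, inline=True)       # past the ceiling: split
    assert not needs_inline_split(1000, 200, inline=True)      # still small: stay inline
    assert not needs_inline_split(500000, 2000, inline=False)  # already split: nothing to do
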
2205 2200 def _nodeduplicatecallback(self, transaction, node):
2206 2201 """called when trying to add a node already stored."""
2207 2202
2208 2203 @contextlib.contextmanager
2209 2204 def _writing(self, transaction):
2210 2205 if self._trypending:
2211 2206 msg = b'try to write in a `trypending` revlog: %s'
2212 2207 msg %= self.display_id
2213 2208 raise error.ProgrammingError(msg)
2214 2209 if self._writinghandles is not None:
2215 2210 yield
2216 2211 else:
2217 2212 ifh = dfh = sdfh = None
2218 2213 try:
2219 2214 r = len(self)
2220 2215 # opening the data file.
2221 2216 dsize = 0
2222 2217 if r:
2223 2218 dsize = self.end(r - 1)
2224 2219 dfh = None
2225 2220 if not self._inline:
2226 2221 try:
2227 2222 dfh = self._datafp(b"r+")
2228 2223 if self._docket is None:
2229 2224 dfh.seek(0, os.SEEK_END)
2230 2225 else:
2231 2226 dfh.seek(self._docket.data_end, os.SEEK_SET)
2232 2227 except IOError as inst:
2233 2228 if inst.errno != errno.ENOENT:
2234 2229 raise
2235 2230 dfh = self._datafp(b"w+")
2236 2231 transaction.add(self._datafile, dsize)
2237 2232 if self._sidedatafile is not None:
2238 2233 try:
2239 2234 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2240 2235                         sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2241 2236 except IOError as inst:
2242 2237 if inst.errno != errno.ENOENT:
2243 2238 raise
2244 2239 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2245 2240 transaction.add(
2246 2241 self._sidedatafile, self._docket.sidedata_end
2247 2242 )
2248 2243
2249 2244 # opening the index file.
2250 2245 isize = r * self.index.entry_size
2251 2246 ifh = self.__index_write_fp()
2252 2247 if self._inline:
2253 2248 transaction.add(self._indexfile, dsize + isize)
2254 2249 else:
2255 2250 transaction.add(self._indexfile, isize)
2256 2251                 # expose all file handles for writing.
2257 2252 self._writinghandles = (ifh, dfh, sdfh)
2258 2253 yield
2259 2254 if self._docket is not None:
2260 2255 self._write_docket(transaction)
2261 2256 finally:
2262 2257 self._writinghandles = None
2263 2258 if dfh is not None:
2264 2259 dfh.close()
2265 2260 if sdfh is not None:
2266 2261                 sdfh.close()
2267 2262             # close the index file last to avoid exposing references to
2268 2263             # potentially unflushed data content.
2269 2264 if ifh is not None:
2270 2265 ifh.close()
2271 2266
2272 2267 def _write_docket(self, transaction):
2273 2268 """write the current docket on disk
2274 2269
2275 2270         Exists as a method to help the changelog implement its transaction logic
2276 2271
2277 2272         We could also imagine using the same transaction logic for all revlogs
2278 2273         since dockets are cheap."""
2279 2274 self._docket.write(transaction)
2280 2275
2281 2276 def addrevision(
2282 2277 self,
2283 2278 text,
2284 2279 transaction,
2285 2280 link,
2286 2281 p1,
2287 2282 p2,
2288 2283 cachedelta=None,
2289 2284 node=None,
2290 2285 flags=REVIDX_DEFAULT_FLAGS,
2291 2286 deltacomputer=None,
2292 2287 sidedata=None,
2293 2288 ):
2294 2289 """add a revision to the log
2295 2290
2296 2291 text - the revision data to add
2297 2292 transaction - the transaction object used for rollback
2298 2293 link - the linkrev data to add
2299 2294 p1, p2 - the parent nodeids of the revision
2300 2295 cachedelta - an optional precomputed delta
2301 2296 node - nodeid of revision; typically node is not specified, and it is
2302 2297 computed by default as hash(text, p1, p2), however subclasses might
2303 2298 use different hashing method (and override checkhash() in such case)
2304 2299 flags - the known flags to set on the revision
2305 2300 deltacomputer - an optional deltacomputer instance shared between
2306 2301 multiple calls
2307 2302 """
2308 2303 if link == nullrev:
2309 2304 raise error.RevlogError(
2310 2305 _(b"attempted to add linkrev -1 to %s") % self.display_id
2311 2306 )
2312 2307
2313 2308 if sidedata is None:
2314 2309 sidedata = {}
2315 2310 elif sidedata and not self.hassidedata:
2316 2311 raise error.ProgrammingError(
2317 2312                 _(b"trying to add sidedata to a revlog that does not support them")
2318 2313 )
2319 2314
2320 2315 if flags:
2321 2316 node = node or self.hash(text, p1, p2)
2322 2317
2323 2318 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2324 2319
2325 2320 # If the flag processor modifies the revision data, ignore any provided
2326 2321 # cachedelta.
2327 2322 if rawtext != text:
2328 2323 cachedelta = None
2329 2324
2330 2325 if len(rawtext) > _maxentrysize:
2331 2326 raise error.RevlogError(
2332 2327 _(
2333 2328 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2334 2329 )
2335 2330 % (self.display_id, len(rawtext))
2336 2331 )
2337 2332
2338 2333 node = node or self.hash(rawtext, p1, p2)
2339 2334 rev = self.index.get_rev(node)
2340 2335 if rev is not None:
2341 2336 return rev
2342 2337
2343 2338 if validatehash:
2344 2339 self.checkhash(rawtext, node, p1=p1, p2=p2)
2345 2340
2346 2341 return self.addrawrevision(
2347 2342 rawtext,
2348 2343 transaction,
2349 2344 link,
2350 2345 p1,
2351 2346 p2,
2352 2347 node,
2353 2348 flags,
2354 2349 cachedelta=cachedelta,
2355 2350 deltacomputer=deltacomputer,
2356 2351 sidedata=sidedata,
2357 2352 )
2358 2353
2359 2354 def addrawrevision(
2360 2355 self,
2361 2356 rawtext,
2362 2357 transaction,
2363 2358 link,
2364 2359 p1,
2365 2360 p2,
2366 2361 node,
2367 2362 flags,
2368 2363 cachedelta=None,
2369 2364 deltacomputer=None,
2370 2365 sidedata=None,
2371 2366 ):
2372 2367 """add a raw revision with known flags, node and parents
2373 2368 useful when reusing a revision not stored in this revlog (ex: received
2374 2369 over wire, or read from an external bundle).
2375 2370 """
2376 2371 with self._writing(transaction):
2377 2372 return self._addrevision(
2378 2373 node,
2379 2374 rawtext,
2380 2375 transaction,
2381 2376 link,
2382 2377 p1,
2383 2378 p2,
2384 2379 flags,
2385 2380 cachedelta,
2386 2381 deltacomputer=deltacomputer,
2387 2382 sidedata=sidedata,
2388 2383 )
2389 2384
2390 2385 def compress(self, data):
2391 2386 """Generate a possibly-compressed representation of data."""
2392 2387 if not data:
2393 2388 return b'', data
2394 2389
2395 2390 compressed = self._compressor.compress(data)
2396 2391
2397 2392 if compressed:
2398 2393 # The revlog compressor added the header in the returned data.
2399 2394 return b'', compressed
2400 2395
2401 2396 if data[0:1] == b'\0':
2402 2397 return b'', data
2403 2398 return b'u', data
2404 2399
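
A rough counterpart of the (header, payload) convention used by `compress()` above, assuming zlib and a simple size comparison in place of the compression engine's own "worth it" heuristic; `compress_chunk` and that check are illustrative only.

    import zlib

    def compress_chunk(data):
        # mirror the (header, payload) convention of compress() above
        if not data:
            return b'', data
        compressed = zlib.compress(data)
        if len(compressed) < len(data):      # crude "compression helped" check
            return b'', compressed           # zlib embeds its own 'x' header byte
        if data[0:1] == b'\0':
            return b'', data                 # raw marker is already unambiguous
        return b'u', data                    # mark as explicitly uncompressed

    header, payload = compress_chunk(b'a' * 4096)
    assert header == b'' and zlib.decompress(payload) == b'a' * 4096
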
2405 2400 def decompress(self, data):
2406 2401 """Decompress a revlog chunk.
2407 2402
2408 2403 The chunk is expected to begin with a header identifying the
2409 2404 format type so it can be routed to an appropriate decompressor.
2410 2405 """
2411 2406 if not data:
2412 2407 return data
2413 2408
2414 2409 # Revlogs are read much more frequently than they are written and many
2415 2410 # chunks only take microseconds to decompress, so performance is
2416 2411 # important here.
2417 2412 #
2418 2413 # We can make a few assumptions about revlogs:
2419 2414 #
2420 2415 # 1) the majority of chunks will be compressed (as opposed to inline
2421 2416 # raw data).
2422 2417         # 2) decompressing *any* data will likely be at least 10x slower than
2423 2418 # returning raw inline data.
2424 2419 # 3) we want to prioritize common and officially supported compression
2425 2420 # engines
2426 2421 #
2427 2422 # It follows that we want to optimize for "decompress compressed data
2428 2423 # when encoded with common and officially supported compression engines"
2429 2424 # case over "raw data" and "data encoded by less common or non-official
2430 2425 # compression engines." That is why we have the inline lookup first
2431 2426 # followed by the compengines lookup.
2432 2427 #
2433 2428 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2434 2429 # compressed chunks. And this matters for changelog and manifest reads.
2435 2430 t = data[0:1]
2436 2431
2437 2432 if t == b'x':
2438 2433 try:
2439 2434 return _zlibdecompress(data)
2440 2435 except zlib.error as e:
2441 2436 raise error.RevlogError(
2442 2437 _(b'revlog decompress error: %s')
2443 2438 % stringutil.forcebytestr(e)
2444 2439 )
2445 2440 # '\0' is more common than 'u' so it goes first.
2446 2441 elif t == b'\0':
2447 2442 return data
2448 2443 elif t == b'u':
2449 2444 return util.buffer(data, 1)
2450 2445
2451 2446 compressor = self._get_decompressor(t)
2452 2447
2453 2448 return compressor.decompress(data)
2454 2449
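
As an aside, a minimal self-contained sketch of the header-byte dispatch described in the comment above; `decompress_chunk` is a hypothetical helper, not revlog API, and it only covers the built-in zlib ('x'), raw ('\0') and uncompressed ('u') cases, leaving pluggable engines aside.

    import zlib

    def decompress_chunk(data):
        # dispatch on the one-byte chunk header, zlib first since it is the common case
        if not data:
            return data
        t = data[0:1]
        if t == b'x':          # zlib stream (its header byte happens to be 'x')
            return zlib.decompress(data)
        if t == b'\0':         # stored as-is, the header byte is part of the payload
            return data
        if t == b'u':          # explicitly uncompressed, strip the marker
            return data[1:]
        raise ValueError('unsupported chunk header: %r' % t)

    assert decompress_chunk(b'u' + b'hello') == b'hello'
    assert decompress_chunk(zlib.compress(b'hello' * 100)) == b'hello' * 100
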
2455 2450 def _addrevision(
2456 2451 self,
2457 2452 node,
2458 2453 rawtext,
2459 2454 transaction,
2460 2455 link,
2461 2456 p1,
2462 2457 p2,
2463 2458 flags,
2464 2459 cachedelta,
2465 2460 alwayscache=False,
2466 2461 deltacomputer=None,
2467 2462 sidedata=None,
2468 2463 ):
2469 2464 """internal function to add revisions to the log
2470 2465
2471 2466 see addrevision for argument descriptions.
2472 2467
2473 2468 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2474 2469
2475 2470 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2476 2471 be used.
2477 2472
2478 2473 invariants:
2479 2474 - rawtext is optional (can be None); if not set, cachedelta must be set.
2480 2475 if both are set, they must correspond to each other.
2481 2476 """
2482 2477 if node == self.nullid:
2483 2478 raise error.RevlogError(
2484 2479 _(b"%s: attempt to add null revision") % self.display_id
2485 2480 )
2486 2481 if (
2487 2482 node == self.nodeconstants.wdirid
2488 2483 or node in self.nodeconstants.wdirfilenodeids
2489 2484 ):
2490 2485 raise error.RevlogError(
2491 2486 _(b"%s: attempt to add wdir revision") % self.display_id
2492 2487 )
2493 2488 if self._writinghandles is None:
2494 2489 msg = b'adding revision outside `revlog._writing` context'
2495 2490 raise error.ProgrammingError(msg)
2496 2491
2497 2492 if self._inline:
2498 2493 fh = self._writinghandles[0]
2499 2494 else:
2500 2495 fh = self._writinghandles[1]
2501 2496
2502 2497 btext = [rawtext]
2503 2498
2504 2499 curr = len(self)
2505 2500 prev = curr - 1
2506 2501
2507 2502 offset = self._get_data_offset(prev)
2508 2503
2509 2504 if self._concurrencychecker:
2510 2505 ifh, dfh, sdfh = self._writinghandles
2511 2506 # XXX no checking for the sidedata file
2512 2507 if self._inline:
2513 2508 # offset is "as if" it were in the .d file, so we need to add on
2514 2509 # the size of the entry metadata.
2515 2510 self._concurrencychecker(
2516 2511 ifh, self._indexfile, offset + curr * self.index.entry_size
2517 2512 )
2518 2513 else:
2519 2514 # Entries in the .i are a consistent size.
2520 2515 self._concurrencychecker(
2521 2516 ifh, self._indexfile, curr * self.index.entry_size
2522 2517 )
2523 2518 self._concurrencychecker(dfh, self._datafile, offset)
2524 2519
2525 2520 p1r, p2r = self.rev(p1), self.rev(p2)
2526 2521
2527 2522 # full versions are inserted when the needed deltas
2528 2523 # become comparable to the uncompressed text
2529 2524 if rawtext is None:
2530 2525 # need rawtext size, before changed by flag processors, which is
2531 2526 # the non-raw size. use revlog explicitly to avoid filelog's extra
2532 2527 # logic that might remove metadata size.
2533 2528 textlen = mdiff.patchedsize(
2534 2529 revlog.size(self, cachedelta[0]), cachedelta[1]
2535 2530 )
2536 2531 else:
2537 2532 textlen = len(rawtext)
2538 2533
2539 2534 if deltacomputer is None:
2540 2535 deltacomputer = deltautil.deltacomputer(self)
2541 2536
2542 2537 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2543 2538
2544 2539 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2545 2540
2546 2541 compression_mode = COMP_MODE_INLINE
2547 2542 if self._docket is not None:
2548 2543 h, d = deltainfo.data
2549 2544 if not h and not d:
2550 2545                         # no data to store at all... declare it uncompressed
2551 2546 compression_mode = COMP_MODE_PLAIN
2552 2547 elif not h:
2553 2548 t = d[0:1]
2554 2549 if t == b'\0':
2555 2550 compression_mode = COMP_MODE_PLAIN
2556 2551 elif t == self._docket.default_compression_header:
2557 2552 compression_mode = COMP_MODE_DEFAULT
2558 2553 elif h == b'u':
2559 2554 # we have a more efficient way to declare uncompressed
2560 2555 h = b''
2561 2556 compression_mode = COMP_MODE_PLAIN
2562 2557 deltainfo = deltautil.drop_u_compression(deltainfo)
2563 2558
2564 2559 sidedata_compression_mode = COMP_MODE_INLINE
2565 2560 if sidedata and self.hassidedata:
2566 2561 sidedata_compression_mode = COMP_MODE_PLAIN
2567 2562 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2568 2563 sidedata_offset = self._docket.sidedata_end
2569 2564 h, comp_sidedata = self.compress(serialized_sidedata)
2570 2565 if (
2571 2566 h != b'u'
2572 2567 and comp_sidedata[0:1] != b'\0'
2573 2568 and len(comp_sidedata) < len(serialized_sidedata)
2574 2569 ):
2575 2570 assert not h
2576 2571 if (
2577 2572 comp_sidedata[0:1]
2578 2573 == self._docket.default_compression_header
2579 2574 ):
2580 2575 sidedata_compression_mode = COMP_MODE_DEFAULT
2581 2576 serialized_sidedata = comp_sidedata
2582 2577 else:
2583 2578 sidedata_compression_mode = COMP_MODE_INLINE
2584 2579 serialized_sidedata = comp_sidedata
2585 2580 else:
2586 2581 serialized_sidedata = b""
2587 2582             # Don't store the offset if the sidedata is empty; that way empty
2588 2583             # sidedata is easy to detect and is no different from entries we
2589 2584             # add manually.
2590 2585 sidedata_offset = 0
2591 2586
2592 2587 e = (
2593 offset_type(offset, flags),
2588 revlogutils.offset_type(offset, flags),
2594 2589 deltainfo.deltalen,
2595 2590 textlen,
2596 2591 deltainfo.base,
2597 2592 link,
2598 2593 p1r,
2599 2594 p2r,
2600 2595 node,
2601 2596 sidedata_offset,
2602 2597 len(serialized_sidedata),
2603 2598 compression_mode,
2604 2599 sidedata_compression_mode,
2605 2600 )
2606 2601
2607 2602 self.index.append(e)
2608 2603 entry = self.index.entry_binary(curr)
2609 2604 if curr == 0 and self._docket is None:
2610 2605 header = self._format_flags | self._format_version
2611 2606 header = self.index.pack_header(header)
2612 2607 entry = header + entry
2613 2608 self._writeentry(
2614 2609 transaction,
2615 2610 entry,
2616 2611 deltainfo.data,
2617 2612 link,
2618 2613 offset,
2619 2614 serialized_sidedata,
2620 2615 sidedata_offset,
2621 2616 )
2622 2617
2623 2618 rawtext = btext[0]
2624 2619
2625 2620 if alwayscache and rawtext is None:
2626 2621 rawtext = deltacomputer.buildtext(revinfo, fh)
2627 2622
2628 2623 if type(rawtext) == bytes: # only accept immutable objects
2629 2624 self._revisioncache = (node, curr, rawtext)
2630 2625 self._chainbasecache[curr] = deltainfo.chainbase
2631 2626 return curr
2632 2627
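
For readability, a labelled view of the 12-field tuple appended to the index in `_addrevision` above. The real index stores bare tuples; the namedtuple, the sample values, and the compression-mode constants used here are purely illustrative assumptions.

    from collections import namedtuple

    IndexEntrySketch = namedtuple(
        'IndexEntrySketch',
        'offset_flags data_len raw_len delta_base link_rev p1_rev p2_rev '
        'node sidedata_offset sidedata_len comp_mode sidedata_comp_mode',
    )

    entry = IndexEntrySketch(
        offset_flags=(4096 << 16) | 0,   # packed by revlogutils.offset_type()
        data_len=120,                    # deltainfo.deltalen
        raw_len=300,                     # uncompressed text length
        delta_base=7, link_rev=9, p1_rev=7, p2_rev=-1,
        node=b'\0' * 20,
        sidedata_offset=0, sidedata_len=0,
        comp_mode=2, sidedata_comp_mode=2,   # assumed values for COMP_MODE_INLINE
    )
    assert entry.offset_flags >> 16 == 4096
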
2633 2628 def _get_data_offset(self, prev):
2634 2629 """Returns the current offset in the (in-transaction) data file.
2635 2630         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2636 2631 file to store that information: since sidedata can be rewritten to the
2637 2632 end of the data file within a transaction, you can have cases where, for
2638 2633 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2639 2634 to `n - 1`'s sidedata being written after `n`'s data.
2640 2635
2641 2636 TODO cache this in a docket file before getting out of experimental."""
2642 2637 if self._docket is None:
2643 2638 return self.end(prev)
2644 2639 else:
2645 2640 return self._docket.data_end
2646 2641
2647 2642 def _writeentry(
2648 2643 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2649 2644 ):
2650 2645 # Files opened in a+ mode have inconsistent behavior on various
2651 2646 # platforms. Windows requires that a file positioning call be made
2652 2647 # when the file handle transitions between reads and writes. See
2653 2648 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2654 2649 # platforms, Python or the platform itself can be buggy. Some versions
2655 2650 # of Solaris have been observed to not append at the end of the file
2656 2651 # if the file was seeked to before the end. See issue4943 for more.
2657 2652 #
2658 2653 # We work around this issue by inserting a seek() before writing.
2659 2654 # Note: This is likely not necessary on Python 3. However, because
2660 2655 # the file handle is reused for reads and may be seeked there, we need
2661 2656 # to be careful before changing this.
2662 2657 if self._writinghandles is None:
2663 2658 msg = b'adding revision outside `revlog._writing` context'
2664 2659 raise error.ProgrammingError(msg)
2665 2660 ifh, dfh, sdfh = self._writinghandles
2666 2661 if self._docket is None:
2667 2662 ifh.seek(0, os.SEEK_END)
2668 2663 else:
2669 2664 ifh.seek(self._docket.index_end, os.SEEK_SET)
2670 2665 if dfh:
2671 2666 if self._docket is None:
2672 2667 dfh.seek(0, os.SEEK_END)
2673 2668 else:
2674 2669 dfh.seek(self._docket.data_end, os.SEEK_SET)
2675 2670 if sdfh:
2676 2671 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2677 2672
2678 2673 curr = len(self) - 1
2679 2674 if not self._inline:
2680 2675 transaction.add(self._datafile, offset)
2681 2676 if self._sidedatafile:
2682 2677 transaction.add(self._sidedatafile, sidedata_offset)
2683 2678 transaction.add(self._indexfile, curr * len(entry))
2684 2679 if data[0]:
2685 2680 dfh.write(data[0])
2686 2681 dfh.write(data[1])
2687 2682 if sidedata:
2688 2683 sdfh.write(sidedata)
2689 2684 ifh.write(entry)
2690 2685 else:
2691 2686 offset += curr * self.index.entry_size
2692 2687 transaction.add(self._indexfile, offset)
2693 2688 ifh.write(entry)
2694 2689 ifh.write(data[0])
2695 2690 ifh.write(data[1])
2696 2691 assert not sidedata
2697 2692 self._enforceinlinesize(transaction)
2698 2693 if self._docket is not None:
2699 2694 self._docket.index_end = self._writinghandles[0].tell()
2700 2695 self._docket.data_end = self._writinghandles[1].tell()
2701 2696 self._docket.sidedata_end = self._writinghandles[2].tell()
2702 2697
2703 2698 nodemaputil.setup_persistent_nodemap(transaction, self)
2704 2699
2705 2700 def addgroup(
2706 2701 self,
2707 2702 deltas,
2708 2703 linkmapper,
2709 2704 transaction,
2710 2705 alwayscache=False,
2711 2706 addrevisioncb=None,
2712 2707 duplicaterevisioncb=None,
2713 2708 ):
2714 2709 """
2715 2710 add a delta group
2716 2711
2717 2712 given a set of deltas, add them to the revision log. the
2718 2713         Given a set of deltas, add them to the revision log. The
2719 2714         first delta is against its parent, which should be in our
2720 2715         log; the rest are against the previous delta.
2721 2716 If ``addrevisioncb`` is defined, it will be called with arguments of
2722 2717         If ``addrevisioncb`` is defined, it will be called with arguments of
2723 2718         this revlog and the revision number that was added.
2724 2719
2725 2720 if self._adding_group:
2726 2721 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2727 2722
2728 2723 self._adding_group = True
2729 2724 empty = True
2730 2725 try:
2731 2726 with self._writing(transaction):
2732 2727 deltacomputer = deltautil.deltacomputer(self)
2733 2728 # loop through our set of deltas
2734 2729 for data in deltas:
2735 2730 (
2736 2731 node,
2737 2732 p1,
2738 2733 p2,
2739 2734 linknode,
2740 2735 deltabase,
2741 2736 delta,
2742 2737 flags,
2743 2738 sidedata,
2744 2739 ) = data
2745 2740 link = linkmapper(linknode)
2746 2741 flags = flags or REVIDX_DEFAULT_FLAGS
2747 2742
2748 2743 rev = self.index.get_rev(node)
2749 2744 if rev is not None:
2750 2745 # this can happen if two branches make the same change
2751 2746 self._nodeduplicatecallback(transaction, rev)
2752 2747 if duplicaterevisioncb:
2753 2748 duplicaterevisioncb(self, rev)
2754 2749 empty = False
2755 2750 continue
2756 2751
2757 2752 for p in (p1, p2):
2758 2753 if not self.index.has_node(p):
2759 2754 raise error.LookupError(
2760 2755 p, self.radix, _(b'unknown parent')
2761 2756 )
2762 2757
2763 2758 if not self.index.has_node(deltabase):
2764 2759 raise error.LookupError(
2765 2760 deltabase, self.display_id, _(b'unknown delta base')
2766 2761 )
2767 2762
2768 2763 baserev = self.rev(deltabase)
2769 2764
2770 2765 if baserev != nullrev and self.iscensored(baserev):
2771 2766 # if base is censored, delta must be full replacement in a
2772 2767 # single patch operation
2773 2768 hlen = struct.calcsize(b">lll")
2774 2769 oldlen = self.rawsize(baserev)
2775 2770 newlen = len(delta) - hlen
2776 2771 if delta[:hlen] != mdiff.replacediffheader(
2777 2772 oldlen, newlen
2778 2773 ):
2779 2774 raise error.CensoredBaseError(
2780 2775 self.display_id, self.node(baserev)
2781 2776 )
2782 2777
2783 2778 if not flags and self._peek_iscensored(baserev, delta):
2784 2779 flags |= REVIDX_ISCENSORED
2785 2780
2786 2781 # We assume consumers of addrevisioncb will want to retrieve
2787 2782 # the added revision, which will require a call to
2788 2783 # revision(). revision() will fast path if there is a cache
2789 2784 # hit. So, we tell _addrevision() to always cache in this case.
2790 2785 # We're only using addgroup() in the context of changegroup
2791 2786 # generation so the revision data can always be handled as raw
2792 2787 # by the flagprocessor.
2793 2788 rev = self._addrevision(
2794 2789 node,
2795 2790 None,
2796 2791 transaction,
2797 2792 link,
2798 2793 p1,
2799 2794 p2,
2800 2795 flags,
2801 2796 (baserev, delta),
2802 2797 alwayscache=alwayscache,
2803 2798 deltacomputer=deltacomputer,
2804 2799 sidedata=sidedata,
2805 2800 )
2806 2801
2807 2802 if addrevisioncb:
2808 2803 addrevisioncb(self, rev)
2809 2804 empty = False
2810 2805 finally:
2811 2806 self._adding_group = False
2812 2807 return not empty
2813 2808
2814 2809 def iscensored(self, rev):
2815 2810 """Check if a file revision is censored."""
2816 2811 if not self._censorable:
2817 2812 return False
2818 2813
2819 2814 return self.flags(rev) & REVIDX_ISCENSORED
2820 2815
2821 2816 def _peek_iscensored(self, baserev, delta):
2822 2817 """Quickly check if a delta produces a censored revision."""
2823 2818 if not self._censorable:
2824 2819 return False
2825 2820
2826 2821 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2827 2822
2828 2823 def getstrippoint(self, minlink):
2829 2824 """find the minimum rev that must be stripped to strip the linkrev
2830 2825
2831 2826 Returns a tuple containing the minimum rev and a set of all revs that
2832 2827 have linkrevs that will be broken by this strip.
2833 2828 """
2834 2829 return storageutil.resolvestripinfo(
2835 2830 minlink,
2836 2831 len(self) - 1,
2837 2832 self.headrevs(),
2838 2833 self.linkrev,
2839 2834 self.parentrevs,
2840 2835 )
2841 2836
2842 2837 def strip(self, minlink, transaction):
2843 2838 """truncate the revlog on the first revision with a linkrev >= minlink
2844 2839
2845 2840 This function is called when we're stripping revision minlink and
2846 2841 its descendants from the repository.
2847 2842
2848 2843 We have to remove all revisions with linkrev >= minlink, because
2849 2844 the equivalent changelog revisions will be renumbered after the
2850 2845 strip.
2851 2846
2852 2847 So we truncate the revlog on the first of these revisions, and
2853 2848 trust that the caller has saved the revisions that shouldn't be
2854 2849 removed and that it'll re-add them after this truncation.
2855 2850 """
2856 2851 if len(self) == 0:
2857 2852 return
2858 2853
2859 2854 rev, _ = self.getstrippoint(minlink)
2860 2855 if rev == len(self):
2861 2856 return
2862 2857
2863 2858 # first truncate the files on disk
2864 2859 data_end = self.start(rev)
2865 2860 if not self._inline:
2866 2861 transaction.add(self._datafile, data_end)
2867 2862 end = rev * self.index.entry_size
2868 2863 else:
2869 2864 end = data_end + (rev * self.index.entry_size)
2870 2865
2871 2866 if self._sidedatafile:
2872 2867 sidedata_end = self.sidedata_cut_off(rev)
2873 2868 transaction.add(self._sidedatafile, sidedata_end)
2874 2869
2875 2870 transaction.add(self._indexfile, end)
2876 2871 if self._docket is not None:
2877 2872             # XXX we could leverage the docket while stripping. However it is
2878 2873             # not powerful enough at the time of this comment
2879 2874 self._docket.index_end = end
2880 2875 self._docket.data_end = data_end
2881 2876 self._docket.sidedata_end = sidedata_end
2882 2877 self._docket.write(transaction, stripping=True)
2883 2878
2884 2879 # then reset internal state in memory to forget those revisions
2885 2880 self._revisioncache = None
2886 2881 self._chaininfocache = util.lrucachedict(500)
2887 2882 self._chunkclear()
2888 2883
2889 2884 del self.index[rev:-1]
2890 2885
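
To make the truncation arithmetic in `strip` concrete, a sketch of where the index file is cut for inline versus split revlogs; the helper is hypothetical and the entry size is only an example.

    def index_truncation_point(rev, entry_size, data_end, inline):
        # inline revlogs interleave data with the index, so the data that
        # precedes revision `rev` stays in front of the surviving entries
        if inline:
            return data_end + rev * entry_size
        return rev * entry_size

    assert index_truncation_point(3, 64, data_end=500, inline=False) == 192
    assert index_truncation_point(3, 64, data_end=500, inline=True) == 692
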
2891 2886 def checksize(self):
2892 2887 """Check size of index and data files
2893 2888
2894 2889 return a (dd, di) tuple.
2895 2890 - dd: extra bytes for the "data" file
2896 2891 - di: extra bytes for the "index" file
2897 2892
2898 2893 A healthy revlog will return (0, 0).
2899 2894 """
2900 2895 expected = 0
2901 2896 if len(self):
2902 2897 expected = max(0, self.end(len(self) - 1))
2903 2898
2904 2899 try:
2905 2900 with self._datafp() as f:
2906 2901 f.seek(0, io.SEEK_END)
2907 2902 actual = f.tell()
2908 2903 dd = actual - expected
2909 2904 except IOError as inst:
2910 2905 if inst.errno != errno.ENOENT:
2911 2906 raise
2912 2907 dd = 0
2913 2908
2914 2909 try:
2915 2910 f = self.opener(self._indexfile)
2916 2911 f.seek(0, io.SEEK_END)
2917 2912 actual = f.tell()
2918 2913 f.close()
2919 2914 s = self.index.entry_size
2920 2915 i = max(0, actual // s)
2921 2916 di = actual - (i * s)
2922 2917 if self._inline:
2923 2918 databytes = 0
2924 2919 for r in self:
2925 2920 databytes += max(0, self.length(r))
2926 2921 dd = 0
2927 2922 di = actual - len(self) * s - databytes
2928 2923 except IOError as inst:
2929 2924 if inst.errno != errno.ENOENT:
2930 2925 raise
2931 2926 di = 0
2932 2927
2933 2928 return (dd, di)
2934 2929
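
The `di` figure above boils down to "bytes trailing the last complete fixed-size index entry"; a tiny arithmetic sketch, with a made-up helper name and an example entry size.

    def index_excess_bytes(actual_size, entry_size):
        # di as checksize() computes it for a non-inline revlog index
        full_entries = max(0, actual_size // entry_size)
        return actual_size - full_entries * entry_size

    assert index_excess_bytes(640, 64) == 0     # ten complete entries: healthy
    assert index_excess_bytes(650, 64) == 10    # ten stray bytes after the last entry
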
2935 2930 def files(self):
2936 2931 res = [self._indexfile]
2937 2932 if not self._inline:
2938 2933 res.append(self._datafile)
2939 2934 return res
2940 2935
2941 2936 def emitrevisions(
2942 2937 self,
2943 2938 nodes,
2944 2939 nodesorder=None,
2945 2940 revisiondata=False,
2946 2941 assumehaveparentrevisions=False,
2947 2942 deltamode=repository.CG_DELTAMODE_STD,
2948 2943 sidedata_helpers=None,
2949 2944 ):
2950 2945 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2951 2946 raise error.ProgrammingError(
2952 2947 b'unhandled value for nodesorder: %s' % nodesorder
2953 2948 )
2954 2949
2955 2950 if nodesorder is None and not self._generaldelta:
2956 2951 nodesorder = b'storage'
2957 2952
2958 2953 if (
2959 2954 not self._storedeltachains
2960 2955 and deltamode != repository.CG_DELTAMODE_PREV
2961 2956 ):
2962 2957 deltamode = repository.CG_DELTAMODE_FULL
2963 2958
2964 2959 return storageutil.emitrevisions(
2965 2960 self,
2966 2961 nodes,
2967 2962 nodesorder,
2968 2963 revlogrevisiondelta,
2969 2964 deltaparentfn=self.deltaparent,
2970 2965 candeltafn=self.candelta,
2971 2966 rawsizefn=self.rawsize,
2972 2967 revdifffn=self.revdiff,
2973 2968 flagsfn=self.flags,
2974 2969 deltamode=deltamode,
2975 2970 revisiondata=revisiondata,
2976 2971 assumehaveparentrevisions=assumehaveparentrevisions,
2977 2972 sidedata_helpers=sidedata_helpers,
2978 2973 )
2979 2974
2980 2975 DELTAREUSEALWAYS = b'always'
2981 2976 DELTAREUSESAMEREVS = b'samerevs'
2982 2977 DELTAREUSENEVER = b'never'
2983 2978
2984 2979 DELTAREUSEFULLADD = b'fulladd'
2985 2980
2986 2981 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2987 2982
2988 2983 def clone(
2989 2984 self,
2990 2985 tr,
2991 2986 destrevlog,
2992 2987 addrevisioncb=None,
2993 2988 deltareuse=DELTAREUSESAMEREVS,
2994 2989 forcedeltabothparents=None,
2995 2990 sidedata_helpers=None,
2996 2991 ):
2997 2992 """Copy this revlog to another, possibly with format changes.
2998 2993
2999 2994 The destination revlog will contain the same revisions and nodes.
3000 2995 However, it may not be bit-for-bit identical due to e.g. delta encoding
3001 2996 differences.
3002 2997
3003 2998         The ``deltareuse`` argument controls how deltas from the existing revlog
3004 2999 are preserved in the destination revlog. The argument can have the
3005 3000 following values:
3006 3001
3007 3002 DELTAREUSEALWAYS
3008 3003 Deltas will always be reused (if possible), even if the destination
3009 3004 revlog would not select the same revisions for the delta. This is the
3010 3005 fastest mode of operation.
3011 3006 DELTAREUSESAMEREVS
3012 3007 Deltas will be reused if the destination revlog would pick the same
3013 3008 revisions for the delta. This mode strikes a balance between speed
3014 3009 and optimization.
3015 3010 DELTAREUSENEVER
3016 3011 Deltas will never be reused. This is the slowest mode of execution.
3017 3012 This mode can be used to recompute deltas (e.g. if the diff/delta
3018 3013 algorithm changes).
3019 3014 DELTAREUSEFULLADD
3020 3015         Revisions will be re-added as if they were new content. This is
3021 3016         slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3022 3017         e.g. large file detection and handling.
3023 3018
3024 3019 Delta computation can be slow, so the choice of delta reuse policy can
3025 3020 significantly affect run time.
3026 3021
3027 3022 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3028 3023         two extremes. Deltas will be reused if they are appropriate. But if a
3029 3024         better delta base could be chosen, the delta is recomputed. This means if you
3030 3025 are converting a non-generaldelta revlog to a generaldelta revlog,
3031 3026 deltas will be recomputed if the delta's parent isn't a parent of the
3032 3027 revision.
3033 3028
3034 3029 In addition to the delta policy, the ``forcedeltabothparents``
3035 3030         argument controls whether to force computing deltas against both parents
3036 3031         for merges. When it is None, the destination revlog's current setting is used.
3037 3032
3038 3033         See `revlogutils.sidedata.get_sidedata_helpers` for the doc on
3039 3034 `sidedata_helpers`.
3040 3035 """
3041 3036 if deltareuse not in self.DELTAREUSEALL:
3042 3037 raise ValueError(
3043 3038 _(b'value for deltareuse invalid: %s') % deltareuse
3044 3039 )
3045 3040
3046 3041 if len(destrevlog):
3047 3042 raise ValueError(_(b'destination revlog is not empty'))
3048 3043
3049 3044 if getattr(self, 'filteredrevs', None):
3050 3045 raise ValueError(_(b'source revlog has filtered revisions'))
3051 3046 if getattr(destrevlog, 'filteredrevs', None):
3052 3047 raise ValueError(_(b'destination revlog has filtered revisions'))
3053 3048
3054 3049 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3055 3050 # if possible.
3056 3051 oldlazydelta = destrevlog._lazydelta
3057 3052 oldlazydeltabase = destrevlog._lazydeltabase
3058 3053 oldamd = destrevlog._deltabothparents
3059 3054
3060 3055 try:
3061 3056 if deltareuse == self.DELTAREUSEALWAYS:
3062 3057 destrevlog._lazydeltabase = True
3063 3058 destrevlog._lazydelta = True
3064 3059 elif deltareuse == self.DELTAREUSESAMEREVS:
3065 3060 destrevlog._lazydeltabase = False
3066 3061 destrevlog._lazydelta = True
3067 3062 elif deltareuse == self.DELTAREUSENEVER:
3068 3063 destrevlog._lazydeltabase = False
3069 3064 destrevlog._lazydelta = False
3070 3065
3071 3066 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3072 3067
3073 3068 self._clone(
3074 3069 tr,
3075 3070 destrevlog,
3076 3071 addrevisioncb,
3077 3072 deltareuse,
3078 3073 forcedeltabothparents,
3079 3074 sidedata_helpers,
3080 3075 )
3081 3076
3082 3077 finally:
3083 3078 destrevlog._lazydelta = oldlazydelta
3084 3079 destrevlog._lazydeltabase = oldlazydeltabase
3085 3080 destrevlog._deltabothparents = oldamd
3086 3081
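
A compact restatement of how `clone` maps the delta-reuse policies onto the destination revlog's lazy-delta switches in the try block above; `fulladd` leaves the switches untouched. `resolve_delta_reuse` is an illustrative helper, not part of the revlog API.

    def resolve_delta_reuse(policy):
        # returns (lazydeltabase, lazydelta), or None when the switches are left alone
        table = {
            b'always': (True, True),     # reuse deltas whenever possible
            b'samerevs': (False, True),  # reuse only when the same base would be picked
            b'never': (False, False),    # always recompute
        }
        return table.get(policy)         # b'fulladd' (and unknown values) -> None

    assert resolve_delta_reuse(b'samerevs') == (False, True)
    assert resolve_delta_reuse(b'fulladd') is None
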
3087 3082 def _clone(
3088 3083 self,
3089 3084 tr,
3090 3085 destrevlog,
3091 3086 addrevisioncb,
3092 3087 deltareuse,
3093 3088 forcedeltabothparents,
3094 3089 sidedata_helpers,
3095 3090 ):
3096 3091 """perform the core duty of `revlog.clone` after parameter processing"""
3097 3092 deltacomputer = deltautil.deltacomputer(destrevlog)
3098 3093 index = self.index
3099 3094 for rev in self:
3100 3095 entry = index[rev]
3101 3096
3102 3097 # Some classes override linkrev to take filtered revs into
3103 3098 # account. Use raw entry from index.
3104 3099 flags = entry[0] & 0xFFFF
3105 3100 linkrev = entry[4]
3106 3101 p1 = index[entry[5]][7]
3107 3102 p2 = index[entry[6]][7]
3108 3103 node = entry[7]
3109 3104
3110 3105 # (Possibly) reuse the delta from the revlog if allowed and
3111 3106 # the revlog chunk is a delta.
3112 3107 cachedelta = None
3113 3108 rawtext = None
3114 3109 if deltareuse == self.DELTAREUSEFULLADD:
3115 3110 text = self._revisiondata(rev)
3116 3111 sidedata = self.sidedata(rev)
3117 3112
3118 3113 if sidedata_helpers is not None:
3119 3114 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3120 3115 self, sidedata_helpers, sidedata, rev
3121 3116 )
3122 3117 flags = flags | new_flags[0] & ~new_flags[1]
3123 3118
3124 3119 destrevlog.addrevision(
3125 3120 text,
3126 3121 tr,
3127 3122 linkrev,
3128 3123 p1,
3129 3124 p2,
3130 3125 cachedelta=cachedelta,
3131 3126 node=node,
3132 3127 flags=flags,
3133 3128 deltacomputer=deltacomputer,
3134 3129 sidedata=sidedata,
3135 3130 )
3136 3131 else:
3137 3132 if destrevlog._lazydelta:
3138 3133 dp = self.deltaparent(rev)
3139 3134 if dp != nullrev:
3140 3135 cachedelta = (dp, bytes(self._chunk(rev)))
3141 3136
3142 3137 sidedata = None
3143 3138 if not cachedelta:
3144 3139 rawtext = self._revisiondata(rev)
3145 3140 sidedata = self.sidedata(rev)
3146 3141 if sidedata is None:
3147 3142 sidedata = self.sidedata(rev)
3148 3143
3149 3144 if sidedata_helpers is not None:
3150 3145 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3151 3146 self, sidedata_helpers, sidedata, rev
3152 3147 )
3153 3148 flags = flags | new_flags[0] & ~new_flags[1]
3154 3149
3155 3150 with destrevlog._writing(tr):
3156 3151 destrevlog._addrevision(
3157 3152 node,
3158 3153 rawtext,
3159 3154 tr,
3160 3155 linkrev,
3161 3156 p1,
3162 3157 p2,
3163 3158 flags,
3164 3159 cachedelta,
3165 3160 deltacomputer=deltacomputer,
3166 3161 sidedata=sidedata,
3167 3162 )
3168 3163
3169 3164 if addrevisioncb:
3170 3165 addrevisioncb(self, rev, node)
3171 3166
3172 3167 def censorrevision(self, tr, censornode, tombstone=b''):
3173 3168 if self._format_version == REVLOGV0:
3174 3169 raise error.RevlogError(
3175 3170 _(b'cannot censor with version %d revlogs')
3176 3171 % self._format_version
3177 3172 )
3178 3173 elif self._format_version == REVLOGV1:
3179 3174 censor.v1_censor(self, tr, censornode, tombstone)
3180 3175 else:
3181 3176 # revlog v2
3182 3177 raise error.RevlogError(
3183 3178 _(b'cannot censor with version %d revlogs')
3184 3179 % self._format_version
3185 3180 )
3186 3181
3187 3182 def verifyintegrity(self, state):
3188 3183 """Verifies the integrity of the revlog.
3189 3184
3190 3185 Yields ``revlogproblem`` instances describing problems that are
3191 3186 found.
3192 3187 """
3193 3188 dd, di = self.checksize()
3194 3189 if dd:
3195 3190 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3196 3191 if di:
3197 3192 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3198 3193
3199 3194 version = self._format_version
3200 3195
3201 3196 # The verifier tells us what version revlog we should be.
3202 3197 if version != state[b'expectedversion']:
3203 3198 yield revlogproblem(
3204 3199 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3205 3200 % (self.display_id, version, state[b'expectedversion'])
3206 3201 )
3207 3202
3208 3203 state[b'skipread'] = set()
3209 3204 state[b'safe_renamed'] = set()
3210 3205
3211 3206 for rev in self:
3212 3207 node = self.node(rev)
3213 3208
3214 3209 # Verify contents. 4 cases to care about:
3215 3210 #
3216 3211 # common: the most common case
3217 3212 # rename: with a rename
3218 3213 # meta: file content starts with b'\1\n', the metadata
3219 3214 # header defined in filelog.py, but without a rename
3220 3215 # ext: content stored externally
3221 3216 #
3222 3217 # More formally, their differences are shown below:
3223 3218 #
3224 3219 # | common | rename | meta | ext
3225 3220 # -------------------------------------------------------
3226 3221 # flags() | 0 | 0 | 0 | not 0
3227 3222 # renamed() | False | True | False | ?
3228 3223 # rawtext[0:2]=='\1\n'| False | True | True | ?
3229 3224 #
3230 3225 # "rawtext" means the raw text stored in revlog data, which
3231 3226 # could be retrieved by "rawdata(rev)". "text"
3232 3227 # mentioned below is "revision(rev)".
3233 3228 #
3234 3229 # There are 3 different lengths stored physically:
3235 3230 # 1. L1: rawsize, stored in revlog index
3236 3231 # 2. L2: len(rawtext), stored in revlog data
3237 3232 # 3. L3: len(text), stored in revlog data if flags==0, or
3238 3233 # possibly somewhere else if flags!=0
3239 3234 #
3240 3235 # L1 should be equal to L2. L3 could be different from them.
3241 3236 # "text" may or may not affect commit hash depending on flag
3242 3237 # processors (see flagutil.addflagprocessor).
3243 3238 #
3244 3239 # | common | rename | meta | ext
3245 3240 # -------------------------------------------------
3246 3241 # rawsize() | L1 | L1 | L1 | L1
3247 3242 # size() | L1 | L2-LM | L1(*) | L1 (?)
3248 3243 # len(rawtext) | L2 | L2 | L2 | L2
3249 3244 # len(text) | L2 | L2 | L2 | L3
3250 3245 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3251 3246 #
3252 3247 # LM: length of metadata, depending on rawtext
3253 3248 # (*): not ideal, see comment in filelog.size
3254 3249 # (?): could be "- len(meta)" if the resolved content has
3255 3250 # rename metadata
3256 3251 #
3257 3252 # Checks needed to be done:
3258 3253 # 1. length check: L1 == L2, in all cases.
3259 3254 # 2. hash check: depending on flag processor, we may need to
3260 3255 # use either "text" (external), or "rawtext" (in revlog).
3261 3256
3262 3257 try:
3263 3258 skipflags = state.get(b'skipflags', 0)
3264 3259 if skipflags:
3265 3260 skipflags &= self.flags(rev)
3266 3261
3267 3262 _verify_revision(self, skipflags, state, node)
3268 3263
3269 3264 l1 = self.rawsize(rev)
3270 3265 l2 = len(self.rawdata(node))
3271 3266
3272 3267 if l1 != l2:
3273 3268 yield revlogproblem(
3274 3269 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3275 3270 node=node,
3276 3271 )
3277 3272
3278 3273 except error.CensoredNodeError:
3279 3274 if state[b'erroroncensored']:
3280 3275 yield revlogproblem(
3281 3276 error=_(b'censored file data'), node=node
3282 3277 )
3283 3278 state[b'skipread'].add(node)
3284 3279 except Exception as e:
3285 3280 yield revlogproblem(
3286 3281 error=_(b'unpacking %s: %s')
3287 3282 % (short(node), stringutil.forcebytestr(e)),
3288 3283 node=node,
3289 3284 )
3290 3285 state[b'skipread'].add(node)
3291 3286
3292 3287 def storageinfo(
3293 3288 self,
3294 3289 exclusivefiles=False,
3295 3290 sharedfiles=False,
3296 3291 revisionscount=False,
3297 3292 trackedsize=False,
3298 3293 storedsize=False,
3299 3294 ):
3300 3295 d = {}
3301 3296
3302 3297 if exclusivefiles:
3303 3298 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3304 3299 if not self._inline:
3305 3300 d[b'exclusivefiles'].append((self.opener, self._datafile))
3306 3301
3307 3302 if sharedfiles:
3308 3303 d[b'sharedfiles'] = []
3309 3304
3310 3305 if revisionscount:
3311 3306 d[b'revisionscount'] = len(self)
3312 3307
3313 3308 if trackedsize:
3314 3309 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3315 3310
3316 3311 if storedsize:
3317 3312 d[b'storedsize'] = sum(
3318 3313 self.opener.stat(path).st_size for path in self.files()
3319 3314 )
3320 3315
3321 3316 return d
3322 3317
3323 3318 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3324 3319 if not self.hassidedata:
3325 3320 return
3326 3321         # revlog formats with sidedata support do not support inline
3327 3322 assert not self._inline
3328 3323 if not helpers[1] and not helpers[2]:
3329 3324 # Nothing to generate or remove
3330 3325 return
3331 3326
3332 3327 new_entries = []
3333 3328 # append the new sidedata
3334 3329 with self._writing(transaction):
3335 3330 ifh, dfh, sdfh = self._writinghandles
3336 3331 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3337 3332
3338 3333 current_offset = sdfh.tell()
3339 3334 for rev in range(startrev, endrev + 1):
3340 3335 entry = self.index[rev]
3341 3336 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3342 3337 store=self,
3343 3338 sidedata_helpers=helpers,
3344 3339 sidedata={},
3345 3340 rev=rev,
3346 3341 )
3347 3342
3348 3343 serialized_sidedata = sidedatautil.serialize_sidedata(
3349 3344 new_sidedata
3350 3345 )
3351 3346
3352 3347 sidedata_compression_mode = COMP_MODE_INLINE
3353 3348 if serialized_sidedata and self.hassidedata:
3354 3349 sidedata_compression_mode = COMP_MODE_PLAIN
3355 3350 h, comp_sidedata = self.compress(serialized_sidedata)
3356 3351 if (
3357 3352 h != b'u'
3358 3353                     and comp_sidedata[0:1] != b'\0'
3359 3354 and len(comp_sidedata) < len(serialized_sidedata)
3360 3355 ):
3361 3356 assert not h
3362 3357 if (
3363 3358                         comp_sidedata[0:1]
3364 3359 == self._docket.default_compression_header
3365 3360 ):
3366 3361 sidedata_compression_mode = COMP_MODE_DEFAULT
3367 3362 serialized_sidedata = comp_sidedata
3368 3363 else:
3369 3364 sidedata_compression_mode = COMP_MODE_INLINE
3370 3365 serialized_sidedata = comp_sidedata
3371 3366 if entry[8] != 0 or entry[9] != 0:
3372 3367 # rewriting entries that already have sidedata is not
3373 3368 # supported yet, because it introduces garbage data in the
3374 3369 # revlog.
3375 3370 msg = b"rewriting existing sidedata is not supported yet"
3376 3371 raise error.Abort(msg)
3377 3372
3378 3373 # Apply (potential) flags to add and to remove after running
3379 3374 # the sidedata helpers
3380 3375 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3381 3376 entry_update = (
3382 3377 current_offset,
3383 3378 len(serialized_sidedata),
3384 3379 new_offset_flags,
3385 3380 sidedata_compression_mode,
3386 3381 )
3387 3382
3388 3383                 # the sidedata computation might have moved the file cursors around
3389 3384 sdfh.seek(current_offset, os.SEEK_SET)
3390 3385 sdfh.write(serialized_sidedata)
3391 3386 new_entries.append(entry_update)
3392 3387 current_offset += len(serialized_sidedata)
3393 3388 self._docket.sidedata_end = sdfh.tell()
3394 3389
3395 3390 # rewrite the new index entries
3396 3391 ifh.seek(startrev * self.index.entry_size)
3397 3392 for i, e in enumerate(new_entries):
3398 3393 rev = startrev + i
3399 3394 self.index.replace_sidedata_info(rev, *e)
3400 3395 packed = self.index.entry_binary(rev)
3401 3396 if rev == 0 and self._docket is None:
3402 3397 header = self._format_flags | self._format_version
3403 3398 header = self.index.pack_header(header)
3404 3399 packed = header + packed
3405 3400 ifh.write(packed)
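
One more small sketch, of the flag bookkeeping `rewrite_sidedata` does when it computes `new_offset_flags`: the bits a sidedata helper asks to add are applied after masking out the bits it asks to remove (note that `&` binds tighter than `|` in Python, so the original expression needs no parentheses). The helper name is made up for illustration.

    def apply_flag_updates(offset_flags, flags_to_add, flags_to_remove):
        # equivalent of: entry[0] | flags[0] & ~flags[1]
        return offset_flags | (flags_to_add & ~flags_to_remove)

    assert apply_flag_updates(0b0001, 0b0110, 0b0100) == 0b0011
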
@@ -1,8 +1,16 b''
1 1 # mercurial.revlogutils -- basic utilities for revlog
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9
10 from ..interfaces import repository
11
12
13 def offset_type(offset, type):
14 if (type & ~repository.REVISION_FLAGS_KNOWN) != 0:
15 raise ValueError(b'unknown revlog index flags: %d' % type)
16 return int(int(offset) << 16 | type)
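
For reference, a standalone sketch (outside the Mercurial tree) of what the relocated helper packs into the first index-entry field: the data offset in the high bits and the 16-bit flag word in the low bits, with `getoffset`/`gettype` as the inverse operations used by the v0 parser below. The flag mask here is a stand-in constant, not the real `repository.REVISION_FLAGS_KNOWN` value.

    REVISION_FLAGS_KNOWN_STANDIN = 0xFFFF  # stand-in mask; the real set is narrower

    def offset_type_sketch(offset, flags):
        # pack the byte offset into the high bits and the flags into the low 16 bits
        if flags & ~REVISION_FLAGS_KNOWN_STANDIN:
            raise ValueError('unknown revlog index flags: %d' % flags)
        return (int(offset) << 16) | flags

    def getoffset_sketch(q):
        return int(q >> 16)

    def gettype_sketch(q):
        return int(q & 0xFFFF)

    packed = offset_type_sketch(4096, 1 << 1)
    assert getoffset_sketch(packed) == 4096
    assert gettype_sketch(packed) == 2
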
@@ -1,163 +1,158 b''
1 1 # revlogv0 - code related to revlog format "V0"
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9
10 10 from ..node import sha1nodeconstants
11 11 from .constants import (
12 12 COMP_MODE_INLINE,
13 13 INDEX_ENTRY_V0,
14 14 )
15 15 from ..i18n import _
16 16
17 17 from .. import (
18 18 error,
19 19 node,
20 20 pycompat,
21 revlogutils,
21 22 util,
22 23 )
23 24
24 25 from . import (
25 26 flagutil,
26 27 nodemap as nodemaputil,
27 28 )
28 29
29 30
30 31 def getoffset(q):
31 32 return int(q >> 16)
32 33
33 34
34 35 def gettype(q):
35 36 return int(q & 0xFFFF)
36 37
37 38
38 def offset_type(offset, type):
39 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
40 raise ValueError(b'unknown revlog index flags')
41 return int(int(offset) << 16 | type)
42
43
44 39 class revlogoldindex(list):
45 40 rust_ext_compat = 0
46 41 entry_size = INDEX_ENTRY_V0.size
47 42 null_item = (
48 43 0,
49 44 0,
50 45 0,
51 46 -1,
52 47 -1,
53 48 -1,
54 49 -1,
55 50 sha1nodeconstants.nullid,
56 51 0,
57 52 0,
58 53 COMP_MODE_INLINE,
59 54 COMP_MODE_INLINE,
60 55 )
61 56
62 57 @property
63 58 def nodemap(self):
64 59 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
65 60 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
66 61 return self._nodemap
67 62
68 63 @util.propertycache
69 64 def _nodemap(self):
70 65 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: node.nullrev})
71 66 for r in range(0, len(self)):
72 67 n = self[r][7]
73 68 nodemap[n] = r
74 69 return nodemap
75 70
76 71 def has_node(self, node):
77 72 """return True if the node exist in the index"""
78 73 return node in self._nodemap
79 74
80 75 def rev(self, node):
81 76 """return a revision for a node
82 77
83 78 If the node is unknown, raise a RevlogError"""
84 79 return self._nodemap[node]
85 80
86 81 def get_rev(self, node):
87 82 """return a revision for a node
88 83
89 84 If the node is unknown, return None"""
90 85 return self._nodemap.get(node)
91 86
92 87 def append(self, tup):
93 88 self._nodemap[tup[7]] = len(self)
94 89 super(revlogoldindex, self).append(tup)
95 90
96 91 def __delitem__(self, i):
97 92 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
98 93 raise ValueError(b"deleting slices only supports a:-1 with step 1")
99 94 for r in pycompat.xrange(i.start, len(self)):
100 95 del self._nodemap[self[r][7]]
101 96 super(revlogoldindex, self).__delitem__(i)
102 97
103 98 def clearcaches(self):
104 99 self.__dict__.pop('_nodemap', None)
105 100
106 101 def __getitem__(self, i):
107 102 if i == -1:
108 103 return self.null_item
109 104 return list.__getitem__(self, i)
110 105
111 106 def pack_header(self, header):
112 107 """pack header information in binary"""
113 108 return b''
114 109
115 110 def entry_binary(self, rev):
116 111 """return the raw binary string representing a revision"""
117 112 entry = self[rev]
118 113 if gettype(entry[0]):
119 114 raise error.RevlogError(
120 115 _(b'index entry flags need revlog version 1')
121 116 )
122 117 e2 = (
123 118 getoffset(entry[0]),
124 119 entry[1],
125 120 entry[3],
126 121 entry[4],
127 122 self[entry[5]][7],
128 123 self[entry[6]][7],
129 124 entry[7],
130 125 )
131 126 return INDEX_ENTRY_V0.pack(*e2)
132 127
133 128
134 129 def parse_index_v0(data, inline):
135 130 s = INDEX_ENTRY_V0.size
136 131 index = []
137 132 nodemap = nodemaputil.NodeMap({node.nullid: node.nullrev})
138 133 n = off = 0
139 134 l = len(data)
140 135 while off + s <= l:
141 136 cur = data[off : off + s]
142 137 off += s
143 138 e = INDEX_ENTRY_V0.unpack(cur)
144 139 # transform to revlogv1 format
145 140 e2 = (
146 offset_type(e[0], 0),
141 revlogutils.offset_type(e[0], 0),
147 142 e[1],
148 143 -1,
149 144 e[2],
150 145 e[3],
151 146 nodemap.get(e[4], node.nullrev),
152 147 nodemap.get(e[5], node.nullrev),
153 148 e[6],
154 149 0, # no side data support
155 150 0, # no side data support
156 151 COMP_MODE_INLINE,
157 152 )
158 153 index.append(e2)
159 154 nodemap[e[6]] = n
160 155 n += 1
161 156
162 157 index = revlogoldindex(index)
163 158 return index, None
@@ -1,159 +1,161 b''
1 1 # coding=UTF-8
2 2
3 3 from __future__ import absolute_import
4 4
5 5 import base64
6 6 import zlib
7 7
8 8 from mercurial import (
9 9 bundlecaches,
10 10 changegroup,
11 11 extensions,
12 12 revlog,
13 13 util,
14 14 )
15 15 from mercurial.revlogutils import flagutil
16 from mercurial.interfaces import repository
16 17
17 18 # Test only: These flags are defined here only in the context of testing the
18 19 # behavior of the flag processor. The canonical way to add flags is to get in
19 20 # touch with the community and make them known in revlog.
20 21 REVIDX_NOOP = 1 << 3
21 22 REVIDX_BASE64 = 1 << 2
22 23 REVIDX_GZIP = 1 << 1
23 24 REVIDX_FAIL = 1
24 25
25 26
26 27 def validatehash(self, text):
27 28 return True
28 29
29 30
30 31 def bypass(self, text):
31 32 return False
32 33
33 34
34 35 def noopdonothing(self, text):
35 36 return (text, True)
36 37
37 38
38 39 def noopdonothingread(self, text):
39 40 return (text, True)
40 41
41 42
42 43 def b64encode(self, text):
43 44 return (base64.b64encode(text), False)
44 45
45 46
46 47 def b64decode(self, text):
47 48 return (base64.b64decode(text), True)
48 49
49 50
50 51 def gzipcompress(self, text):
51 52 return (zlib.compress(text), False)
52 53
53 54
54 55 def gzipdecompress(self, text):
55 56 return (zlib.decompress(text), True)
56 57
57 58
58 59 def supportedoutgoingversions(orig, repo):
59 60 versions = orig(repo)
60 61 versions.discard(b'01')
61 62 versions.discard(b'02')
62 63 versions.add(b'03')
63 64 return versions
64 65
65 66
66 67 def allsupportedversions(orig, ui):
67 68 versions = orig(ui)
68 69 versions.add(b'03')
69 70 return versions
70 71
71 72
72 73 def makewrappedfile(obj):
73 74 class wrappedfile(obj.__class__):
74 75 def addrevision(
75 76 self,
76 77 text,
77 78 transaction,
78 79 link,
79 80 p1,
80 81 p2,
81 82 cachedelta=None,
82 83 node=None,
83 84 flags=flagutil.REVIDX_DEFAULT_FLAGS,
84 85 ):
85 86 if b'[NOOP]' in text:
86 87 flags |= REVIDX_NOOP
87 88
88 89 if b'[BASE64]' in text:
89 90 flags |= REVIDX_BASE64
90 91
91 92 if b'[GZIP]' in text:
92 93 flags |= REVIDX_GZIP
93 94
94 95 # This addrevision wrapper is meant to add a flag we will not have
95 96 # transforms registered for, ensuring we handle this error case.
96 97 if b'[FAIL]' in text:
97 98 flags |= REVIDX_FAIL
98 99
99 100 return super(wrappedfile, self).addrevision(
100 101 text,
101 102 transaction,
102 103 link,
103 104 p1,
104 105 p2,
105 106 cachedelta=cachedelta,
106 107 node=node,
107 108 flags=flags,
108 109 )
109 110
110 111 obj.__class__ = wrappedfile
111 112
112 113
113 114 def reposetup(ui, repo):
114 115 class wrappingflagprocessorrepo(repo.__class__):
115 116 def file(self, f):
116 117 orig = super(wrappingflagprocessorrepo, self).file(f)
117 118 makewrappedfile(orig)
118 119 return orig
119 120
120 121 repo.__class__ = wrappingflagprocessorrepo
121 122
122 123
123 124 def extsetup(ui):
124 125 # Enable changegroup3 for flags to be sent over the wire
125 126 wrapfunction = extensions.wrapfunction
126 127 wrapfunction(
127 128 changegroup, 'supportedoutgoingversions', supportedoutgoingversions
128 129 )
129 130 wrapfunction(changegroup, 'allsupportedversions', allsupportedversions)
130 131
131 132 # Teach revlog about our test flags
132 133 flags = [REVIDX_NOOP, REVIDX_BASE64, REVIDX_GZIP, REVIDX_FAIL]
133 134 flagutil.REVIDX_KNOWN_FLAGS |= util.bitsfrom(flags)
135 repository.REVISION_FLAGS_KNOWN |= util.bitsfrom(flags)
134 136 revlog.REVIDX_FLAGS_ORDER.extend(flags)
135 137
136 138 # Teach exchange to use changegroup 3
137 139 for k in bundlecaches._bundlespeccontentopts.keys():
138 140 bundlecaches._bundlespeccontentopts[k][b"cg.version"] = b"03"
139 141
140 142 # Register flag processors for each extension
141 143 flagutil.addflagprocessor(
142 144 REVIDX_NOOP,
143 145 (
144 146 noopdonothingread,
145 147 noopdonothing,
146 148 validatehash,
147 149 ),
148 150 )
149 151 flagutil.addflagprocessor(
150 152 REVIDX_BASE64,
151 153 (
152 154 b64decode,
153 155 b64encode,
154 156 bypass,
155 157 ),
156 158 )
157 159 flagutil.addflagprocessor(
158 160 REVIDX_GZIP, (gzipdecompress, gzipcompress, bypass)
159 161 )