revlog: deal with nodemap deletion within the index...
marmoute
r43839:5b556d46 default draft
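The theme of this changeset is keeping the node-to-revision mapping (the nodemap) consistent when entries are removed from a revlog index, as happens during a strip. Below is a minimal, hypothetical sketch of the invariant that the new `_stripnodes`/`__delitem__` code in parsers.py enforces; `SimpleIndex` is a stand-in for illustration only, not one of Mercurial's real index classes.

# Hypothetical stand-in for a revlog index: each entry ends with the node,
# and nodemap maps node -> rev. Appending must record the node, and
# truncating the index must drop the nodes of the removed revisions,
# otherwise stale nodemap entries point at revisions that no longer exist.
class SimpleIndex(object):
    def __init__(self):
        self._entries = []        # each entry: (..., node)
        self.nodemap = {}         # node -> rev

    def append(self, entry):
        self.nodemap[entry[-1]] = len(self._entries)
        self._entries.append(entry)

    def strip(self, start):
        # mirror of _stripnodes(): forget every node from `start` onward
        for rev in range(start, len(self._entries)):
            del self.nodemap[self._entries[rev][-1]]
        del self._entries[start:]

idx = SimpleIndex()
idx.append((0, b'node-a'))
idx.append((1, b'node-b'))
idx.strip(1)
assert b'node-b' not in idx.nodemap and b'node-a' in idx.nodemap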
@@ -1,670 +1,669 b''
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import os
17 17 import shutil
18 18
19 19 from .i18n import _
20 20 from .node import nullid, nullrev
21 21
22 22 from . import (
23 23 bundle2,
24 24 changegroup,
25 25 changelog,
26 26 cmdutil,
27 27 discovery,
28 28 encoding,
29 29 error,
30 30 exchange,
31 31 filelog,
32 32 localrepo,
33 33 manifest,
34 34 mdiff,
35 35 node as nodemod,
36 36 pathutil,
37 37 phases,
38 38 pycompat,
39 39 revlog,
40 40 util,
41 41 vfs as vfsmod,
42 42 )
43 43
44 44
45 45 class bundlerevlog(revlog.revlog):
46 46 def __init__(self, opener, indexfile, cgunpacker, linkmapper):
47 47 # How it works:
48 48 # To retrieve a revision, we need to know the offset of the revision in
49 49 # the bundle (an unbundle object). We store this offset in the index
50 50 # (start). The base of the delta is stored in the base field.
51 51 #
52 52 # To differentiate a rev in the bundle from a rev in the revlog, we
53 53 # check revision against repotiprev.
54 54 opener = vfsmod.readonlyvfs(opener)
55 55 revlog.revlog.__init__(self, opener, indexfile)
56 56 self.bundle = cgunpacker
57 57 n = len(self)
58 58 self.repotiprev = n - 1
59 59 self.bundlerevs = set() # used by 'bundle()' revset expression
60 60 for deltadata in cgunpacker.deltaiter():
61 61 node, p1, p2, cs, deltabase, delta, flags = deltadata
62 62
63 63 size = len(delta)
64 64 start = cgunpacker.tell() - size
65 65
66 66 link = linkmapper(cs)
67 67 if node in self.nodemap:
68 68 # this can happen if two branches make the same change
69 69 self.bundlerevs.add(self.nodemap[node])
70 70 continue
71 71
72 72 for p in (p1, p2):
73 73 if p not in self.nodemap:
74 74 raise error.LookupError(
75 75 p, self.indexfile, _(b"unknown parent")
76 76 )
77 77
78 78 if deltabase not in self.nodemap:
79 79 raise error.LookupError(
80 80 deltabase, self.indexfile, _(b'unknown delta base')
81 81 )
82 82
83 83 baserev = self.rev(deltabase)
84 84 # start, size, full unc. size, base (unused), link, p1, p2, node
85 85 e = (
86 86 revlog.offset_type(start, flags),
87 87 size,
88 88 -1,
89 89 baserev,
90 90 link,
91 91 self.rev(p1),
92 92 self.rev(p2),
93 93 node,
94 94 )
95 95 self.index.append(e)
96 self.nodemap[node] = n
97 96 self.bundlerevs.add(n)
98 97 n += 1
99 98
100 99 def _chunk(self, rev, df=None):
101 100 # Warning: in case of bundle, the diff is against what we stored as
102 101 # delta base, not against rev - 1
103 102 # XXX: could use some caching
104 103 if rev <= self.repotiprev:
105 104 return revlog.revlog._chunk(self, rev)
106 105 self.bundle.seek(self.start(rev))
107 106 return self.bundle.read(self.length(rev))
108 107
109 108 def revdiff(self, rev1, rev2):
110 109 """return or calculate a delta between two revisions"""
111 110 if rev1 > self.repotiprev and rev2 > self.repotiprev:
112 111 # hot path for bundle
113 112 revb = self.index[rev2][3]
114 113 if revb == rev1:
115 114 return self._chunk(rev2)
116 115 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
117 116 return revlog.revlog.revdiff(self, rev1, rev2)
118 117
119 118 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
120 119
121 120 def _rawtext(self, node, rev, _df=None):
122 121 if rev is None:
123 122 rev = self.rev(node)
124 123 validated = False
125 124 rawtext = None
126 125 chain = []
127 126 iterrev = rev
128 127 # reconstruct the revision if it is from a changegroup
129 128 while iterrev > self.repotiprev:
130 129 if self._revisioncache and self._revisioncache[1] == iterrev:
131 130 rawtext = self._revisioncache[2]
132 131 break
133 132 chain.append(iterrev)
134 133 iterrev = self.index[iterrev][3]
135 134 if iterrev == nullrev:
136 135 rawtext = b''
137 136 elif rawtext is None:
138 137 r = super(bundlerevlog, self)._rawtext(
139 138 self.node(iterrev), iterrev, _df=_df
140 139 )
141 140 __, rawtext, validated = r
142 141 if chain:
143 142 validated = False
144 143 while chain:
145 144 delta = self._chunk(chain.pop())
146 145 rawtext = mdiff.patches(rawtext, [delta])
147 146 return rev, rawtext, validated
148 147
149 148 def addrevision(self, *args, **kwargs):
150 149 raise NotImplementedError
151 150
152 151 def addgroup(self, *args, **kwargs):
153 152 raise NotImplementedError
154 153
155 154 def strip(self, *args, **kwargs):
156 155 raise NotImplementedError
157 156
158 157 def checksize(self):
159 158 raise NotImplementedError
160 159
161 160
162 161 class bundlechangelog(bundlerevlog, changelog.changelog):
163 162 def __init__(self, opener, cgunpacker):
164 163 changelog.changelog.__init__(self, opener)
165 164 linkmapper = lambda x: x
166 165 bundlerevlog.__init__(
167 166 self, opener, self.indexfile, cgunpacker, linkmapper
168 167 )
169 168
170 169
171 170 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
172 171 def __init__(
173 172 self, opener, cgunpacker, linkmapper, dirlogstarts=None, dir=b''
174 173 ):
175 174 manifest.manifestrevlog.__init__(self, opener, tree=dir)
176 175 bundlerevlog.__init__(
177 176 self, opener, self.indexfile, cgunpacker, linkmapper
178 177 )
179 178 if dirlogstarts is None:
180 179 dirlogstarts = {}
181 180 if self.bundle.version == b"03":
182 181 dirlogstarts = _getfilestarts(self.bundle)
183 182 self._dirlogstarts = dirlogstarts
184 183 self._linkmapper = linkmapper
185 184
186 185 def dirlog(self, d):
187 186 if d in self._dirlogstarts:
188 187 self.bundle.seek(self._dirlogstarts[d])
189 188 return bundlemanifest(
190 189 self.opener,
191 190 self.bundle,
192 191 self._linkmapper,
193 192 self._dirlogstarts,
194 193 dir=d,
195 194 )
196 195 return super(bundlemanifest, self).dirlog(d)
197 196
198 197
199 198 class bundlefilelog(filelog.filelog):
200 199 def __init__(self, opener, path, cgunpacker, linkmapper):
201 200 filelog.filelog.__init__(self, opener, path)
202 201 self._revlog = bundlerevlog(
203 202 opener, self.indexfile, cgunpacker, linkmapper
204 203 )
205 204
206 205
207 206 class bundlepeer(localrepo.localpeer):
208 207 def canpush(self):
209 208 return False
210 209
211 210
212 211 class bundlephasecache(phases.phasecache):
213 212 def __init__(self, *args, **kwargs):
214 213 super(bundlephasecache, self).__init__(*args, **kwargs)
215 214 if util.safehasattr(self, 'opener'):
216 215 self.opener = vfsmod.readonlyvfs(self.opener)
217 216
218 217 def write(self):
219 218 raise NotImplementedError
220 219
221 220 def _write(self, fp):
222 221 raise NotImplementedError
223 222
224 223 def _updateroots(self, phase, newroots, tr):
225 224 self.phaseroots[phase] = newroots
226 225 self.invalidate()
227 226 self.dirty = True
228 227
229 228
230 229 def _getfilestarts(cgunpacker):
231 230 filespos = {}
232 231 for chunkdata in iter(cgunpacker.filelogheader, {}):
233 232 fname = chunkdata[b'filename']
234 233 filespos[fname] = cgunpacker.tell()
235 234 for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
236 235 pass
237 236 return filespos
238 237
239 238
240 239 class bundlerepository(object):
241 240 """A repository instance that is a union of a local repo and a bundle.
242 241
243 242 Instances represent a read-only repository composed of a local repository
244 243 with the contents of a bundle file applied. The repository instance is
245 244 conceptually similar to the state of a repository after an
246 245 ``hg unbundle`` operation. However, the contents of the bundle are never
247 246 applied to the actual base repository.
248 247
249 248 Instances constructed directly are not usable as repository objects.
250 249 Use instance() or makebundlerepository() to create instances.
251 250 """
252 251
253 252 def __init__(self, bundlepath, url, tempparent):
254 253 self._tempparent = tempparent
255 254 self._url = url
256 255
257 256 self.ui.setconfig(b'phases', b'publish', False, b'bundlerepo')
258 257
259 258 self.tempfile = None
260 259 f = util.posixfile(bundlepath, b"rb")
261 260 bundle = exchange.readbundle(self.ui, f, bundlepath)
262 261
263 262 if isinstance(bundle, bundle2.unbundle20):
264 263 self._bundlefile = bundle
265 264 self._cgunpacker = None
266 265
267 266 cgpart = None
268 267 for part in bundle.iterparts(seekable=True):
269 268 if part.type == b'changegroup':
270 269 if cgpart:
271 270 raise NotImplementedError(
272 271 b"can't process multiple changegroups"
273 272 )
274 273 cgpart = part
275 274
276 275 self._handlebundle2part(bundle, part)
277 276
278 277 if not cgpart:
279 278 raise error.Abort(_(b"No changegroups found"))
280 279
281 280 # This is required to placate a later consumer, which expects
282 281 # the payload offset to be at the beginning of the changegroup.
283 282 # We need to do this after the iterparts() generator advances
284 283 # because iterparts() will seek to end of payload after the
285 284 # generator returns control to iterparts().
286 285 cgpart.seek(0, os.SEEK_SET)
287 286
288 287 elif isinstance(bundle, changegroup.cg1unpacker):
289 288 if bundle.compressed():
290 289 f = self._writetempbundle(
291 290 bundle.read, b'.hg10un', header=b'HG10UN'
292 291 )
293 292 bundle = exchange.readbundle(self.ui, f, bundlepath, self.vfs)
294 293
295 294 self._bundlefile = bundle
296 295 self._cgunpacker = bundle
297 296 else:
298 297 raise error.Abort(
299 298 _(b'bundle type %s cannot be read') % type(bundle)
300 299 )
301 300
302 301 # dict with the mapping 'filename' -> position in the changegroup.
303 302 self._cgfilespos = {}
304 303
305 304 self.firstnewrev = self.changelog.repotiprev + 1
306 305 phases.retractboundary(
307 306 self,
308 307 None,
309 308 phases.draft,
310 309 [ctx.node() for ctx in self[self.firstnewrev :]],
311 310 )
312 311
313 312 def _handlebundle2part(self, bundle, part):
314 313 if part.type != b'changegroup':
315 314 return
316 315
317 316 cgstream = part
318 317 version = part.params.get(b'version', b'01')
319 318 legalcgvers = changegroup.supportedincomingversions(self)
320 319 if version not in legalcgvers:
321 320 msg = _(b'Unsupported changegroup version: %s')
322 321 raise error.Abort(msg % version)
323 322 if bundle.compressed():
324 323 cgstream = self._writetempbundle(part.read, b'.cg%sun' % version)
325 324
326 325 self._cgunpacker = changegroup.getunbundler(version, cgstream, b'UN')
327 326
328 327 def _writetempbundle(self, readfn, suffix, header=b''):
329 328 """Write a temporary file to disk
330 329 """
331 330 fdtemp, temp = self.vfs.mkstemp(prefix=b"hg-bundle-", suffix=suffix)
332 331 self.tempfile = temp
333 332
334 333 with os.fdopen(fdtemp, 'wb') as fptemp:
335 334 fptemp.write(header)
336 335 while True:
337 336 chunk = readfn(2 ** 18)
338 337 if not chunk:
339 338 break
340 339 fptemp.write(chunk)
341 340
342 341 return self.vfs.open(self.tempfile, mode=b"rb")
343 342
344 343 @localrepo.unfilteredpropertycache
345 344 def _phasecache(self):
346 345 return bundlephasecache(self, self._phasedefaults)
347 346
348 347 @localrepo.unfilteredpropertycache
349 348 def changelog(self):
350 349 # consume the header if it exists
351 350 self._cgunpacker.changelogheader()
352 351 c = bundlechangelog(self.svfs, self._cgunpacker)
353 352 self.manstart = self._cgunpacker.tell()
354 353 return c
355 354
356 355 def _refreshchangelog(self):
357 356 # the changelog of a bundle repo is not a filecache, so this method
358 357 # is not applicable.
359 358 pass
360 359
361 360 @localrepo.unfilteredpropertycache
362 361 def manifestlog(self):
363 362 self._cgunpacker.seek(self.manstart)
364 363 # consume the header if it exists
365 364 self._cgunpacker.manifestheader()
366 365 linkmapper = self.unfiltered().changelog.rev
367 366 rootstore = bundlemanifest(self.svfs, self._cgunpacker, linkmapper)
368 367 self.filestart = self._cgunpacker.tell()
369 368
370 369 return manifest.manifestlog(
371 370 self.svfs, self, rootstore, self.narrowmatch()
372 371 )
373 372
374 373 def _consumemanifest(self):
375 374 """Consumes the manifest portion of the bundle, setting filestart so the
376 375 file portion can be read."""
377 376 self._cgunpacker.seek(self.manstart)
378 377 self._cgunpacker.manifestheader()
379 378 for delta in self._cgunpacker.deltaiter():
380 379 pass
381 380 self.filestart = self._cgunpacker.tell()
382 381
383 382 @localrepo.unfilteredpropertycache
384 383 def manstart(self):
385 384 self.changelog
386 385 return self.manstart
387 386
388 387 @localrepo.unfilteredpropertycache
389 388 def filestart(self):
390 389 self.manifestlog
391 390
392 391 # If filestart was not set by self.manifestlog, that means the
393 392 # manifestlog implementation did not consume the manifests from the
394 393 # changegroup (ex: it might be consuming trees from a separate bundle2
395 394 # part instead). So we need to manually consume it.
396 395 if 'filestart' not in self.__dict__:
397 396 self._consumemanifest()
398 397
399 398 return self.filestart
400 399
401 400 def url(self):
402 401 return self._url
403 402
404 403 def file(self, f):
405 404 if not self._cgfilespos:
406 405 self._cgunpacker.seek(self.filestart)
407 406 self._cgfilespos = _getfilestarts(self._cgunpacker)
408 407
409 408 if f in self._cgfilespos:
410 409 self._cgunpacker.seek(self._cgfilespos[f])
411 410 linkmapper = self.unfiltered().changelog.rev
412 411 return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
413 412 else:
414 413 return super(bundlerepository, self).file(f)
415 414
416 415 def close(self):
417 416 """Close assigned bundle file immediately."""
418 417 self._bundlefile.close()
419 418 if self.tempfile is not None:
420 419 self.vfs.unlink(self.tempfile)
421 420 if self._tempparent:
422 421 shutil.rmtree(self._tempparent, True)
423 422
424 423 def cancopy(self):
425 424 return False
426 425
427 426 def peer(self):
428 427 return bundlepeer(self)
429 428
430 429 def getcwd(self):
431 430 return encoding.getcwd() # always outside the repo
432 431
433 432 # Check if parents exist in localrepo before setting
434 433 def setparents(self, p1, p2=nullid):
435 434 p1rev = self.changelog.rev(p1)
436 435 p2rev = self.changelog.rev(p2)
437 436 msg = _(b"setting parent to node %s that only exists in the bundle\n")
438 437 if self.changelog.repotiprev < p1rev:
439 438 self.ui.warn(msg % nodemod.hex(p1))
440 439 if self.changelog.repotiprev < p2rev:
441 440 self.ui.warn(msg % nodemod.hex(p2))
442 441 return super(bundlerepository, self).setparents(p1, p2)
443 442
444 443
445 444 def instance(ui, path, create, intents=None, createopts=None):
446 445 if create:
447 446 raise error.Abort(_(b'cannot create new bundle repository'))
448 447 # internal config: bundle.mainreporoot
449 448 parentpath = ui.config(b"bundle", b"mainreporoot")
450 449 if not parentpath:
451 450 # try to find the correct path to the working directory repo
452 451 parentpath = cmdutil.findrepo(encoding.getcwd())
453 452 if parentpath is None:
454 453 parentpath = b''
455 454 if parentpath:
456 455 # Try to make the full path relative so we get a nice, short URL.
457 456 # In particular, we don't want temp dir names in test outputs.
458 457 cwd = encoding.getcwd()
459 458 if parentpath == cwd:
460 459 parentpath = b''
461 460 else:
462 461 cwd = pathutil.normasprefix(cwd)
463 462 if parentpath.startswith(cwd):
464 463 parentpath = parentpath[len(cwd) :]
465 464 u = util.url(path)
466 465 path = u.localpath()
467 466 if u.scheme == b'bundle':
468 467 s = path.split(b"+", 1)
469 468 if len(s) == 1:
470 469 repopath, bundlename = parentpath, s[0]
471 470 else:
472 471 repopath, bundlename = s
473 472 else:
474 473 repopath, bundlename = parentpath, path
475 474
476 475 return makebundlerepository(ui, repopath, bundlename)
477 476
478 477
479 478 def makebundlerepository(ui, repopath, bundlepath):
480 479 """Make a bundle repository object based on repo and bundle paths."""
481 480 if repopath:
482 481 url = b'bundle:%s+%s' % (util.expandpath(repopath), bundlepath)
483 482 else:
484 483 url = b'bundle:%s' % bundlepath
485 484
486 485 # Because we can't make any guarantees about the type of the base
487 486 # repository, we can't have a static class representing the bundle
488 487 # repository. We also can't make any guarantees about how to even
489 488 # call the base repository's constructor!
490 489 #
491 490 # So, our strategy is to go through ``localrepo.instance()`` to construct
492 491 # a repo instance. Then, we dynamically create a new type derived from
493 492 # both it and our ``bundlerepository`` class which overrides some
494 493 # functionality. We then change the type of the constructed repository
495 494 # to this new type and initialize the bundle-specific bits of it.
496 495
497 496 try:
498 497 repo = localrepo.instance(ui, repopath, create=False)
499 498 tempparent = None
500 499 except error.RepoError:
501 500 tempparent = pycompat.mkdtemp()
502 501 try:
503 502 repo = localrepo.instance(ui, tempparent, create=True)
504 503 except Exception:
505 504 shutil.rmtree(tempparent)
506 505 raise
507 506
508 507 class derivedbundlerepository(bundlerepository, repo.__class__):
509 508 pass
510 509
511 510 repo.__class__ = derivedbundlerepository
512 511 bundlerepository.__init__(repo, bundlepath, url, tempparent)
513 512
514 513 return repo
515 514
516 515
517 516 class bundletransactionmanager(object):
518 517 def transaction(self):
519 518 return None
520 519
521 520 def close(self):
522 521 raise NotImplementedError
523 522
524 523 def release(self):
525 524 raise NotImplementedError
526 525
527 526
528 527 def getremotechanges(
529 528 ui, repo, peer, onlyheads=None, bundlename=None, force=False
530 529 ):
531 530 '''obtains a bundle of changes incoming from peer
532 531
533 532 "onlyheads" restricts the returned changes to those reachable from the
534 533 specified heads.
535 534 "bundlename", if given, stores the bundle to this file path permanently;
536 535 otherwise it's stored to a temp file and gets deleted again when you call
537 536 the returned "cleanupfn".
538 537 "force" indicates whether to proceed on unrelated repos.
539 538
540 539 Returns a tuple (local, csets, cleanupfn):
541 540
542 541 "local" is a local repo from which to obtain the actual incoming
543 542 changesets; it is a bundlerepo for the obtained bundle when the
544 543 original "peer" is remote.
545 544 "csets" lists the incoming changeset node ids.
546 545 "cleanupfn" must be called without arguments when you're done processing
547 546 the changes; it closes both the original "peer" and the one returned
548 547 here.
549 548 '''
550 549 tmp = discovery.findcommonincoming(repo, peer, heads=onlyheads, force=force)
551 550 common, incoming, rheads = tmp
552 551 if not incoming:
553 552 try:
554 553 if bundlename:
555 554 os.unlink(bundlename)
556 555 except OSError:
557 556 pass
558 557 return repo, [], peer.close
559 558
560 559 commonset = set(common)
561 560 rheads = [x for x in rheads if x not in commonset]
562 561
563 562 bundle = None
564 563 bundlerepo = None
565 564 localrepo = peer.local()
566 565 if bundlename or not localrepo:
567 566 # create a bundle (uncompressed if peer repo is not local)
568 567
569 568 # developer config: devel.legacy.exchange
570 569 legexc = ui.configlist(b'devel', b'legacy.exchange')
571 570 forcebundle1 = b'bundle2' not in legexc and b'bundle1' in legexc
572 571 canbundle2 = (
573 572 not forcebundle1
574 573 and peer.capable(b'getbundle')
575 574 and peer.capable(b'bundle2')
576 575 )
577 576 if canbundle2:
578 577 with peer.commandexecutor() as e:
579 578 b2 = e.callcommand(
580 579 b'getbundle',
581 580 {
582 581 b'source': b'incoming',
583 582 b'common': common,
584 583 b'heads': rheads,
585 584 b'bundlecaps': exchange.caps20to10(
586 585 repo, role=b'client'
587 586 ),
588 587 b'cg': True,
589 588 },
590 589 ).result()
591 590
592 591 fname = bundle = changegroup.writechunks(
593 592 ui, b2._forwardchunks(), bundlename
594 593 )
595 594 else:
596 595 if peer.capable(b'getbundle'):
597 596 with peer.commandexecutor() as e:
598 597 cg = e.callcommand(
599 598 b'getbundle',
600 599 {
601 600 b'source': b'incoming',
602 601 b'common': common,
603 602 b'heads': rheads,
604 603 },
605 604 ).result()
606 605 elif onlyheads is None and not peer.capable(b'changegroupsubset'):
607 606 # compat with older servers when pulling all remote heads
608 607
609 608 with peer.commandexecutor() as e:
610 609 cg = e.callcommand(
611 610 b'changegroup',
612 611 {b'nodes': incoming, b'source': b'incoming',},
613 612 ).result()
614 613
615 614 rheads = None
616 615 else:
617 616 with peer.commandexecutor() as e:
618 617 cg = e.callcommand(
619 618 b'changegroupsubset',
620 619 {
621 620 b'bases': incoming,
622 621 b'heads': rheads,
623 622 b'source': b'incoming',
624 623 },
625 624 ).result()
626 625
627 626 if localrepo:
628 627 bundletype = b"HG10BZ"
629 628 else:
630 629 bundletype = b"HG10UN"
631 630 fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
632 631 # keep written bundle?
633 632 if bundlename:
634 633 bundle = None
635 634 if not localrepo:
636 635 # use the created uncompressed bundlerepo
637 636 localrepo = bundlerepo = makebundlerepository(
638 637 repo.baseui, repo.root, fname
639 638 )
640 639
641 640 # this repo contains local and peer now, so filter out local again
642 641 common = repo.heads()
643 642 if localrepo:
644 643 # Part of common may be remotely filtered
645 644 # So use an unfiltered version
646 645 # The discovery process probably need cleanup to avoid that
647 646 localrepo = localrepo.unfiltered()
648 647
649 648 csets = localrepo.changelog.findmissing(common, rheads)
650 649
651 650 if bundlerepo:
652 651 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
653 652
654 653 with peer.commandexecutor() as e:
655 654 remotephases = e.callcommand(
656 655 b'listkeys', {b'namespace': b'phases',}
657 656 ).result()
658 657
659 658 pullop = exchange.pulloperation(bundlerepo, peer, heads=reponodes)
660 659 pullop.trmanager = bundletransactionmanager()
661 660 exchange._pullapplyphases(pullop, remotephases)
662 661
663 662 def cleanup():
664 663 if bundlerepo:
665 664 bundlerepo.close()
666 665 if bundle:
667 666 os.unlink(bundle)
668 667 peer.close()
669 668
670 669 return (localrepo, csets, cleanup)
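bundlerevlog._rawtext above rebuilds bundle-only revisions by walking each revision's delta base back to something the local revlog already has, then re-applying the collected deltas oldest-first with mdiff.patches. A small sketch of that mechanism, using only mercurial.mdiff; the texts and deltas are made up for illustration.

from mercurial import mdiff

base = b'line 1\nline 2\n'
v2 = b'line 1\nline 2\nline 3\n'
v3 = b'line 0\nline 1\nline 2\nline 3\n'

# deltas as a changegroup would store them: each against its delta base
delta1 = mdiff.textdiff(base, v2)
delta2 = mdiff.textdiff(v2, v3)

# patches() applies the deltas in order, the same order _rawtext pops them
# off its chain (closest to the known base first)
assert mdiff.patches(base, [delta1, delta2]) == v3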
@@ -1,205 +1,213 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 16 revlogutils,
17 17 util,
18 18 )
19 19
20 20 stringio = pycompat.bytesio
21 21
22 22
23 23 _pack = struct.pack
24 24 _unpack = struct.unpack
25 25 _compress = zlib.compress
26 26 _decompress = zlib.decompress
27 27
28 28 # Some code below makes tuples directly because it's more convenient. However,
29 29 # code outside this module should always use dirstatetuple.
30 30 def dirstatetuple(*x):
31 31 # x is a tuple
32 32 return x
33 33
34 34
35 35 indexformatng = b">Qiiiiii20s12x"
36 36 indexfirst = struct.calcsize(b'Q')
37 37 sizeint = struct.calcsize(b'i')
38 38 indexsize = struct.calcsize(indexformatng)
39 39
40 40
41 41 def gettype(q):
42 42 return int(q & 0xFFFF)
43 43
44 44
45 45 def offset_type(offset, type):
46 46 return int(int(offset) << 16 | type)
47 47
48 48
49 49 class BaseIndexObject(object):
50 50 @util.propertycache
51 51 def nodemap(self):
52 52 nodemap = revlogutils.NodeMap({nullid: nullrev})
53 53 for r in range(0, len(self)):
54 54 n = self[r][7]
55 55 nodemap[n] = r
56 56 return nodemap
57 57
58 def _stripnodes(self, start):
59 if 'nodemap' in vars(self):
60 for r in range(start, len(self)):
61 n = self[r][7]
62 del self.nodemap[n]
63
58 64 def clearcaches(self):
59 65 self.__dict__.pop('nodemap', None)
60 66
61 67 def __len__(self):
62 68 return self._lgt + len(self._extra)
63 69
64 70 def append(self, tup):
65 71 if 'nodemap' in vars(self):
66 72 self.nodemap[tup[7]] = len(self)
67 73 self._extra.append(tup)
68 74
69 75 def _check_index(self, i):
70 76 if not isinstance(i, int):
71 77 raise TypeError(b"expecting int indexes")
72 78 if i < 0 or i >= len(self):
73 79 raise IndexError
74 80
75 81 def __getitem__(self, i):
76 82 if i == -1:
77 83 return (0, 0, 0, -1, -1, -1, -1, nullid)
78 84 self._check_index(i)
79 85 if i >= self._lgt:
80 86 return self._extra[i - self._lgt]
81 87 index = self._calculate_index(i)
82 88 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
83 89 if i == 0:
84 90 e = list(r)
85 91 type = gettype(e[0])
86 92 e[0] = offset_type(0, type)
87 93 return tuple(e)
88 94 return r
89 95
90 96
91 97 class IndexObject(BaseIndexObject):
92 98 def __init__(self, data):
93 99 assert len(data) % indexsize == 0
94 100 self._data = data
95 101 self._lgt = len(data) // indexsize
96 102 self._extra = []
97 103
98 104 def _calculate_index(self, i):
99 105 return i * indexsize
100 106
101 107 def __delitem__(self, i):
102 108 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
103 109 raise ValueError(b"deleting slices only supports a:-1 with step 1")
104 110 i = i.start
105 111 self._check_index(i)
112 self._stripnodes(i)
106 113 if i < self._lgt:
107 114 self._data = self._data[: i * indexsize]
108 115 self._lgt = i
109 116 self._extra = []
110 117 else:
111 118 self._extra = self._extra[: i - self._lgt]
112 119
113 120
114 121 class InlinedIndexObject(BaseIndexObject):
115 122 def __init__(self, data, inline=0):
116 123 self._data = data
117 124 self._lgt = self._inline_scan(None)
118 125 self._inline_scan(self._lgt)
119 126 self._extra = []
120 127
121 128 def _inline_scan(self, lgt):
122 129 off = 0
123 130 if lgt is not None:
124 131 self._offsets = [0] * lgt
125 132 count = 0
126 133 while off <= len(self._data) - indexsize:
127 134 (s,) = struct.unpack(
128 135 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
129 136 )
130 137 if lgt is not None:
131 138 self._offsets[count] = off
132 139 count += 1
133 140 off += indexsize + s
134 141 if off != len(self._data):
135 142 raise ValueError(b"corrupted data")
136 143 return count
137 144
138 145 def __delitem__(self, i):
139 146 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
140 147 raise ValueError(b"deleting slices only supports a:-1 with step 1")
141 148 i = i.start
142 149 self._check_index(i)
150 self._stripnodes(i)
143 151 if i < self._lgt:
144 152 self._offsets = self._offsets[:i]
145 153 self._lgt = i
146 154 self._extra = []
147 155 else:
148 156 self._extra = self._extra[: i - self._lgt]
149 157
150 158 def _calculate_index(self, i):
151 159 return self._offsets[i]
152 160
153 161
154 162 def parse_index2(data, inline):
155 163 if not inline:
156 164 return IndexObject(data), None
157 165 return InlinedIndexObject(data, inline), (0, data)
158 166
159 167
160 168 def parse_dirstate(dmap, copymap, st):
161 169 parents = [st[:20], st[20:40]]
162 170 # dereference fields so they will be local in loop
163 171 format = b">cllll"
164 172 e_size = struct.calcsize(format)
165 173 pos1 = 40
166 174 l = len(st)
167 175
168 176 # the inner loop
169 177 while pos1 < l:
170 178 pos2 = pos1 + e_size
171 179 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
172 180 pos1 = pos2 + e[4]
173 181 f = st[pos2:pos1]
174 182 if b'\0' in f:
175 183 f, c = f.split(b'\0')
176 184 copymap[f] = c
177 185 dmap[f] = e[:4]
178 186 return parents
179 187
180 188
181 189 def pack_dirstate(dmap, copymap, pl, now):
182 190 now = int(now)
183 191 cs = stringio()
184 192 write = cs.write
185 193 write(b"".join(pl))
186 194 for f, e in pycompat.iteritems(dmap):
187 195 if e[0] == b'n' and e[3] == now:
188 196 # The file was last modified "simultaneously" with the current
189 197 # write to dirstate (i.e. within the same second for file-
190 198 # systems with a granularity of 1 sec). This commonly happens
191 199 # for at least a couple of files on 'update'.
192 200 # The user could change the file without changing its size
193 201 # within the same second. Invalidate the file's mtime in
194 202 # dirstate, forcing future 'status' calls to compare the
195 203 # contents of the file if the size is the same. This prevents
196 204 # mistakenly treating such files as clean.
197 205 e = dirstatetuple(e[0], e[1], e[2], -1)
198 206 dmap[f] = e
199 207
200 208 if f in copymap:
201 209 f = b"%s\0%s" % (f, copymap[f])
202 210 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
203 211 write(e)
204 212 write(f)
205 213 return cs.getvalue()
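With `_stripnodes` wired into `__delitem__` above, deleting a tail slice of the pure-Python index also purges the nodemap entries of the stripped revisions whenever the nodemap has already been materialized. A hedged usage sketch follows; it assumes this revision's pure-Python module is importable as mercurial.pure.parsers and builds an empty, non-inline index by hand.

from mercurial.pure import parsers

index, _cache = parsers.parse_index2(b'', False)   # empty, non-inline index

# index entries: (offset_type, size, rawsize, base, link, p1, p2, node)
index.append((0, 0, 0, -1, -1, -1, -1, b'\x01' * 20))
index.append((0, 0, 0, -1, -1, -1, -1, b'\x02' * 20))

nm = index.nodemap            # materialize the cached node -> rev mapping
del index[1:-1]               # strip rev 1; _stripnodes drops its node
assert (b'\x02' * 20) not in index.nodemap
assert (b'\x01' * 20) in index.nodemap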
@@ -1,2955 +1,2960 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes complaints
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
150 150
151 151 @attr.s(slots=True, frozen=True)
152 152 class _revisioninfo(object):
153 153 """Information about a revision that allows building its fulltext
154 154 node: expected hash of the revision
155 155 p1, p2: parent revs of the revision
156 156 btext: built text cache consisting of a one-element list
157 157 cachedelta: (baserev, uncompressed_delta) or None
158 158 flags: flags associated to the revision storage
159 159
160 160 One of btext[0] or cachedelta must be set.
161 161 """
162 162
163 163 node = attr.ib()
164 164 p1 = attr.ib()
165 165 p2 = attr.ib()
166 166 btext = attr.ib()
167 167 textlen = attr.ib()
168 168 cachedelta = attr.ib()
169 169 flags = attr.ib()
170 170
171 171
172 172 @interfaceutil.implementer(repository.irevisiondelta)
173 173 @attr.s(slots=True)
174 174 class revlogrevisiondelta(object):
175 175 node = attr.ib()
176 176 p1node = attr.ib()
177 177 p2node = attr.ib()
178 178 basenode = attr.ib()
179 179 flags = attr.ib()
180 180 baserevisionsize = attr.ib()
181 181 revision = attr.ib()
182 182 delta = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 # index v0:
195 195 # 4 bytes: offset
196 196 # 4 bytes: compressed length
197 197 # 4 bytes: base rev
198 198 # 4 bytes: link rev
199 199 # 20 bytes: parent 1 nodeid
200 200 # 20 bytes: parent 2 nodeid
201 201 # 20 bytes: nodeid
202 202 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 203 indexformatv0_pack = indexformatv0.pack
204 204 indexformatv0_unpack = indexformatv0.unpack
205 205
206 206
207 207 class revlogoldindex(list):
208 208 @util.propertycache
209 209 def nodemap(self):
210 210 nodemap = revlogutils.NodeMap({nullid: nullrev})
211 211 for r in range(0, len(self)):
212 212 n = self[r][7]
213 213 nodemap[n] = r
214 214 return nodemap
215 215
216 216 def append(self, tup):
217 217 self.nodemap[tup[7]] = len(self)
218 218 super(revlogoldindex, self).append(tup)
219 219
220 def __delitem__(self, i):
221 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
222 raise ValueError(b"deleting slices only supports a:-1 with step 1")
223 for r in pycompat.xrange(i.start, len(self)):
224 del self.nodemap[self[r][7]]
225 super(revlogoldindex, self).__delitem__(i)
226
220 227 def clearcaches(self):
221 228 self.__dict__.pop('nodemap', None)
222 229
223 230 def __getitem__(self, i):
224 231 if i == -1:
225 232 return (0, 0, 0, -1, -1, -1, -1, nullid)
226 233 return list.__getitem__(self, i)
227 234
228 235
229 236 class revlogoldio(object):
230 237 def __init__(self):
231 238 self.size = indexformatv0.size
232 239
233 240 def parseindex(self, data, inline):
234 241 s = self.size
235 242 index = []
236 243 nodemap = revlogutils.NodeMap({nullid: nullrev})
237 244 n = off = 0
238 245 l = len(data)
239 246 while off + s <= l:
240 247 cur = data[off : off + s]
241 248 off += s
242 249 e = indexformatv0_unpack(cur)
243 250 # transform to revlogv1 format
244 251 e2 = (
245 252 offset_type(e[0], 0),
246 253 e[1],
247 254 -1,
248 255 e[2],
249 256 e[3],
250 257 nodemap.get(e[4], nullrev),
251 258 nodemap.get(e[5], nullrev),
252 259 e[6],
253 260 )
254 261 index.append(e2)
255 262 nodemap[e[6]] = n
256 263 n += 1
257 264
258 265 index = revlogoldindex(index)
259 266 return index, None
260 267
261 268 def packentry(self, entry, node, version, rev):
262 269 if gettype(entry[0]):
263 270 raise error.RevlogError(
264 271 _(b'index entry flags need revlog version 1')
265 272 )
266 273 e2 = (
267 274 getoffset(entry[0]),
268 275 entry[1],
269 276 entry[3],
270 277 entry[4],
271 278 node(entry[5]),
272 279 node(entry[6]),
273 280 entry[7],
274 281 )
275 282 return indexformatv0_pack(*e2)
276 283
277 284
278 285 # index ng:
279 286 # 6 bytes: offset
280 287 # 2 bytes: flags
281 288 # 4 bytes: compressed length
282 289 # 4 bytes: uncompressed length
283 290 # 4 bytes: base rev
284 291 # 4 bytes: link rev
285 292 # 4 bytes: parent 1 rev
286 293 # 4 bytes: parent 2 rev
287 294 # 32 bytes: nodeid
288 295 indexformatng = struct.Struct(b">Qiiiiii20s12x")
289 296 indexformatng_pack = indexformatng.pack
290 297 versionformat = struct.Struct(b">I")
291 298 versionformat_pack = versionformat.pack
292 299 versionformat_unpack = versionformat.unpack
293 300
294 301 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
295 302 # signed integer)
296 303 _maxentrysize = 0x7FFFFFFF
297 304
298 305
299 306 class revlogio(object):
300 307 def __init__(self):
301 308 self.size = indexformatng.size
302 309
303 310 def parseindex(self, data, inline):
304 311 # call the C implementation to parse the index data
305 312 index, cache = parsers.parse_index2(data, inline)
306 313 return index, cache
307 314
308 315 def packentry(self, entry, node, version, rev):
309 316 p = indexformatng_pack(*entry)
310 317 if rev == 0:
311 318 p = versionformat_pack(version) + p[4:]
312 319 return p
313 320
314 321
315 322 class revlog(object):
316 323 """
317 324 the underlying revision storage object
318 325
319 326 A revlog consists of two parts, an index and the revision data.
320 327
321 328 The index is a file with a fixed record size containing
322 329 information on each revision, including its nodeid (hash), the
323 330 nodeids of its parents, the position and offset of its data within
324 331 the data file, and the revision it's based on. Finally, each entry
325 332 contains a linkrev entry that can serve as a pointer to external
326 333 data.
327 334
328 335 The revision data itself is a linear collection of data chunks.
329 336 Each chunk represents a revision and is usually represented as a
330 337 delta against the previous chunk. To bound lookup time, runs of
331 338 deltas are limited to about 2 times the length of the original
332 339 version data. This makes retrieval of a version proportional to
333 340 its size, or O(1) relative to the number of revisions.
334 341
335 342 Both pieces of the revlog are written to in an append-only
336 343 fashion, which means we never need to rewrite a file to insert or
337 344 remove data, and can use some simple techniques to avoid the need
338 345 for locking while reading.
339 346
340 347 If checkambig, indexfile is opened with checkambig=True at
341 348 writing, to avoid file stat ambiguity.
342 349
343 350 If mmaplargeindex is True, and an mmapindexthreshold is set, the
344 351 index will be mmapped rather than read if it is larger than the
345 352 configured threshold.
346 353
347 354 If censorable is True, the revlog can have censored revisions.
348 355
349 356 If `upperboundcomp` is not None, this is the expected maximal gain from
350 357 compression for the data content.
351 358 """
352 359
353 360 _flagserrorclass = error.RevlogError
354 361
355 362 def __init__(
356 363 self,
357 364 opener,
358 365 indexfile,
359 366 datafile=None,
360 367 checkambig=False,
361 368 mmaplargeindex=False,
362 369 censorable=False,
363 370 upperboundcomp=None,
364 371 ):
365 372 """
366 373 create a revlog object
367 374
368 375 opener is a function that abstracts the file opening operation
369 376 and can be used to implement COW semantics or the like.
370 377
371 378 """
372 379 self.upperboundcomp = upperboundcomp
373 380 self.indexfile = indexfile
374 381 self.datafile = datafile or (indexfile[:-2] + b".d")
375 382 self.opener = opener
376 383 # When True, indexfile is opened with checkambig=True at writing, to
377 384 # avoid file stat ambiguity.
378 385 self._checkambig = checkambig
379 386 self._mmaplargeindex = mmaplargeindex
380 387 self._censorable = censorable
381 388 # 3-tuple of (node, rev, text) for a raw revision.
382 389 self._revisioncache = None
383 390 # Maps rev to chain base rev.
384 391 self._chainbasecache = util.lrucachedict(100)
385 392 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
386 393 self._chunkcache = (0, b'')
387 394 # How much data to read and cache into the raw revlog data cache.
388 395 self._chunkcachesize = 65536
389 396 self._maxchainlen = None
390 397 self._deltabothparents = True
391 398 self.index = None
392 399 # Mapping of partial identifiers to full nodes.
393 400 self._pcache = {}
394 401 # Mapping of revision integer to full node.
395 402 self._nodepos = None
396 403 self._compengine = b'zlib'
397 404 self._compengineopts = {}
398 405 self._maxdeltachainspan = -1
399 406 self._withsparseread = False
400 407 self._sparserevlog = False
401 408 self._srdensitythreshold = 0.50
402 409 self._srmingapsize = 262144
403 410
404 411 # Make copy of flag processors so each revlog instance can support
405 412 # custom flags.
406 413 self._flagprocessors = dict(flagutil.flagprocessors)
407 414
408 415 # 2-tuple of file handles being used for active writing.
409 416 self._writinghandles = None
410 417
411 418 self._loadindex()
412 419
413 420 def _loadindex(self):
414 421 mmapindexthreshold = None
415 422 opts = self.opener.options
416 423
417 424 if b'revlogv2' in opts:
418 425 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
419 426 elif b'revlogv1' in opts:
420 427 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
421 428 if b'generaldelta' in opts:
422 429 newversionflags |= FLAG_GENERALDELTA
423 430 elif b'revlogv0' in self.opener.options:
424 431 newversionflags = REVLOGV0
425 432 else:
426 433 newversionflags = REVLOG_DEFAULT_VERSION
427 434
428 435 if b'chunkcachesize' in opts:
429 436 self._chunkcachesize = opts[b'chunkcachesize']
430 437 if b'maxchainlen' in opts:
431 438 self._maxchainlen = opts[b'maxchainlen']
432 439 if b'deltabothparents' in opts:
433 440 self._deltabothparents = opts[b'deltabothparents']
434 441 self._lazydelta = bool(opts.get(b'lazydelta', True))
435 442 self._lazydeltabase = False
436 443 if self._lazydelta:
437 444 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
438 445 if b'compengine' in opts:
439 446 self._compengine = opts[b'compengine']
440 447 if b'zlib.level' in opts:
441 448 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
442 449 if b'zstd.level' in opts:
443 450 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
444 451 if b'maxdeltachainspan' in opts:
445 452 self._maxdeltachainspan = opts[b'maxdeltachainspan']
446 453 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
447 454 mmapindexthreshold = opts[b'mmapindexthreshold']
448 455 self.hassidedata = bool(opts.get(b'side-data', False))
449 456 if self.hassidedata:
450 457 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
451 458 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
452 459 withsparseread = bool(opts.get(b'with-sparse-read', False))
453 460 # sparse-revlog forces sparse-read
454 461 self._withsparseread = self._sparserevlog or withsparseread
455 462 if b'sparse-read-density-threshold' in opts:
456 463 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
457 464 if b'sparse-read-min-gap-size' in opts:
458 465 self._srmingapsize = opts[b'sparse-read-min-gap-size']
459 466 if opts.get(b'enableellipsis'):
460 467 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
461 468
462 469 # revlog v0 doesn't have flag processors
463 470 for flag, processor in pycompat.iteritems(
464 471 opts.get(b'flagprocessors', {})
465 472 ):
466 473 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
467 474
468 475 if self._chunkcachesize <= 0:
469 476 raise error.RevlogError(
470 477 _(b'revlog chunk cache size %r is not greater than 0')
471 478 % self._chunkcachesize
472 479 )
473 480 elif self._chunkcachesize & (self._chunkcachesize - 1):
474 481 raise error.RevlogError(
475 482 _(b'revlog chunk cache size %r is not a power of 2')
476 483 % self._chunkcachesize
477 484 )
478 485
479 486 indexdata = b''
480 487 self._initempty = True
481 488 try:
482 489 with self._indexfp() as f:
483 490 if (
484 491 mmapindexthreshold is not None
485 492 and self.opener.fstat(f).st_size >= mmapindexthreshold
486 493 ):
487 494 # TODO: should .close() to release resources without
488 495 # relying on Python GC
489 496 indexdata = util.buffer(util.mmapread(f))
490 497 else:
491 498 indexdata = f.read()
492 499 if len(indexdata) > 0:
493 500 versionflags = versionformat_unpack(indexdata[:4])[0]
494 501 self._initempty = False
495 502 else:
496 503 versionflags = newversionflags
497 504 except IOError as inst:
498 505 if inst.errno != errno.ENOENT:
499 506 raise
500 507
501 508 versionflags = newversionflags
502 509
503 510 self.version = versionflags
504 511
505 512 flags = versionflags & ~0xFFFF
506 513 fmt = versionflags & 0xFFFF
507 514
508 515 if fmt == REVLOGV0:
509 516 if flags:
510 517 raise error.RevlogError(
511 518 _(b'unknown flags (%#04x) in version %d revlog %s')
512 519 % (flags >> 16, fmt, self.indexfile)
513 520 )
514 521
515 522 self._inline = False
516 523 self._generaldelta = False
517 524
518 525 elif fmt == REVLOGV1:
519 526 if flags & ~REVLOGV1_FLAGS:
520 527 raise error.RevlogError(
521 528 _(b'unknown flags (%#04x) in version %d revlog %s')
522 529 % (flags >> 16, fmt, self.indexfile)
523 530 )
524 531
525 532 self._inline = versionflags & FLAG_INLINE_DATA
526 533 self._generaldelta = versionflags & FLAG_GENERALDELTA
527 534
528 535 elif fmt == REVLOGV2:
529 536 if flags & ~REVLOGV2_FLAGS:
530 537 raise error.RevlogError(
531 538 _(b'unknown flags (%#04x) in version %d revlog %s')
532 539 % (flags >> 16, fmt, self.indexfile)
533 540 )
534 541
535 542 self._inline = versionflags & FLAG_INLINE_DATA
536 543 # generaldelta implied by version 2 revlogs.
537 544 self._generaldelta = True
538 545
539 546 else:
540 547 raise error.RevlogError(
541 548 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
542 549 )
543 550 # sparse-revlog can't be on without general-delta (issue6056)
544 551 if not self._generaldelta:
545 552 self._sparserevlog = False
546 553
547 554 self._storedeltachains = True
548 555
549 556 self._io = revlogio()
550 557 if self.version == REVLOGV0:
551 558 self._io = revlogoldio()
552 559 try:
553 560 d = self._io.parseindex(indexdata, self._inline)
554 561 except (ValueError, IndexError):
555 562 raise error.RevlogError(
556 563 _(b"index %s is corrupted") % self.indexfile
557 564 )
558 565 self.index, self._chunkcache = d
559 566 self.nodemap = self.index.nodemap
560 567 if not self._chunkcache:
561 568 self._chunkclear()
562 569 # revnum -> (chain-length, sum-delta-length)
563 570 self._chaininfocache = {}
564 571 # revlog header -> revlog compressor
565 572 self._decompressors = {}
566 573
567 574 @util.propertycache
568 575 def _compressor(self):
569 576 engine = util.compengines[self._compengine]
570 577 return engine.revlogcompressor(self._compengineopts)
571 578
572 579 def _indexfp(self, mode=b'r'):
573 580 """file object for the revlog's index file"""
574 581 args = {'mode': mode}
575 582 if mode != b'r':
576 583 args['checkambig'] = self._checkambig
577 584 if mode == b'w':
578 585 args['atomictemp'] = True
579 586 return self.opener(self.indexfile, **args)
580 587
581 588 def _datafp(self, mode=b'r'):
582 589 """file object for the revlog's data file"""
583 590 return self.opener(self.datafile, mode=mode)
584 591
585 592 @contextlib.contextmanager
586 593 def _datareadfp(self, existingfp=None):
587 594 """file object suitable to read data"""
588 595 # Use explicit file handle, if given.
589 596 if existingfp is not None:
590 597 yield existingfp
591 598
592 599 # Use a file handle being actively used for writes, if available.
593 600 # There is some danger to doing this because reads will seek the
594 601 # file. However, _writeentry() performs a SEEK_END before all writes,
595 602 # so we should be safe.
596 603 elif self._writinghandles:
597 604 if self._inline:
598 605 yield self._writinghandles[0]
599 606 else:
600 607 yield self._writinghandles[1]
601 608
602 609 # Otherwise open a new file handle.
603 610 else:
604 611 if self._inline:
605 612 func = self._indexfp
606 613 else:
607 614 func = self._datafp
608 615 with func() as fp:
609 616 yield fp
610 617
611 618 def tiprev(self):
612 619 return len(self.index) - 1
613 620
614 621 def tip(self):
615 622 return self.node(self.tiprev())
616 623
617 624 def __contains__(self, rev):
618 625 return 0 <= rev < len(self)
619 626
620 627 def __len__(self):
621 628 return len(self.index)
622 629
623 630 def __iter__(self):
624 631 return iter(pycompat.xrange(len(self)))
625 632
626 633 def revs(self, start=0, stop=None):
627 634 """iterate over all rev in this revlog (from start to stop)"""
628 635 return storageutil.iterrevs(len(self), start=start, stop=stop)
629 636
630 637 @util.propertycache
631 638 def nodemap(self):
632 639 if self.index:
633 640 # populate mapping down to the initial node
634 641 node0 = self.index[0][7] # get around changelog filtering
635 642 self.rev(node0)
636 643 return self.index.nodemap
637 644
638 645 @property
639 646 def _nodecache(self):
640 647 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
641 648 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
642 649 return self.index.nodemap
643 650
644 651 def hasnode(self, node):
645 652 try:
646 653 self.rev(node)
647 654 return True
648 655 except KeyError:
649 656 return False
650 657
651 658 def candelta(self, baserev, rev):
652 659 """whether two revisions (baserev, rev) can be delta-ed or not"""
653 660 # Disable delta if either rev requires a content-changing flag
654 661 # processor (ex. LFS). This is because such flag processor can alter
655 662 # the rawtext content that the delta will be based on, and two clients
656 663 # could have a same revlog node with different flags (i.e. different
657 664 # rawtext contents) and the delta could be incompatible.
658 665 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
659 666 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
660 667 ):
661 668 return False
662 669 return True
663 670
664 671 def clearcaches(self):
665 672 self._revisioncache = None
666 673 self._chainbasecache.clear()
667 674 self._chunkcache = (0, b'')
668 675 self._pcache = {}
669 676 self.index.clearcaches()
670 677
671 678 def rev(self, node):
672 679 try:
673 680 return self.index.nodemap[node]
674 681 except TypeError:
675 682 raise
676 683 except error.RevlogError:
677 684 # parsers.c radix tree lookup failed
678 685 if node == wdirid or node in wdirfilenodeids:
679 686 raise error.WdirUnsupported
680 687 raise error.LookupError(node, self.indexfile, _(b'no node'))
681 688
682 689 # Accessors for index entries.
683 690
684 691 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
685 692 # are flags.
686 693 def start(self, rev):
687 694 return int(self.index[rev][0] >> 16)
688 695
689 696 def flags(self, rev):
690 697 return self.index[rev][0] & 0xFFFF
691 698
692 699 def length(self, rev):
693 700 return self.index[rev][1]
694 701
695 702 def rawsize(self, rev):
696 703 """return the length of the uncompressed text for a given revision"""
697 704 l = self.index[rev][2]
698 705 if l >= 0:
699 706 return l
700 707
701 708 t = self.rawdata(rev)
702 709 return len(t)
703 710
704 711 def size(self, rev):
705 712 """length of non-raw text (processed by a "read" flag processor)"""
706 713 # fast path: if no "read" flag processor could change the content,
707 714 # size is rawsize. note: ELLIPSIS is known to not change the content.
708 715 flags = self.flags(rev)
709 716 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
710 717 return self.rawsize(rev)
711 718
712 719 return len(self.revision(rev, raw=False))
713 720
714 721 def chainbase(self, rev):
715 722 base = self._chainbasecache.get(rev)
716 723 if base is not None:
717 724 return base
718 725
719 726 index = self.index
720 727 iterrev = rev
721 728 base = index[iterrev][3]
722 729 while base != iterrev:
723 730 iterrev = base
724 731 base = index[iterrev][3]
725 732
726 733 self._chainbasecache[rev] = base
727 734 return base
728 735
729 736 def linkrev(self, rev):
730 737 return self.index[rev][4]
731 738
732 739 def parentrevs(self, rev):
733 740 try:
734 741 entry = self.index[rev]
735 742 except IndexError:
736 743 if rev == wdirrev:
737 744 raise error.WdirUnsupported
738 745 raise
739 746
740 747 return entry[5], entry[6]
741 748
742 749 # fast parentrevs(rev) where rev isn't filtered
743 750 _uncheckedparentrevs = parentrevs
744 751
745 752 def node(self, rev):
746 753 try:
747 754 return self.index[rev][7]
748 755 except IndexError:
749 756 if rev == wdirrev:
750 757 raise error.WdirUnsupported
751 758 raise
752 759
753 760 # Derived from index values.
754 761
755 762 def end(self, rev):
756 763 return self.start(rev) + self.length(rev)
757 764
758 765 def parents(self, node):
759 766 i = self.index
760 767 d = i[self.rev(node)]
761 768 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
762 769
763 770 def chainlen(self, rev):
764 771 return self._chaininfo(rev)[0]
765 772
766 773 def _chaininfo(self, rev):
767 774 chaininfocache = self._chaininfocache
768 775 if rev in chaininfocache:
769 776 return chaininfocache[rev]
770 777 index = self.index
771 778 generaldelta = self._generaldelta
772 779 iterrev = rev
773 780 e = index[iterrev]
774 781 clen = 0
775 782 compresseddeltalen = 0
776 783 while iterrev != e[3]:
777 784 clen += 1
778 785 compresseddeltalen += e[1]
779 786 if generaldelta:
780 787 iterrev = e[3]
781 788 else:
782 789 iterrev -= 1
783 790 if iterrev in chaininfocache:
784 791 t = chaininfocache[iterrev]
785 792 clen += t[0]
786 793 compresseddeltalen += t[1]
787 794 break
788 795 e = index[iterrev]
789 796 else:
790 797 # Add text length of base since decompressing that also takes
791 798 # work. For cache hits the length is already included.
792 799 compresseddeltalen += e[1]
793 800 r = (clen, compresseddeltalen)
794 801 chaininfocache[rev] = r
795 802 return r
796 803
797 804 def _deltachain(self, rev, stoprev=None):
798 805 """Obtain the delta chain for a revision.
799 806
800 807 ``stoprev`` specifies a revision to stop at. If not specified, we
801 808 stop at the base of the chain.
802 809
803 810 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
804 811 revs in ascending order and ``stopped`` is a bool indicating whether
805 812 ``stoprev`` was hit.
806 813 """
807 814 # Try C implementation.
808 815 try:
809 816 return self.index.deltachain(rev, stoprev, self._generaldelta)
810 817 except AttributeError:
811 818 pass
812 819
813 820 chain = []
814 821
815 822 # Alias to prevent attribute lookup in tight loop.
816 823 index = self.index
817 824 generaldelta = self._generaldelta
818 825
819 826 iterrev = rev
820 827 e = index[iterrev]
821 828 while iterrev != e[3] and iterrev != stoprev:
822 829 chain.append(iterrev)
823 830 if generaldelta:
824 831 iterrev = e[3]
825 832 else:
826 833 iterrev -= 1
827 834 e = index[iterrev]
828 835
829 836 if iterrev == stoprev:
830 837 stopped = True
831 838 else:
832 839 chain.append(iterrev)
833 840 stopped = False
834 841
835 842 chain.reverse()
836 843 return chain, stopped
837 844
838 845 def ancestors(self, revs, stoprev=0, inclusive=False):
839 846 """Generate the ancestors of 'revs' in reverse revision order.
840 847 Does not generate revs lower than stoprev.
841 848
842 849 See the documentation for ancestor.lazyancestors for more details."""
843 850
844 851 # first, make sure start revisions aren't filtered
845 852 revs = list(revs)
846 853 checkrev = self.node
847 854 for r in revs:
848 855 checkrev(r)
849 856 # and we're sure ancestors aren't filtered as well
850 857
851 858 if rustancestor is not None:
852 859 lazyancestors = rustancestor.LazyAncestors
853 860 arg = self.index
854 861 elif util.safehasattr(parsers, b'rustlazyancestors'):
855 862 lazyancestors = ancestor.rustlazyancestors
856 863 arg = self.index
857 864 else:
858 865 lazyancestors = ancestor.lazyancestors
859 866 arg = self._uncheckedparentrevs
860 867 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
861 868
862 869 def descendants(self, revs):
863 870 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
864 871
865 872 def findcommonmissing(self, common=None, heads=None):
866 873 """Return a tuple of the ancestors of common and the ancestors of heads
867 874 that are not ancestors of common. In revset terminology, we return the
868 875 tuple:
869 876
870 877 ::common, (::heads) - (::common)
871 878
872 879 The list is sorted by revision number, meaning it is
873 880 topologically sorted.
874 881
875 882 'heads' and 'common' are both lists of node IDs. If heads is
876 883 not supplied, uses all of the revlog's heads. If common is not
877 884 supplied, uses nullid."""
878 885 if common is None:
879 886 common = [nullid]
880 887 if heads is None:
881 888 heads = self.heads()
882 889
883 890 common = [self.rev(n) for n in common]
884 891 heads = [self.rev(n) for n in heads]
885 892
886 893 # we want the ancestors, but inclusive
887 894 class lazyset(object):
888 895 def __init__(self, lazyvalues):
889 896 self.addedvalues = set()
890 897 self.lazyvalues = lazyvalues
891 898
892 899 def __contains__(self, value):
893 900 return value in self.addedvalues or value in self.lazyvalues
894 901
895 902 def __iter__(self):
896 903 added = self.addedvalues
897 904 for r in added:
898 905 yield r
899 906 for r in self.lazyvalues:
900 907 if r not in added:
901 908 yield r
902 909
903 910 def add(self, value):
904 911 self.addedvalues.add(value)
905 912
906 913 def update(self, values):
907 914 self.addedvalues.update(values)
908 915
909 916 has = lazyset(self.ancestors(common))
910 917 has.add(nullrev)
911 918 has.update(common)
912 919
913 920 # take all ancestors from heads that aren't in has
914 921 missing = set()
915 922 visit = collections.deque(r for r in heads if r not in has)
916 923 while visit:
917 924 r = visit.popleft()
918 925 if r in missing:
919 926 continue
920 927 else:
921 928 missing.add(r)
922 929 for p in self.parentrevs(r):
923 930 if p not in has:
924 931 visit.append(p)
925 932 missing = list(missing)
926 933 missing.sort()
927 934 return has, [self.node(miss) for miss in missing]
928 935
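# Editor's note: a small standalone sketch of the walk above that collects
# ancestors of the heads which are not ancestors of common; the parent map and
# the revisions are invented.
import collections
toyparents = {0: (), 1: (0,), 2: (0,), 3: (1,), 4: (2, 3)}
has = {0, 1}                                   # plays the role of ::common
missing = set()
visit = collections.deque(r for r in [4] if r not in has)   # heads = [4]
while visit:
    r = visit.popleft()
    if r in missing:
        continue
    missing.add(r)
    for p in toyparents[r]:
        if p not in has:
            visit.append(p)
assert sorted(missing) == [2, 3, 4]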
929 936 def incrementalmissingrevs(self, common=None):
930 937 """Return an object that can be used to incrementally compute the
931 938 revision numbers of the ancestors of arbitrary sets that are not
932 939 ancestors of common. This is an ancestor.incrementalmissingancestors
933 940 object.
934 941
935 942 'common' is a list of revision numbers. If common is not supplied, uses
936 943 nullrev.
937 944 """
938 945 if common is None:
939 946 common = [nullrev]
940 947
941 948 if rustancestor is not None:
942 949 return rustancestor.MissingAncestors(self.index, common)
943 950 return ancestor.incrementalmissingancestors(self.parentrevs, common)
944 951
945 952 def findmissingrevs(self, common=None, heads=None):
946 953 """Return the revision numbers of the ancestors of heads that
947 954 are not ancestors of common.
948 955
949 956 More specifically, return a list of revision numbers corresponding to
950 957 nodes N such that every N satisfies the following constraints:
951 958
952 959 1. N is an ancestor of some node in 'heads'
953 960 2. N is not an ancestor of any node in 'common'
954 961
955 962 The list is sorted by revision number, meaning it is
956 963 topologically sorted.
957 964
958 965 'heads' and 'common' are both lists of revision numbers. If heads is
959 966 not supplied, uses all of the revlog's heads. If common is not
960 967 supplied, uses nullid."""
961 968 if common is None:
962 969 common = [nullrev]
963 970 if heads is None:
964 971 heads = self.headrevs()
965 972
966 973 inc = self.incrementalmissingrevs(common=common)
967 974 return inc.missingancestors(heads)
968 975
969 976 def findmissing(self, common=None, heads=None):
970 977 """Return the ancestors of heads that are not ancestors of common.
971 978
972 979 More specifically, return a list of nodes N such that every N
973 980 satisfies the following constraints:
974 981
975 982 1. N is an ancestor of some node in 'heads'
976 983 2. N is not an ancestor of any node in 'common'
977 984
978 985 The list is sorted by revision number, meaning it is
979 986 topologically sorted.
980 987
981 988 'heads' and 'common' are both lists of node IDs. If heads is
982 989 not supplied, uses all of the revlog's heads. If common is not
983 990 supplied, uses nullid."""
984 991 if common is None:
985 992 common = [nullid]
986 993 if heads is None:
987 994 heads = self.heads()
988 995
989 996 common = [self.rev(n) for n in common]
990 997 heads = [self.rev(n) for n in heads]
991 998
992 999 inc = self.incrementalmissingrevs(common=common)
993 1000 return [self.node(r) for r in inc.missingancestors(heads)]
994 1001
995 1002 def nodesbetween(self, roots=None, heads=None):
996 1003 """Return a topological path from 'roots' to 'heads'.
997 1004
998 1005 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
999 1006 topologically sorted list of all nodes N that satisfy both of
1000 1007 these constraints:
1001 1008
1002 1009 1. N is a descendant of some node in 'roots'
1003 1010 2. N is an ancestor of some node in 'heads'
1004 1011
1005 1012 Every node is considered to be both a descendant and an ancestor
1006 1013 of itself, so every reachable node in 'roots' and 'heads' will be
1007 1014 included in 'nodes'.
1008 1015
1009 1016 'outroots' is the list of reachable nodes in 'roots', i.e., the
1010 1017 subset of 'roots' that is returned in 'nodes'. Likewise,
1011 1018 'outheads' is the subset of 'heads' that is also in 'nodes'.
1012 1019
1013 1020 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1014 1021 unspecified, uses nullid as the only root. If 'heads' is
1015 1022 unspecified, uses list of all of the revlog's heads."""
1016 1023 nonodes = ([], [], [])
1017 1024 if roots is not None:
1018 1025 roots = list(roots)
1019 1026 if not roots:
1020 1027 return nonodes
1021 1028 lowestrev = min([self.rev(n) for n in roots])
1022 1029 else:
1023 1030 roots = [nullid] # Everybody's a descendant of nullid
1024 1031 lowestrev = nullrev
1025 1032 if (lowestrev == nullrev) and (heads is None):
1026 1033 # We want _all_ the nodes!
1027 1034 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1028 1035 if heads is None:
1029 1036 # All nodes are ancestors, so the latest ancestor is the last
1030 1037 # node.
1031 1038 highestrev = len(self) - 1
1032 1039 # Set ancestors to None to signal that every node is an ancestor.
1033 1040 ancestors = None
1034 1041 # Set heads to an empty dictionary for later discovery of heads
1035 1042 heads = {}
1036 1043 else:
1037 1044 heads = list(heads)
1038 1045 if not heads:
1039 1046 return nonodes
1040 1047 ancestors = set()
1041 1048 # Turn heads into a dictionary so we can remove 'fake' heads.
1042 1049 # Also, later we will be using it to filter out the heads we can't
1043 1050 # find from roots.
1044 1051 heads = dict.fromkeys(heads, False)
1045 1052 # Start at the top and keep marking parents until we're done.
1046 1053 nodestotag = set(heads)
1047 1054 # Remember where the top was so we can use it as a limit later.
1048 1055 highestrev = max([self.rev(n) for n in nodestotag])
1049 1056 while nodestotag:
1050 1057 # grab a node to tag
1051 1058 n = nodestotag.pop()
1052 1059 # Never tag nullid
1053 1060 if n == nullid:
1054 1061 continue
1055 1062 # A node's revision number represents its place in a
1056 1063 # topologically sorted list of nodes.
1057 1064 r = self.rev(n)
1058 1065 if r >= lowestrev:
1059 1066 if n not in ancestors:
1060 1067 # If we are possibly a descendant of one of the roots
1061 1068 # and we haven't already been marked as an ancestor
1062 1069 ancestors.add(n) # Mark as ancestor
1063 1070 # Add non-nullid parents to list of nodes to tag.
1064 1071 nodestotag.update(
1065 1072 [p for p in self.parents(n) if p != nullid]
1066 1073 )
1067 1074 elif n in heads: # We've seen it before, is it a fake head?
1068 1075 # So it is, real heads should not be the ancestors of
1069 1076 # any other heads.
1070 1077 heads.pop(n)
1071 1078 if not ancestors:
1072 1079 return nonodes
1073 1080 # Now that we have our set of ancestors, we want to remove any
1074 1081 # roots that are not ancestors.
1075 1082
1076 1083 # If one of the roots was nullid, everything is included anyway.
1077 1084 if lowestrev > nullrev:
1078 1085 # But, since we weren't, let's recompute the lowest rev to not
1079 1086 # include roots that aren't ancestors.
1080 1087
1081 1088 # Filter out roots that aren't ancestors of heads
1082 1089 roots = [root for root in roots if root in ancestors]
1083 1090 # Recompute the lowest revision
1084 1091 if roots:
1085 1092 lowestrev = min([self.rev(root) for root in roots])
1086 1093 else:
1087 1094 # No more roots? Return empty list
1088 1095 return nonodes
1089 1096 else:
1090 1097 # We are descending from nullid, and don't need to care about
1091 1098 # any other roots.
1092 1099 lowestrev = nullrev
1093 1100 roots = [nullid]
1094 1101 # Transform our roots list into a set.
1095 1102 descendants = set(roots)
1096 1103 # Also, keep the original roots so we can filter out roots that aren't
1097 1104 # 'real' roots (i.e. are descended from other roots).
1098 1105 roots = descendants.copy()
1099 1106 # Our topologically sorted list of output nodes.
1100 1107 orderedout = []
1101 1108 # Don't start at nullid since we don't want nullid in our output list,
1102 1109 # and if nullid shows up in descendants, empty parents will look like
1103 1110 # they're descendants.
1104 1111 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1105 1112 n = self.node(r)
1106 1113 isdescendant = False
1107 1114 if lowestrev == nullrev: # Everybody is a descendant of nullid
1108 1115 isdescendant = True
1109 1116 elif n in descendants:
1110 1117 # n is already a descendant
1111 1118 isdescendant = True
1112 1119 # This check only needs to be done here because all the roots
1113 1120 # will start being marked as descendants before the loop.
1114 1121 if n in roots:
1115 1122 # If n was a root, check if it's a 'real' root.
1116 1123 p = tuple(self.parents(n))
1117 1124 # If any of its parents are descendants, it's not a root.
1118 1125 if (p[0] in descendants) or (p[1] in descendants):
1119 1126 roots.remove(n)
1120 1127 else:
1121 1128 p = tuple(self.parents(n))
1122 1129 # A node is a descendant if either of its parents is a
1123 1130 # descendant. (We seeded the descendants set with the roots
1124 1131 # up there, remember?)
1125 1132 if (p[0] in descendants) or (p[1] in descendants):
1126 1133 descendants.add(n)
1127 1134 isdescendant = True
1128 1135 if isdescendant and ((ancestors is None) or (n in ancestors)):
1129 1136 # Only include nodes that are both descendants and ancestors.
1130 1137 orderedout.append(n)
1131 1138 if (ancestors is not None) and (n in heads):
1132 1139 # We're trying to figure out which heads are reachable
1133 1140 # from roots.
1134 1141 # Mark this head as having been reached
1135 1142 heads[n] = True
1136 1143 elif ancestors is None:
1137 1144 # Otherwise, we're trying to discover the heads.
1138 1145 # Assume this is a head because if it isn't, the next step
1139 1146 # will eventually remove it.
1140 1147 heads[n] = True
1141 1148 # But, obviously its parents aren't.
1142 1149 for p in self.parents(n):
1143 1150 heads.pop(p, None)
1144 1151 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1145 1152 roots = list(roots)
1146 1153 assert orderedout
1147 1154 assert roots
1148 1155 assert heads
1149 1156 return (orderedout, roots, heads)
1150 1157
1151 1158 def headrevs(self, revs=None):
1152 1159 if revs is None:
1153 1160 try:
1154 1161 return self.index.headrevs()
1155 1162 except AttributeError:
1156 1163 return self._headrevs()
1157 1164 if rustdagop is not None:
1158 1165 return rustdagop.headrevs(self.index, revs)
1159 1166 return dagop.headrevs(revs, self._uncheckedparentrevs)
1160 1167
1161 1168 def computephases(self, roots):
1162 1169 return self.index.computephasesmapsets(roots)
1163 1170
1164 1171 def _headrevs(self):
1165 1172 count = len(self)
1166 1173 if not count:
1167 1174 return [nullrev]
1168 1175 # we won't iter over filtered rev so nobody is a head at start
1169 1176 ishead = [0] * (count + 1)
1170 1177 index = self.index
1171 1178 for r in self:
1172 1179 ishead[r] = 1 # I may be a head
1173 1180 e = index[r]
1174 1181 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1175 1182 return [r for r, val in enumerate(ishead) if val]
1176 1183
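# Editor's note: an illustrative, self-contained version of the head scan in
# _headrevs() above -- mark everything, then unmark whatever appears as a
# parent. The parent map is invented.
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
ishead = dict.fromkeys(parents, True)
for r, (p1, p2) in parents.items():
    for p in (p1, p2):
        if p in ishead:
            ishead[p] = False    # anything that is a parent cannot be a head
assert [r for r, flag in sorted(ishead.items()) if flag] == [3]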
1177 1184 def heads(self, start=None, stop=None):
1178 1185 """return the list of all nodes that have no children
1179 1186
1180 1187 if start is specified, only heads that are descendants of
1181 1188 start will be returned
1182 1189 if stop is specified, it will consider all the revs from stop
1183 1190 as if they had no children
1184 1191 """
1185 1192 if start is None and stop is None:
1186 1193 if not len(self):
1187 1194 return [nullid]
1188 1195 return [self.node(r) for r in self.headrevs()]
1189 1196
1190 1197 if start is None:
1191 1198 start = nullrev
1192 1199 else:
1193 1200 start = self.rev(start)
1194 1201
1195 1202 stoprevs = set(self.rev(n) for n in stop or [])
1196 1203
1197 1204 revs = dagop.headrevssubset(
1198 1205 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1199 1206 )
1200 1207
1201 1208 return [self.node(rev) for rev in revs]
1202 1209
1203 1210 def children(self, node):
1204 1211 """find the children of a given node"""
1205 1212 c = []
1206 1213 p = self.rev(node)
1207 1214 for r in self.revs(start=p + 1):
1208 1215 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1209 1216 if prevs:
1210 1217 for pr in prevs:
1211 1218 if pr == p:
1212 1219 c.append(self.node(r))
1213 1220 elif p == nullrev:
1214 1221 c.append(self.node(r))
1215 1222 return c
1216 1223
1217 1224 def commonancestorsheads(self, a, b):
1218 1225 """calculate all the heads of the common ancestors of nodes a and b"""
1219 1226 a, b = self.rev(a), self.rev(b)
1220 1227 ancs = self._commonancestorsheads(a, b)
1221 1228 return pycompat.maplist(self.node, ancs)
1222 1229
1223 1230 def _commonancestorsheads(self, *revs):
1224 1231 """calculate all the heads of the common ancestors of revs"""
1225 1232 try:
1226 1233 ancs = self.index.commonancestorsheads(*revs)
1227 1234 except (AttributeError, OverflowError): # C implementation failed
1228 1235 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1229 1236 return ancs
1230 1237
1231 1238 def isancestor(self, a, b):
1232 1239 """return True if node a is an ancestor of node b
1233 1240
1234 1241 A revision is considered an ancestor of itself."""
1235 1242 a, b = self.rev(a), self.rev(b)
1236 1243 return self.isancestorrev(a, b)
1237 1244
1238 1245 def isancestorrev(self, a, b):
1239 1246 """return True if revision a is an ancestor of revision b
1240 1247
1241 1248 A revision is considered an ancestor of itself.
1242 1249
1243 1250 The implementation of this is trivial but the use of
1244 1251 reachableroots is not."""
1245 1252 if a == nullrev:
1246 1253 return True
1247 1254 elif a == b:
1248 1255 return True
1249 1256 elif a > b:
1250 1257 return False
1251 1258 return bool(self.reachableroots(a, [b], [a], includepath=False))
1252 1259
1253 1260 def reachableroots(self, minroot, heads, roots, includepath=False):
1254 1261 """return (heads(::<roots> and <roots>::<heads>))
1255 1262
1256 1263 If includepath is True, return (<roots>::<heads>)."""
1257 1264 try:
1258 1265 return self.index.reachableroots2(
1259 1266 minroot, heads, roots, includepath
1260 1267 )
1261 1268 except AttributeError:
1262 1269 return dagop._reachablerootspure(
1263 1270 self.parentrevs, minroot, roots, heads, includepath
1264 1271 )
1265 1272
1266 1273 def ancestor(self, a, b):
1267 1274 """calculate the "best" common ancestor of nodes a and b"""
1268 1275
1269 1276 a, b = self.rev(a), self.rev(b)
1270 1277 try:
1271 1278 ancs = self.index.ancestors(a, b)
1272 1279 except (AttributeError, OverflowError):
1273 1280 ancs = ancestor.ancestors(self.parentrevs, a, b)
1274 1281 if ancs:
1275 1282 # choose a consistent winner when there's a tie
1276 1283 return min(map(self.node, ancs))
1277 1284 return nullid
1278 1285
1279 1286 def _match(self, id):
1280 1287 if isinstance(id, int):
1281 1288 # rev
1282 1289 return self.node(id)
1283 1290 if len(id) == 20:
1284 1291 # possibly a binary node
1285 1292 # odds of a binary node being all hex in ASCII are 1 in 10**25
1286 1293 try:
1287 1294 node = id
1288 1295 self.rev(node) # quick search the index
1289 1296 return node
1290 1297 except error.LookupError:
1291 1298 pass # may be partial hex id
1292 1299 try:
1293 1300 # str(rev)
1294 1301 rev = int(id)
1295 1302 if b"%d" % rev != id:
1296 1303 raise ValueError
1297 1304 if rev < 0:
1298 1305 rev = len(self) + rev
1299 1306 if rev < 0 or rev >= len(self):
1300 1307 raise ValueError
1301 1308 return self.node(rev)
1302 1309 except (ValueError, OverflowError):
1303 1310 pass
1304 1311 if len(id) == 40:
1305 1312 try:
1306 1313 # a full hex nodeid?
1307 1314 node = bin(id)
1308 1315 self.rev(node)
1309 1316 return node
1310 1317 except (TypeError, error.LookupError):
1311 1318 pass
1312 1319
1313 1320 def _partialmatch(self, id):
1314 1321 # we don't care about wdirfilenodeids as they should always be full hashes
1315 1322 maybewdir = wdirhex.startswith(id)
1316 1323 try:
1317 1324 partial = self.index.partialmatch(id)
1318 1325 if partial and self.hasnode(partial):
1319 1326 if maybewdir:
1320 1327 # single 'ff...' match in radix tree, ambiguous with wdir
1321 1328 raise error.RevlogError
1322 1329 return partial
1323 1330 if maybewdir:
1324 1331 # no 'ff...' match in radix tree, wdir identified
1325 1332 raise error.WdirUnsupported
1326 1333 return None
1327 1334 except error.RevlogError:
1328 1335 # parsers.c radix tree lookup gave multiple matches
1329 1336 # fast path: for unfiltered changelog, radix tree is accurate
1330 1337 if not getattr(self, 'filteredrevs', None):
1331 1338 raise error.AmbiguousPrefixLookupError(
1332 1339 id, self.indexfile, _(b'ambiguous identifier')
1333 1340 )
1334 1341 # fall through to slow path that filters hidden revisions
1335 1342 except (AttributeError, ValueError):
1336 1343 # we are pure python, or key was too short to search radix tree
1337 1344 pass
1338 1345
1339 1346 if id in self._pcache:
1340 1347 return self._pcache[id]
1341 1348
1342 1349 if len(id) <= 40:
1343 1350 try:
1344 1351 # hex(node)[:...]
1345 1352 l = len(id) // 2 # grab an even number of digits
1346 1353 prefix = bin(id[: l * 2])
1347 1354 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1348 1355 nl = [
1349 1356 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1350 1357 ]
1351 1358 if nullhex.startswith(id):
1352 1359 nl.append(nullid)
1353 1360 if len(nl) > 0:
1354 1361 if len(nl) == 1 and not maybewdir:
1355 1362 self._pcache[id] = nl[0]
1356 1363 return nl[0]
1357 1364 raise error.AmbiguousPrefixLookupError(
1358 1365 id, self.indexfile, _(b'ambiguous identifier')
1359 1366 )
1360 1367 if maybewdir:
1361 1368 raise error.WdirUnsupported
1362 1369 return None
1363 1370 except TypeError:
1364 1371 pass
1365 1372
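# Editor's note: a tiny sketch of the even-digit trick used in the pure-Python
# slow path above. Binary conversion needs whole bytes, so an odd-length hex
# prefix is cut to an even length first and the last digit is re-checked on the
# hex form. The candidate nodes are invented 4-byte stand-ins for 20-byte ids.
from binascii import hexlify, unhexlify
shortid = b'1a2b3'                                        # 5 hex digits
prefix = unhexlify(shortid[: (len(shortid) // 2) * 2])    # b'\x1a\x2b'
candidates = [b'\x1a\x2b\x3c\xff', b'\x1a\x2b\x4c\xff']
matches = [n for n in candidates
           if n.startswith(prefix) and hexlify(n).startswith(shortid)]
assert matches == [b'\x1a\x2b\x3c\xff']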
1366 1373 def lookup(self, id):
1367 1374 """locate a node based on:
1368 1375 - revision number or str(revision number)
1369 1376 - nodeid or subset of hex nodeid
1370 1377 """
1371 1378 n = self._match(id)
1372 1379 if n is not None:
1373 1380 return n
1374 1381 n = self._partialmatch(id)
1375 1382 if n:
1376 1383 return n
1377 1384
1378 1385 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1379 1386
1380 1387 def shortest(self, node, minlength=1):
1381 1388 """Find the shortest unambiguous prefix that matches node."""
1382 1389
1383 1390 def isvalid(prefix):
1384 1391 try:
1385 1392 matchednode = self._partialmatch(prefix)
1386 1393 except error.AmbiguousPrefixLookupError:
1387 1394 return False
1388 1395 except error.WdirUnsupported:
1389 1396 # single 'ff...' match
1390 1397 return True
1391 1398 if matchednode is None:
1392 1399 raise error.LookupError(node, self.indexfile, _(b'no node'))
1393 1400 return True
1394 1401
1395 1402 def maybewdir(prefix):
1396 1403 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1397 1404
1398 1405 hexnode = hex(node)
1399 1406
1400 1407 def disambiguate(hexnode, minlength):
1401 1408 """Disambiguate against wdirid."""
1402 1409 for length in range(minlength, 41):
1403 1410 prefix = hexnode[:length]
1404 1411 if not maybewdir(prefix):
1405 1412 return prefix
1406 1413
1407 1414 if not getattr(self, 'filteredrevs', None):
1408 1415 try:
1409 1416 length = max(self.index.shortest(node), minlength)
1410 1417 return disambiguate(hexnode, length)
1411 1418 except error.RevlogError:
1412 1419 if node != wdirid:
1413 1420 raise error.LookupError(node, self.indexfile, _(b'no node'))
1414 1421 except AttributeError:
1415 1422 # Fall through to pure code
1416 1423 pass
1417 1424
1418 1425 if node == wdirid:
1419 1426 for length in range(minlength, 41):
1420 1427 prefix = hexnode[:length]
1421 1428 if isvalid(prefix):
1422 1429 return prefix
1423 1430
1424 1431 for length in range(minlength, 41):
1425 1432 prefix = hexnode[:length]
1426 1433 if isvalid(prefix):
1427 1434 return disambiguate(hexnode, length)
1428 1435
1429 1436 def cmp(self, node, text):
1430 1437 """compare text with a given file revision
1431 1438
1432 1439 returns True if text is different than what is stored.
1433 1440 """
1434 1441 p1, p2 = self.parents(node)
1435 1442 return storageutil.hashrevisionsha1(text, p1, p2) != node
1436 1443
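# Editor's note: a minimal illustration of the node hash that cmp() and
# checkhash() verify -- sha1 over the two parents in sorted order followed by
# the text, essentially what storageutil.hashrevisionsha1 computes.
import hashlib
def toy_hash(text, p1, p2):
    a, b = sorted((p1, p2))
    return hashlib.sha1(a + b + text).digest()
toy_null = b'\0' * 20
node = toy_hash(b'file content\n', toy_null, toy_null)
assert toy_hash(b'file content\n', toy_null, toy_null) == node   # cmp() -> False
assert toy_hash(b'other content\n', toy_null, toy_null) != node  # cmp() -> True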
1437 1444 def _cachesegment(self, offset, data):
1438 1445 """Add a segment to the revlog cache.
1439 1446
1440 1447 Accepts an absolute offset and the data that is at that location.
1441 1448 """
1442 1449 o, d = self._chunkcache
1443 1450 # try to add to existing cache
1444 1451 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1445 1452 self._chunkcache = o, d + data
1446 1453 else:
1447 1454 self._chunkcache = offset, data
1448 1455
1449 1456 def _readsegment(self, offset, length, df=None):
1450 1457 """Load a segment of raw data from the revlog.
1451 1458
1452 1459 Accepts an absolute offset, length to read, and an optional existing
1453 1460 file handle to read from.
1454 1461
1455 1462 If an existing file handle is passed, it will be seeked and the
1456 1463 original seek position will NOT be restored.
1457 1464
1458 1465 Returns a str or buffer of raw byte data.
1459 1466
1460 1467 Raises if the requested number of bytes could not be read.
1461 1468 """
1462 1469 # Cache data both forward and backward around the requested
1463 1470 # data, in a fixed size window. This helps speed up operations
1464 1471 # involving reading the revlog backwards.
1465 1472 cachesize = self._chunkcachesize
1466 1473 realoffset = offset & ~(cachesize - 1)
1467 1474 reallength = (
1468 1475 (offset + length + cachesize) & ~(cachesize - 1)
1469 1476 ) - realoffset
1470 1477 with self._datareadfp(df) as df:
1471 1478 df.seek(realoffset)
1472 1479 d = df.read(reallength)
1473 1480
1474 1481 self._cachesegment(realoffset, d)
1475 1482 if offset != realoffset or reallength != length:
1476 1483 startoffset = offset - realoffset
1477 1484 if len(d) - startoffset < length:
1478 1485 raise error.RevlogError(
1479 1486 _(
1480 1487 b'partial read of revlog %s; expected %d bytes from '
1481 1488 b'offset %d, got %d'
1482 1489 )
1483 1490 % (
1484 1491 self.indexfile if self._inline else self.datafile,
1485 1492 length,
1486 1493 realoffset,
1487 1494 len(d) - startoffset,
1488 1495 )
1489 1496 )
1490 1497
1491 1498 return util.buffer(d, startoffset, length)
1492 1499
1493 1500 if len(d) < length:
1494 1501 raise error.RevlogError(
1495 1502 _(
1496 1503 b'partial read of revlog %s; expected %d bytes from offset '
1497 1504 b'%d, got %d'
1498 1505 )
1499 1506 % (
1500 1507 self.indexfile if self._inline else self.datafile,
1501 1508 length,
1502 1509 offset,
1503 1510 len(d),
1504 1511 )
1505 1512 )
1506 1513
1507 1514 return d
1508 1515
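# Editor's note: a worked example of the power-of-two read window computed in
# _readsegment() above, assuming the default 64KB chunk cache size; the offset
# and length are invented.
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)                       # round down: 65536
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset and realoffset + reallength >= offset + length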
1509 1516 def _getsegment(self, offset, length, df=None):
1510 1517 """Obtain a segment of raw data from the revlog.
1511 1518
1512 1519 Accepts an absolute offset, length of bytes to obtain, and an
1513 1520 optional file handle to the already-opened revlog. If the file
1514 1521 handle is used, its original seek position will not be preserved.
1515 1522
1516 1523 Requests for data may be returned from a cache.
1517 1524
1518 1525 Returns a str or a buffer instance of raw byte data.
1519 1526 """
1520 1527 o, d = self._chunkcache
1521 1528 l = len(d)
1522 1529
1523 1530 # is it in the cache?
1524 1531 cachestart = offset - o
1525 1532 cacheend = cachestart + length
1526 1533 if cachestart >= 0 and cacheend <= l:
1527 1534 if cachestart == 0 and cacheend == l:
1528 1535 return d # avoid a copy
1529 1536 return util.buffer(d, cachestart, cacheend - cachestart)
1530 1537
1531 1538 return self._readsegment(offset, length, df=df)
1532 1539
1533 1540 def _getsegmentforrevs(self, startrev, endrev, df=None):
1534 1541 """Obtain a segment of raw data corresponding to a range of revisions.
1535 1542
1536 1543 Accepts the start and end revisions and an optional already-open
1537 1544 file handle to be used for reading. If the file handle is read, its
1538 1545 seek position will not be preserved.
1539 1546
1540 1547 Requests for data may be satisfied by a cache.
1541 1548
1542 1549 Returns a 2-tuple of (offset, data) for the requested range of
1543 1550 revisions. Offset is the integer offset from the beginning of the
1544 1551 revlog and data is a str or buffer of the raw byte data.
1545 1552
1546 1553 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1547 1554 to determine where each revision's data begins and ends.
1548 1555 """
1549 1556 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1550 1557 # (functions are expensive).
1551 1558 index = self.index
1552 1559 istart = index[startrev]
1553 1560 start = int(istart[0] >> 16)
1554 1561 if startrev == endrev:
1555 1562 end = start + istart[1]
1556 1563 else:
1557 1564 iend = index[endrev]
1558 1565 end = int(iend[0] >> 16) + iend[1]
1559 1566
1560 1567 if self._inline:
1561 1568 start += (startrev + 1) * self._io.size
1562 1569 end += (endrev + 1) * self._io.size
1563 1570 length = end - start
1564 1571
1565 1572 return start, self._getsegment(start, length, df=df)
1566 1573
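# Editor's note: a simplified stand-in for the packing behind the ">> 16" used
# above -- the first index field stores the data offset in the high bits and
# the revision flags in the low 16 bits (see revlog.offset_type). Values are
# invented.
def toy_offset_type(offset, flags):
    return (offset << 16) | flags
packed = toy_offset_type(123456, 0)
assert packed >> 16 == 123456      # data offset, as recovered above
assert packed & 0xFFFF == 0        # low 16 bits carry the flags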
1567 1574 def _chunk(self, rev, df=None):
1568 1575 """Obtain a single decompressed chunk for a revision.
1569 1576
1570 1577 Accepts an integer revision and an optional already-open file handle
1571 1578 to be used for reading. If used, the seek position of the file will not
1572 1579 be preserved.
1573 1580
1574 1581 Returns a str holding uncompressed data for the requested revision.
1575 1582 """
1576 1583 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1577 1584
1578 1585 def _chunks(self, revs, df=None, targetsize=None):
1579 1586 """Obtain decompressed chunks for the specified revisions.
1580 1587
1581 1588 Accepts an iterable of numeric revisions that are assumed to be in
1582 1589 ascending order. Also accepts an optional already-open file handle
1583 1590 to be used for reading. If used, the seek position of the file will
1584 1591 not be preserved.
1585 1592
1586 1593 This function is similar to calling ``self._chunk()`` multiple times,
1587 1594 but is faster.
1588 1595
1589 1596 Returns a list with decompressed data for each requested revision.
1590 1597 """
1591 1598 if not revs:
1592 1599 return []
1593 1600 start = self.start
1594 1601 length = self.length
1595 1602 inline = self._inline
1596 1603 iosize = self._io.size
1597 1604 buffer = util.buffer
1598 1605
1599 1606 l = []
1600 1607 ladd = l.append
1601 1608
1602 1609 if not self._withsparseread:
1603 1610 slicedchunks = (revs,)
1604 1611 else:
1605 1612 slicedchunks = deltautil.slicechunk(
1606 1613 self, revs, targetsize=targetsize
1607 1614 )
1608 1615
1609 1616 for revschunk in slicedchunks:
1610 1617 firstrev = revschunk[0]
1611 1618 # Skip trailing revisions with empty diff
1612 1619 for lastrev in revschunk[::-1]:
1613 1620 if length(lastrev) != 0:
1614 1621 break
1615 1622
1616 1623 try:
1617 1624 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1618 1625 except OverflowError:
1619 1626 # issue4215 - we can't cache a run of chunks greater than
1620 1627 # 2G on Windows
1621 1628 return [self._chunk(rev, df=df) for rev in revschunk]
1622 1629
1623 1630 decomp = self.decompress
1624 1631 for rev in revschunk:
1625 1632 chunkstart = start(rev)
1626 1633 if inline:
1627 1634 chunkstart += (rev + 1) * iosize
1628 1635 chunklength = length(rev)
1629 1636 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1630 1637
1631 1638 return l
1632 1639
1633 1640 def _chunkclear(self):
1634 1641 """Clear the raw chunk cache."""
1635 1642 self._chunkcache = (0, b'')
1636 1643
1637 1644 def deltaparent(self, rev):
1638 1645 """return deltaparent of the given revision"""
1639 1646 base = self.index[rev][3]
1640 1647 if base == rev:
1641 1648 return nullrev
1642 1649 elif self._generaldelta:
1643 1650 return base
1644 1651 else:
1645 1652 return rev - 1
1646 1653
1647 1654 def issnapshot(self, rev):
1648 1655 """tells whether rev is a snapshot
1649 1656 """
1650 1657 if not self._sparserevlog:
1651 1658 return self.deltaparent(rev) == nullrev
1652 1659 elif util.safehasattr(self.index, b'issnapshot'):
1653 1660 # directly assign the method to cache the testing and access
1654 1661 self.issnapshot = self.index.issnapshot
1655 1662 return self.issnapshot(rev)
1656 1663 if rev == nullrev:
1657 1664 return True
1658 1665 entry = self.index[rev]
1659 1666 base = entry[3]
1660 1667 if base == rev:
1661 1668 return True
1662 1669 if base == nullrev:
1663 1670 return True
1664 1671 p1 = entry[5]
1665 1672 p2 = entry[6]
1666 1673 if base == p1 or base == p2:
1667 1674 return False
1668 1675 return self.issnapshot(base)
1669 1676
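# Editor's note: an illustrative, self-contained rendering of the sparse-revlog
# rule applied above: a revision is a snapshot when its chain of delta bases
# never runs through one of its own parents. The (base, p1, p2) triples are
# invented.
toyentries = {
    0: (0, -1, -1),   # full text: base == rev
    1: (0, 0, -1),    # delta against its parent -> not a snapshot
    2: (0, 1, -1),    # delta against a non-parent snapshot -> intermediate snapshot
}
def toy_issnapshot(rev):
    base, p1, p2 = toyentries[rev]
    if base == rev or base == -1:
        return True
    if base in (p1, p2):
        return False
    return toy_issnapshot(base)
assert [toy_issnapshot(r) for r in (0, 1, 2)] == [True, False, True]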
1670 1677 def snapshotdepth(self, rev):
1671 1678 """number of snapshot in the chain before this one"""
1672 1679 if not self.issnapshot(rev):
1673 1680 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1674 1681 return len(self._deltachain(rev)[0]) - 1
1675 1682
1676 1683 def revdiff(self, rev1, rev2):
1677 1684 """return or calculate a delta between two revisions
1678 1685
1679 1686 The delta calculated is in binary form and is intended to be written to
1680 1687 revlog data directly. So this function needs raw revision data.
1681 1688 """
1682 1689 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1683 1690 return bytes(self._chunk(rev2))
1684 1691
1685 1692 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1686 1693
1687 1694 def _processflags(self, text, flags, operation, raw=False):
1688 1695 """deprecated entry point to access flag processors"""
1689 1696 msg = b'_processflag(...) use the specialized variant'
1690 1697 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1691 1698 if raw:
1692 1699 return text, flagutil.processflagsraw(self, text, flags)
1693 1700 elif operation == b'read':
1694 1701 return flagutil.processflagsread(self, text, flags)
1695 1702 else: # write operation
1696 1703 return flagutil.processflagswrite(self, text, flags)
1697 1704
1698 1705 def revision(self, nodeorrev, _df=None, raw=False):
1699 1706 """return an uncompressed revision of a given node or revision
1700 1707 number.
1701 1708
1702 1709 _df - an existing file handle to read from. (internal-only)
1703 1710 raw - an optional argument specifying if the revision data is to be
1704 1711 treated as raw data when applying flag transforms. 'raw' should be set
1705 1712 to True when generating changegroups or in debug commands.
1706 1713 """
1707 1714 if raw:
1708 1715 msg = (
1709 1716 b'revlog.revision(..., raw=True) is deprecated, '
1710 1717 b'use revlog.rawdata(...)'
1711 1718 )
1712 1719 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1713 1720 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1714 1721
1715 1722 def sidedata(self, nodeorrev, _df=None):
1716 1723 """a map of extra data related to the changeset but not part of the hash
1717 1724
1718 1725 This function currently returns a dictionary. However, a more advanced
1719 1726 mapping object will likely be used in the future for more
1720 1727 efficient/lazy code.
1721 1728 """
1722 1729 return self._revisiondata(nodeorrev, _df)[1]
1723 1730
1724 1731 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1725 1732 # deal with <nodeorrev> argument type
1726 1733 if isinstance(nodeorrev, int):
1727 1734 rev = nodeorrev
1728 1735 node = self.node(rev)
1729 1736 else:
1730 1737 node = nodeorrev
1731 1738 rev = None
1732 1739
1733 1740 # fast path the special `nullid` rev
1734 1741 if node == nullid:
1735 1742 return b"", {}
1736 1743
1737 1744 # The text as stored inside the revlog. Might be the revision or might
1738 1745 # need to be processed to retrieve the revision.
1739 1746 rawtext = None
1740 1747
1741 1748 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1742 1749
1743 1750 if raw and validated:
1744 1751 # if we don't want to process the raw text and that raw
1745 1752 # text is cached, we can exit early.
1746 1753 return rawtext, {}
1747 1754 if rev is None:
1748 1755 rev = self.rev(node)
1749 1756 # the revlog's flags for this revision
1750 1757 # (they usually alter its state or content)
1751 1758 flags = self.flags(rev)
1752 1759
1753 1760 if validated and flags == REVIDX_DEFAULT_FLAGS:
1754 1761 # no extra flags set, no flag processor runs, text = rawtext
1755 1762 return rawtext, {}
1756 1763
1757 1764 sidedata = {}
1758 1765 if raw:
1759 1766 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1760 1767 text = rawtext
1761 1768 else:
1762 1769 try:
1763 1770 r = flagutil.processflagsread(self, rawtext, flags)
1764 1771 except error.SidedataHashError as exc:
1765 1772 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1766 1773 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1767 1774 raise error.RevlogError(msg)
1768 1775 text, validatehash, sidedata = r
1769 1776 if validatehash:
1770 1777 self.checkhash(text, node, rev=rev)
1771 1778 if not validated:
1772 1779 self._revisioncache = (node, rev, rawtext)
1773 1780
1774 1781 return text, sidedata
1775 1782
1776 1783 def _rawtext(self, node, rev, _df=None):
1777 1784 """return the possibly unvalidated rawtext for a revision
1778 1785
1779 1786 returns (rev, rawtext, validated)
1780 1787 """
1781 1788
1782 1789 # revision in the cache (could be useful to apply delta)
1783 1790 cachedrev = None
1784 1791 # An intermediate text to apply deltas to
1785 1792 basetext = None
1786 1793
1787 1794 # Check if we have the entry in cache
1788 1795 # The cache entry looks like (node, rev, rawtext)
1789 1796 if self._revisioncache:
1790 1797 if self._revisioncache[0] == node:
1791 1798 return (rev, self._revisioncache[2], True)
1792 1799 cachedrev = self._revisioncache[1]
1793 1800
1794 1801 if rev is None:
1795 1802 rev = self.rev(node)
1796 1803
1797 1804 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1798 1805 if stopped:
1799 1806 basetext = self._revisioncache[2]
1800 1807
1801 1808 # drop cache to save memory, the caller is expected to
1802 1809 # update self._revisioncache after validating the text
1803 1810 self._revisioncache = None
1804 1811
1805 1812 targetsize = None
1806 1813 rawsize = self.index[rev][2]
1807 1814 if 0 <= rawsize:
1808 1815 targetsize = 4 * rawsize
1809 1816
1810 1817 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1811 1818 if basetext is None:
1812 1819 basetext = bytes(bins[0])
1813 1820 bins = bins[1:]
1814 1821
1815 1822 rawtext = mdiff.patches(basetext, bins)
1816 1823 del basetext # let us have a chance to free memory early
1817 1824 return (rev, rawtext, False)
1818 1825
1819 1826 def rawdata(self, nodeorrev, _df=None):
1820 1827 """return an uncompressed raw data of a given node or revision number.
1821 1828
1822 1829 _df - an existing file handle to read from. (internal-only)
1823 1830 """
1824 1831 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1825 1832
1826 1833 def hash(self, text, p1, p2):
1827 1834 """Compute a node hash.
1828 1835
1829 1836 Available as a function so that subclasses can replace the hash
1830 1837 as needed.
1831 1838 """
1832 1839 return storageutil.hashrevisionsha1(text, p1, p2)
1833 1840
1834 1841 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1835 1842 """Check node hash integrity.
1836 1843
1837 1844 Available as a function so that subclasses can extend hash mismatch
1838 1845 behaviors as needed.
1839 1846 """
1840 1847 try:
1841 1848 if p1 is None and p2 is None:
1842 1849 p1, p2 = self.parents(node)
1843 1850 if node != self.hash(text, p1, p2):
1844 1851 # Clear the revision cache on hash failure. The revision cache
1845 1852 # only stores the raw revision and clearing the cache does have
1846 1853 # the side-effect that we won't have a cache hit when the raw
1847 1854 # revision data is accessed. But this case should be rare and
1848 1855 # it is extra work to teach the cache about the hash
1849 1856 # verification state.
1850 1857 if self._revisioncache and self._revisioncache[0] == node:
1851 1858 self._revisioncache = None
1852 1859
1853 1860 revornode = rev
1854 1861 if revornode is None:
1855 1862 revornode = templatefilters.short(hex(node))
1856 1863 raise error.RevlogError(
1857 1864 _(b"integrity check failed on %s:%s")
1858 1865 % (self.indexfile, pycompat.bytestr(revornode))
1859 1866 )
1860 1867 except error.RevlogError:
1861 1868 if self._censorable and storageutil.iscensoredtext(text):
1862 1869 raise error.CensoredNodeError(self.indexfile, node, text)
1863 1870 raise
1864 1871
1865 1872 def _enforceinlinesize(self, tr, fp=None):
1866 1873 """Check if the revlog is too big for inline and convert if so.
1867 1874
1868 1875 This should be called after revisions are added to the revlog. If the
1869 1876 revlog has grown too large to be an inline revlog, it will convert it
1870 1877 to use multiple index and data files.
1871 1878 """
1872 1879 tiprev = len(self) - 1
1873 1880 if (
1874 1881 not self._inline
1875 1882 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1876 1883 ):
1877 1884 return
1878 1885
1879 1886 trinfo = tr.find(self.indexfile)
1880 1887 if trinfo is None:
1881 1888 raise error.RevlogError(
1882 1889 _(b"%s not found in the transaction") % self.indexfile
1883 1890 )
1884 1891
1885 1892 trindex = trinfo[2]
1886 1893 if trindex is not None:
1887 1894 dataoff = self.start(trindex)
1888 1895 else:
1889 1896 # revlog was stripped at start of transaction, use all leftover data
1890 1897 trindex = len(self) - 1
1891 1898 dataoff = self.end(tiprev)
1892 1899
1893 1900 tr.add(self.datafile, dataoff)
1894 1901
1895 1902 if fp:
1896 1903 fp.flush()
1897 1904 fp.close()
1898 1905 # We can't use the cached file handle after close(). So prevent
1899 1906 # its usage.
1900 1907 self._writinghandles = None
1901 1908
1902 1909 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1903 1910 for r in self:
1904 1911 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1905 1912
1906 1913 with self._indexfp(b'w') as fp:
1907 1914 self.version &= ~FLAG_INLINE_DATA
1908 1915 self._inline = False
1909 1916 io = self._io
1910 1917 for i in self:
1911 1918 e = io.packentry(self.index[i], self.node, self.version, i)
1912 1919 fp.write(e)
1913 1920
1914 1921 # the temp file replace the real index when we exit the context
1915 1922 # manager
1916 1923
1917 1924 tr.replace(self.indexfile, trindex * self._io.size)
1918 1925 self._chunkclear()
1919 1926
1920 1927 def _nodeduplicatecallback(self, transaction, node):
1921 1928 """called when trying to add a node already stored.
1922 1929 """
1923 1930
1924 1931 def addrevision(
1925 1932 self,
1926 1933 text,
1927 1934 transaction,
1928 1935 link,
1929 1936 p1,
1930 1937 p2,
1931 1938 cachedelta=None,
1932 1939 node=None,
1933 1940 flags=REVIDX_DEFAULT_FLAGS,
1934 1941 deltacomputer=None,
1935 1942 sidedata=None,
1936 1943 ):
1937 1944 """add a revision to the log
1938 1945
1939 1946 text - the revision data to add
1940 1947 transaction - the transaction object used for rollback
1941 1948 link - the linkrev data to add
1942 1949 p1, p2 - the parent nodeids of the revision
1943 1950 cachedelta - an optional precomputed delta
1944 1951 node - nodeid of revision; typically node is not specified, and it is
1945 1952 computed by default as hash(text, p1, p2), however subclasses might
1946 1953 use different hashing method (and override checkhash() in such case)
1947 1954 flags - the known flags to set on the revision
1948 1955 deltacomputer - an optional deltacomputer instance shared between
1949 1956 multiple calls
1950 1957 """
1951 1958 if link == nullrev:
1952 1959 raise error.RevlogError(
1953 1960 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1954 1961 )
1955 1962
1956 1963 if sidedata is None:
1957 1964 sidedata = {}
1958 1965 flags = flags & ~REVIDX_SIDEDATA
1959 1966 elif not self.hassidedata:
1960 1967 raise error.ProgrammingError(
1961 1968 _(b"trying to add sidedata to a revlog who don't support them")
1962 1969 )
1963 1970 else:
1964 1971 flags |= REVIDX_SIDEDATA
1965 1972
1966 1973 if flags:
1967 1974 node = node or self.hash(text, p1, p2)
1968 1975
1969 1976 rawtext, validatehash = flagutil.processflagswrite(
1970 1977 self, text, flags, sidedata=sidedata
1971 1978 )
1972 1979
1973 1980 # If the flag processor modifies the revision data, ignore any provided
1974 1981 # cachedelta.
1975 1982 if rawtext != text:
1976 1983 cachedelta = None
1977 1984
1978 1985 if len(rawtext) > _maxentrysize:
1979 1986 raise error.RevlogError(
1980 1987 _(
1981 1988 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1982 1989 )
1983 1990 % (self.indexfile, len(rawtext))
1984 1991 )
1985 1992
1986 1993 node = node or self.hash(rawtext, p1, p2)
1987 1994 if node in self.nodemap:
1988 1995 return node
1989 1996
1990 1997 if validatehash:
1991 1998 self.checkhash(rawtext, node, p1=p1, p2=p2)
1992 1999
1993 2000 return self.addrawrevision(
1994 2001 rawtext,
1995 2002 transaction,
1996 2003 link,
1997 2004 p1,
1998 2005 p2,
1999 2006 node,
2000 2007 flags,
2001 2008 cachedelta=cachedelta,
2002 2009 deltacomputer=deltacomputer,
2003 2010 )
2004 2011
2005 2012 def addrawrevision(
2006 2013 self,
2007 2014 rawtext,
2008 2015 transaction,
2009 2016 link,
2010 2017 p1,
2011 2018 p2,
2012 2019 node,
2013 2020 flags,
2014 2021 cachedelta=None,
2015 2022 deltacomputer=None,
2016 2023 ):
2017 2024 """add a raw revision with known flags, node and parents
2018 2025 useful when reusing a revision not stored in this revlog (e.g. received
2019 2026 over the wire, or read from an external bundle).
2020 2027 """
2021 2028 dfh = None
2022 2029 if not self._inline:
2023 2030 dfh = self._datafp(b"a+")
2024 2031 ifh = self._indexfp(b"a+")
2025 2032 try:
2026 2033 return self._addrevision(
2027 2034 node,
2028 2035 rawtext,
2029 2036 transaction,
2030 2037 link,
2031 2038 p1,
2032 2039 p2,
2033 2040 flags,
2034 2041 cachedelta,
2035 2042 ifh,
2036 2043 dfh,
2037 2044 deltacomputer=deltacomputer,
2038 2045 )
2039 2046 finally:
2040 2047 if dfh:
2041 2048 dfh.close()
2042 2049 ifh.close()
2043 2050
2044 2051 def compress(self, data):
2045 2052 """Generate a possibly-compressed representation of data."""
2046 2053 if not data:
2047 2054 return b'', data
2048 2055
2049 2056 compressed = self._compressor.compress(data)
2050 2057
2051 2058 if compressed:
2052 2059 # The revlog compressor added the header in the returned data.
2053 2060 return b'', compressed
2054 2061
2055 2062 if data[0:1] == b'\0':
2056 2063 return b'', data
2057 2064 return b'u', data
2058 2065
2059 2066 def decompress(self, data):
2060 2067 """Decompress a revlog chunk.
2061 2068
2062 2069 The chunk is expected to begin with a header identifying the
2063 2070 format type so it can be routed to an appropriate decompressor.
2064 2071 """
2065 2072 if not data:
2066 2073 return data
2067 2074
2068 2075 # Revlogs are read much more frequently than they are written and many
2069 2076 # chunks only take microseconds to decompress, so performance is
2070 2077 # important here.
2071 2078 #
2072 2079 # We can make a few assumptions about revlogs:
2073 2080 #
2074 2081 # 1) the majority of chunks will be compressed (as opposed to inline
2075 2082 # raw data).
2076 2083 # 2) decompressing *any* data will likely be at least 10x slower than
2077 2084 # returning raw inline data.
2078 2085 # 3) we want to prioritize common and officially supported compression
2079 2086 # engines
2080 2087 #
2081 2088 # It follows that we want to optimize for "decompress compressed data
2082 2089 # when encoded with common and officially supported compression engines"
2083 2090 # case over "raw data" and "data encoded by less common or non-official
2084 2091 # compression engines." That is why we have the inline lookup first
2085 2092 # followed by the compengines lookup.
2086 2093 #
2087 2094 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2088 2095 # compressed chunks. And this matters for changelog and manifest reads.
2089 2096 t = data[0:1]
2090 2097
2091 2098 if t == b'x':
2092 2099 try:
2093 2100 return _zlibdecompress(data)
2094 2101 except zlib.error as e:
2095 2102 raise error.RevlogError(
2096 2103 _(b'revlog decompress error: %s')
2097 2104 % stringutil.forcebytestr(e)
2098 2105 )
2099 2106 # '\0' is more common than 'u' so it goes first.
2100 2107 elif t == b'\0':
2101 2108 return data
2102 2109 elif t == b'u':
2103 2110 return util.buffer(data, 1)
2104 2111
2105 2112 try:
2106 2113 compressor = self._decompressors[t]
2107 2114 except KeyError:
2108 2115 try:
2109 2116 engine = util.compengines.forrevlogheader(t)
2110 2117 compressor = engine.revlogcompressor(self._compengineopts)
2111 2118 self._decompressors[t] = compressor
2112 2119 except KeyError:
2113 2120 raise error.RevlogError(_(b'unknown compression type %r') % t)
2114 2121
2115 2122 return compressor.decompress(data)
2116 2123
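# Editor's note: a small sketch of the one-byte chunk headers dispatched above,
# exercising only the built-in zlib case; other header bytes are looked up in
# util.compengines by the real code.
import zlib
raw = b'some revision text'
zchunk = zlib.compress(raw)
assert zchunk[0:1] == b'x'              # zlib chunks start with 'x'
assert zlib.decompress(zchunk) == raw
uchunk = b'u' + raw                     # 'u' marks data stored uncompressed
assert uchunk[1:] == raw
# an empty or '\0'-leading chunk is returned as-is by decompress()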
2117 2124 def _addrevision(
2118 2125 self,
2119 2126 node,
2120 2127 rawtext,
2121 2128 transaction,
2122 2129 link,
2123 2130 p1,
2124 2131 p2,
2125 2132 flags,
2126 2133 cachedelta,
2127 2134 ifh,
2128 2135 dfh,
2129 2136 alwayscache=False,
2130 2137 deltacomputer=None,
2131 2138 ):
2132 2139 """internal function to add revisions to the log
2133 2140
2134 2141 see addrevision for argument descriptions.
2135 2142
2136 2143 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2137 2144
2138 2145 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2139 2146 be used.
2140 2147
2141 2148 invariants:
2142 2149 - rawtext is optional (can be None); if not set, cachedelta must be set.
2143 2150 if both are set, they must correspond to each other.
2144 2151 """
2145 2152 if node == nullid:
2146 2153 raise error.RevlogError(
2147 2154 _(b"%s: attempt to add null revision") % self.indexfile
2148 2155 )
2149 2156 if node == wdirid or node in wdirfilenodeids:
2150 2157 raise error.RevlogError(
2151 2158 _(b"%s: attempt to add wdir revision") % self.indexfile
2152 2159 )
2153 2160
2154 2161 if self._inline:
2155 2162 fh = ifh
2156 2163 else:
2157 2164 fh = dfh
2158 2165
2159 2166 btext = [rawtext]
2160 2167
2161 2168 curr = len(self)
2162 2169 prev = curr - 1
2163 2170 offset = self.end(prev)
2164 2171 p1r, p2r = self.rev(p1), self.rev(p2)
2165 2172
2166 2173 # full versions are inserted when the needed deltas
2167 2174 # become comparable to the uncompressed text
2168 2175 if rawtext is None:
2169 2176 # need rawtext size, before changed by flag processors, which is
2170 2177 # the non-raw size. use revlog explicitly to avoid filelog's extra
2171 2178 # logic that might remove metadata size.
2172 2179 textlen = mdiff.patchedsize(
2173 2180 revlog.size(self, cachedelta[0]), cachedelta[1]
2174 2181 )
2175 2182 else:
2176 2183 textlen = len(rawtext)
2177 2184
2178 2185 if deltacomputer is None:
2179 2186 deltacomputer = deltautil.deltacomputer(self)
2180 2187
2181 2188 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2182 2189
2183 2190 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2184 2191
2185 2192 e = (
2186 2193 offset_type(offset, flags),
2187 2194 deltainfo.deltalen,
2188 2195 textlen,
2189 2196 deltainfo.base,
2190 2197 link,
2191 2198 p1r,
2192 2199 p2r,
2193 2200 node,
2194 2201 )
2195 2202 self.index.append(e)
2196 2203
2197 2204 # Reset the pure node cache start lookup offset to account for new
2198 2205 # revision.
2199 2206 if self._nodepos is not None:
2200 2207 self._nodepos = curr
2201 2208
2202 2209 entry = self._io.packentry(e, self.node, self.version, curr)
2203 2210 self._writeentry(
2204 2211 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2205 2212 )
2206 2213
2207 2214 rawtext = btext[0]
2208 2215
2209 2216 if alwayscache and rawtext is None:
2210 2217 rawtext = deltacomputer.buildtext(revinfo, fh)
2211 2218
2212 2219 if type(rawtext) == bytes: # only accept immutable objects
2213 2220 self._revisioncache = (node, curr, rawtext)
2214 2221 self._chainbasecache[curr] = deltainfo.chainbase
2215 2222 return node
2216 2223
2217 2224 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2218 2225 # Files opened in a+ mode have inconsistent behavior on various
2219 2226 # platforms. Windows requires that a file positioning call be made
2220 2227 # when the file handle transitions between reads and writes. See
2221 2228 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2222 2229 # platforms, Python or the platform itself can be buggy. Some versions
2223 2230 # of Solaris have been observed to not append at the end of the file
2224 2231 # if the file was seeked to before the end. See issue4943 for more.
2225 2232 #
2226 2233 # We work around this issue by inserting a seek() before writing.
2227 2234 # Note: This is likely not necessary on Python 3. However, because
2228 2235 # the file handle is reused for reads and may be seeked there, we need
2229 2236 # to be careful before changing this.
2230 2237 ifh.seek(0, os.SEEK_END)
2231 2238 if dfh:
2232 2239 dfh.seek(0, os.SEEK_END)
2233 2240
2234 2241 curr = len(self) - 1
2235 2242 if not self._inline:
2236 2243 transaction.add(self.datafile, offset)
2237 2244 transaction.add(self.indexfile, curr * len(entry))
2238 2245 if data[0]:
2239 2246 dfh.write(data[0])
2240 2247 dfh.write(data[1])
2241 2248 ifh.write(entry)
2242 2249 else:
2243 2250 offset += curr * self._io.size
2244 2251 transaction.add(self.indexfile, offset, curr)
2245 2252 ifh.write(entry)
2246 2253 ifh.write(data[0])
2247 2254 ifh.write(data[1])
2248 2255 self._enforceinlinesize(transaction, ifh)
2249 2256
2250 2257 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2251 2258 """
2252 2259 add a delta group
2253 2260
2254 2261 given a set of deltas, add them to the revision log. the
2255 2262 first delta is against its parent, which should be in our
2256 2263 log, the rest are against the previous delta.
2257 2264
2258 2265 If ``addrevisioncb`` is defined, it will be called with arguments of
2259 2266 this revlog and the node that was added.
2260 2267 """
2261 2268
2262 2269 if self._writinghandles:
2263 2270 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2264 2271
2265 2272 nodes = []
2266 2273
2267 2274 r = len(self)
2268 2275 end = 0
2269 2276 if r:
2270 2277 end = self.end(r - 1)
2271 2278 ifh = self._indexfp(b"a+")
2272 2279 isize = r * self._io.size
2273 2280 if self._inline:
2274 2281 transaction.add(self.indexfile, end + isize, r)
2275 2282 dfh = None
2276 2283 else:
2277 2284 transaction.add(self.indexfile, isize, r)
2278 2285 transaction.add(self.datafile, end)
2279 2286 dfh = self._datafp(b"a+")
2280 2287
2281 2288 def flush():
2282 2289 if dfh:
2283 2290 dfh.flush()
2284 2291 ifh.flush()
2285 2292
2286 2293 self._writinghandles = (ifh, dfh)
2287 2294
2288 2295 try:
2289 2296 deltacomputer = deltautil.deltacomputer(self)
2290 2297 # loop through our set of deltas
2291 2298 for data in deltas:
2292 2299 node, p1, p2, linknode, deltabase, delta, flags = data
2293 2300 link = linkmapper(linknode)
2294 2301 flags = flags or REVIDX_DEFAULT_FLAGS
2295 2302
2296 2303 nodes.append(node)
2297 2304
2298 2305 if node in self.nodemap:
2299 2306 self._nodeduplicatecallback(transaction, node)
2300 2307 # this can happen if two branches make the same change
2301 2308 continue
2302 2309
2303 2310 for p in (p1, p2):
2304 2311 if p not in self.nodemap:
2305 2312 raise error.LookupError(
2306 2313 p, self.indexfile, _(b'unknown parent')
2307 2314 )
2308 2315
2309 2316 if deltabase not in self.nodemap:
2310 2317 raise error.LookupError(
2311 2318 deltabase, self.indexfile, _(b'unknown delta base')
2312 2319 )
2313 2320
2314 2321 baserev = self.rev(deltabase)
2315 2322
2316 2323 if baserev != nullrev and self.iscensored(baserev):
2317 2324 # if base is censored, delta must be full replacement in a
2318 2325 # single patch operation
2319 2326 hlen = struct.calcsize(b">lll")
2320 2327 oldlen = self.rawsize(baserev)
2321 2328 newlen = len(delta) - hlen
2322 2329 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2323 2330 raise error.CensoredBaseError(
2324 2331 self.indexfile, self.node(baserev)
2325 2332 )
2326 2333
2327 2334 if not flags and self._peek_iscensored(baserev, delta, flush):
2328 2335 flags |= REVIDX_ISCENSORED
2329 2336
2330 2337 # We assume consumers of addrevisioncb will want to retrieve
2331 2338 # the added revision, which will require a call to
2332 2339 # revision(). revision() will fast path if there is a cache
2333 2340 # hit. So, we tell _addrevision() to always cache in this case.
2334 2341 # We're only using addgroup() in the context of changegroup
2335 2342 # generation so the revision data can always be handled as raw
2336 2343 # by the flagprocessor.
2337 2344 self._addrevision(
2338 2345 node,
2339 2346 None,
2340 2347 transaction,
2341 2348 link,
2342 2349 p1,
2343 2350 p2,
2344 2351 flags,
2345 2352 (baserev, delta),
2346 2353 ifh,
2347 2354 dfh,
2348 2355 alwayscache=bool(addrevisioncb),
2349 2356 deltacomputer=deltacomputer,
2350 2357 )
2351 2358
2352 2359 if addrevisioncb:
2353 2360 addrevisioncb(self, node)
2354 2361
2355 2362 if not dfh and not self._inline:
2356 2363 # addrevision switched from inline to conventional
2357 2364 # reopen the index
2358 2365 ifh.close()
2359 2366 dfh = self._datafp(b"a+")
2360 2367 ifh = self._indexfp(b"a+")
2361 2368 self._writinghandles = (ifh, dfh)
2362 2369 finally:
2363 2370 self._writinghandles = None
2364 2371
2365 2372 if dfh:
2366 2373 dfh.close()
2367 2374 ifh.close()
2368 2375
2369 2376 return nodes
2370 2377
2371 2378 def iscensored(self, rev):
2372 2379 """Check if a file revision is censored."""
2373 2380 if not self._censorable:
2374 2381 return False
2375 2382
2376 2383 return self.flags(rev) & REVIDX_ISCENSORED
2377 2384
2378 2385 def _peek_iscensored(self, baserev, delta, flush):
2379 2386 """Quickly check if a delta produces a censored revision."""
2380 2387 if not self._censorable:
2381 2388 return False
2382 2389
2383 2390 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2384 2391
2385 2392 def getstrippoint(self, minlink):
2386 2393 """find the minimum rev that must be stripped to strip the linkrev
2387 2394
2388 2395 Returns a tuple containing the minimum rev and a set of all revs that
2389 2396 have linkrevs that will be broken by this strip.
2390 2397 """
2391 2398 return storageutil.resolvestripinfo(
2392 2399 minlink,
2393 2400 len(self) - 1,
2394 2401 self.headrevs(),
2395 2402 self.linkrev,
2396 2403 self.parentrevs,
2397 2404 )
2398 2405
2399 2406 def strip(self, minlink, transaction):
2400 2407 """truncate the revlog on the first revision with a linkrev >= minlink
2401 2408
2402 2409 This function is called when we're stripping revision minlink and
2403 2410 its descendants from the repository.
2404 2411
2405 2412 We have to remove all revisions with linkrev >= minlink, because
2406 2413 the equivalent changelog revisions will be renumbered after the
2407 2414 strip.
2408 2415
2409 2416 So we truncate the revlog on the first of these revisions, and
2410 2417 trust that the caller has saved the revisions that shouldn't be
2411 2418 removed and that it'll re-add them after this truncation.
2412 2419 """
2413 2420 if len(self) == 0:
2414 2421 return
2415 2422
2416 2423 rev, _ = self.getstrippoint(minlink)
2417 2424 if rev == len(self):
2418 2425 return
2419 2426
2420 2427 # first truncate the files on disk
2421 2428 end = self.start(rev)
2422 2429 if not self._inline:
2423 2430 transaction.add(self.datafile, end)
2424 2431 end = rev * self._io.size
2425 2432 else:
2426 2433 end += rev * self._io.size
2427 2434
2428 2435 transaction.add(self.indexfile, end)
2429 2436
2430 2437 # then reset internal state in memory to forget those revisions
2431 2438 self._revisioncache = None
2432 2439 self._chaininfocache = {}
2433 2440 self._chunkclear()
2434 for x in pycompat.xrange(rev, len(self)):
2435 del self.nodemap[self.node(x)]
2436 2441
2437 2442 del self.index[rev:-1]
2438 2443 self._nodepos = None
2439 2444
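The two methods above work together during stripping: getstrippoint() locates the truncation point and the revisions whose linkrevs will break, and strip() performs the truncation. A minimal sketch, assuming `rl` is an open revlog and `tr` an active transaction (both hypothetical names):

    # Illustrative only: `rl` (a revlog) and `tr` (a transaction) are assumed
    # to exist; `minlink` is the first changelog revision being removed.
    minlink = 42
    striprev, brokenrevs = rl.getstrippoint(minlink)
    # `brokenrevs` holds revisions whose linkrev >= minlink but that sit below
    # the truncation point; the caller must save and re-add them afterwards.
    rl.strip(minlink, tr)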
2440 2445 def checksize(self):
2441 2446 """Check size of index and data files
2442 2447
2443 2448 return a (dd, di) tuple.
2444 2449 - dd: extra bytes for the "data" file
2445 2450 - di: extra bytes for the "index" file
2446 2451
2447 2452 A healthy revlog will return (0, 0).
2448 2453 """
2449 2454 expected = 0
2450 2455 if len(self):
2451 2456 expected = max(0, self.end(len(self) - 1))
2452 2457
2453 2458 try:
2454 2459 with self._datafp() as f:
2455 2460 f.seek(0, io.SEEK_END)
2456 2461 actual = f.tell()
2457 2462 dd = actual - expected
2458 2463 except IOError as inst:
2459 2464 if inst.errno != errno.ENOENT:
2460 2465 raise
2461 2466 dd = 0
2462 2467
2463 2468 try:
2464 2469 f = self.opener(self.indexfile)
2465 2470 f.seek(0, io.SEEK_END)
2466 2471 actual = f.tell()
2467 2472 f.close()
2468 2473 s = self._io.size
2469 2474 i = max(0, actual // s)
2470 2475 di = actual - (i * s)
2471 2476 if self._inline:
2472 2477 databytes = 0
2473 2478 for r in self:
2474 2479 databytes += max(0, self.length(r))
2475 2480 dd = 0
2476 2481 di = actual - len(self) * s - databytes
2477 2482 except IOError as inst:
2478 2483 if inst.errno != errno.ENOENT:
2479 2484 raise
2480 2485 di = 0
2481 2486
2482 2487 return (dd, di)
2483 2488
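A hedged sketch of interpreting the (dd, di) result documented above; `rl` is an assumed revlog instance, not a name from this module:

    # Illustrative only: `rl` is assumed to be an existing revlog.
    dd, di = rl.checksize()
    if (dd, di) == (0, 0):
        print('revlog looks healthy')
    else:
        # nonzero values indicate extra (or, for dd, missing) bytes relative
        # to what the index describes
        print('data file off by %d bytes, index has %d extra bytes' % (dd, di))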
2484 2489 def files(self):
2485 2490 res = [self.indexfile]
2486 2491 if not self._inline:
2487 2492 res.append(self.datafile)
2488 2493 return res
2489 2494
2490 2495 def emitrevisions(
2491 2496 self,
2492 2497 nodes,
2493 2498 nodesorder=None,
2494 2499 revisiondata=False,
2495 2500 assumehaveparentrevisions=False,
2496 2501 deltamode=repository.CG_DELTAMODE_STD,
2497 2502 ):
2498 2503 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2499 2504 raise error.ProgrammingError(
2500 2505 b'unhandled value for nodesorder: %s' % nodesorder
2501 2506 )
2502 2507
2503 2508 if nodesorder is None and not self._generaldelta:
2504 2509 nodesorder = b'storage'
2505 2510
2506 2511 if (
2507 2512 not self._storedeltachains
2508 2513 and deltamode != repository.CG_DELTAMODE_PREV
2509 2514 ):
2510 2515 deltamode = repository.CG_DELTAMODE_FULL
2511 2516
2512 2517 return storageutil.emitrevisions(
2513 2518 self,
2514 2519 nodes,
2515 2520 nodesorder,
2516 2521 revlogrevisiondelta,
2517 2522 deltaparentfn=self.deltaparent,
2518 2523 candeltafn=self.candelta,
2519 2524 rawsizefn=self.rawsize,
2520 2525 revdifffn=self.revdiff,
2521 2526 flagsfn=self.flags,
2522 2527 deltamode=deltamode,
2523 2528 revisiondata=revisiondata,
2524 2529 assumehaveparentrevisions=assumehaveparentrevisions,
2525 2530 )
2526 2531
2527 2532 DELTAREUSEALWAYS = b'always'
2528 2533 DELTAREUSESAMEREVS = b'samerevs'
2529 2534 DELTAREUSENEVER = b'never'
2530 2535
2531 2536 DELTAREUSEFULLADD = b'fulladd'
2532 2537
2533 2538 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2534 2539
2535 2540 def clone(
2536 2541 self,
2537 2542 tr,
2538 2543 destrevlog,
2539 2544 addrevisioncb=None,
2540 2545 deltareuse=DELTAREUSESAMEREVS,
2541 2546 forcedeltabothparents=None,
2542 2547 sidedatacompanion=None,
2543 2548 ):
2544 2549 """Copy this revlog to another, possibly with format changes.
2545 2550
2546 2551 The destination revlog will contain the same revisions and nodes.
2547 2552 However, it may not be bit-for-bit identical due to e.g. delta encoding
2548 2553 differences.
2549 2554
2550 2555 The ``deltareuse`` argument controls how deltas from the existing revlog
2551 2556 are preserved in the destination revlog. The argument can have the
2552 2557 following values:
2553 2558
2554 2559 DELTAREUSEALWAYS
2555 2560 Deltas will always be reused (if possible), even if the destination
2556 2561 revlog would not select the same revisions for the delta. This is the
2557 2562 fastest mode of operation.
2558 2563 DELTAREUSESAMEREVS
2559 2564 Deltas will be reused if the destination revlog would pick the same
2560 2565 revisions for the delta. This mode strikes a balance between speed
2561 2566 and optimization.
2562 2567 DELTAREUSENEVER
2563 2568 Deltas will never be reused. This is the slowest mode of execution.
2564 2569 This mode can be used to recompute deltas (e.g. if the diff/delta
2565 2570 algorithm changes).
2566 2571 DELTAREUSEFULLADD
2567 2572 Revisions will be re-added as if they were new content. This is
2568 2573 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2569 2574 e.g. large file detection and handling.
2570 2575
2571 2576 Delta computation can be slow, so the choice of delta reuse policy can
2572 2577 significantly affect run time.
2573 2578
2574 2579 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2575 2580 two extremes. Deltas will be reused if they are appropriate. But if the
2576 2581 delta could choose a better revision, it will do so. This means if you
2577 2582 are converting a non-generaldelta revlog to a generaldelta revlog,
2578 2583 deltas will be recomputed if the delta's parent isn't a parent of the
2579 2584 revision.
2580 2585
2581 2586 In addition to the delta policy, the ``forcedeltabothparents``
2582 2587 argument controls whether to force computing deltas against both parents
2583 2588 for merges. When not set, the destination revlog's current setting is kept.
2584 2589
2585 2590 If not None, `sidedatacompanion` is a callable that accepts two
2586 2591 arguments:
2587 2592
2588 2593 (srcrevlog, rev)
2589 2594
2590 2595 and returns a triplet that controls changes to sidedata content from the
2591 2596 old revision to the new clone result:
2592 2597
2593 2598 (dropall, filterout, update)
2594 2599
2595 2600 * if `dropall` is True, all sidedata should be dropped
2596 2601 * `filterout` is a set of sidedata keys that should be dropped
2597 2602 * `update` is a mapping of additional/new key -> value
2598 2603 """
2599 2604 if deltareuse not in self.DELTAREUSEALL:
2600 2605 raise ValueError(
2601 2606 _(b'value for deltareuse invalid: %s') % deltareuse
2602 2607 )
2603 2608
2604 2609 if len(destrevlog):
2605 2610 raise ValueError(_(b'destination revlog is not empty'))
2606 2611
2607 2612 if getattr(self, 'filteredrevs', None):
2608 2613 raise ValueError(_(b'source revlog has filtered revisions'))
2609 2614 if getattr(destrevlog, 'filteredrevs', None):
2610 2615 raise ValueError(_(b'destination revlog has filtered revisions'))
2611 2616
2612 2617 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2613 2618 # if possible.
2614 2619 oldlazydelta = destrevlog._lazydelta
2615 2620 oldlazydeltabase = destrevlog._lazydeltabase
2616 2621 oldamd = destrevlog._deltabothparents
2617 2622
2618 2623 try:
2619 2624 if deltareuse == self.DELTAREUSEALWAYS:
2620 2625 destrevlog._lazydeltabase = True
2621 2626 destrevlog._lazydelta = True
2622 2627 elif deltareuse == self.DELTAREUSESAMEREVS:
2623 2628 destrevlog._lazydeltabase = False
2624 2629 destrevlog._lazydelta = True
2625 2630 elif deltareuse == self.DELTAREUSENEVER:
2626 2631 destrevlog._lazydeltabase = False
2627 2632 destrevlog._lazydelta = False
2628 2633
2629 2634 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2630 2635
2631 2636 self._clone(
2632 2637 tr,
2633 2638 destrevlog,
2634 2639 addrevisioncb,
2635 2640 deltareuse,
2636 2641 forcedeltabothparents,
2637 2642 sidedatacompanion,
2638 2643 )
2639 2644
2640 2645 finally:
2641 2646 destrevlog._lazydelta = oldlazydelta
2642 2647 destrevlog._lazydeltabase = oldlazydeltabase
2643 2648 destrevlog._deltabothparents = oldamd
2644 2649
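The docstring above spells out both the delta-reuse policies and the sidedatacompanion protocol; the following is a minimal sketch of wiring them together, where `source`, `dest`, `tr`, and the sidedata key are assumptions rather than names from this module:

    # Illustrative only: `source` and `dest` are assumed revlog instances,
    # `tr` an open transaction, and EXAMPLE_KEY a made-up sidedata key.
    EXAMPLE_KEY = 1

    def companion(srcrevlog, rev):
        dropall = False            # keep sidedata in general
        filterout = {EXAMPLE_KEY}  # but drop this one key everywhere
        update = {}                # and add nothing new
        return dropall, filterout, update

    source.clone(
        tr,
        dest,
        deltareuse=source.DELTAREUSESAMEREVS,
        sidedatacompanion=companion,
    )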
2645 2650 def _clone(
2646 2651 self,
2647 2652 tr,
2648 2653 destrevlog,
2649 2654 addrevisioncb,
2650 2655 deltareuse,
2651 2656 forcedeltabothparents,
2652 2657 sidedatacompanion,
2653 2658 ):
2654 2659 """perform the core duty of `revlog.clone` after parameter processing"""
2655 2660 deltacomputer = deltautil.deltacomputer(destrevlog)
2656 2661 index = self.index
2657 2662 for rev in self:
2658 2663 entry = index[rev]
2659 2664
2660 2665 # Some classes override linkrev to take filtered revs into
2661 2666 # account. Use raw entry from index.
2662 2667 flags = entry[0] & 0xFFFF
2663 2668 linkrev = entry[4]
2664 2669 p1 = index[entry[5]][7]
2665 2670 p2 = index[entry[6]][7]
2666 2671 node = entry[7]
2667 2672
2668 2673 sidedataactions = (False, [], {})
2669 2674 if sidedatacompanion is not None:
2670 2675 sidedataactions = sidedatacompanion(self, rev)
2671 2676
2672 2677 # (Possibly) reuse the delta from the revlog if allowed and
2673 2678 # the revlog chunk is a delta.
2674 2679 cachedelta = None
2675 2680 rawtext = None
2676 2681 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2677 2682 dropall, filterout, update = sidedataactions
2678 2683 text, sidedata = self._revisiondata(rev)
2679 2684 if dropall:
2680 2685 sidedata = {}
2681 2686 for key in filterout:
2682 2687 sidedata.pop(key, None)
2683 2688 sidedata.update(update)
2684 2689 if not sidedata:
2685 2690 sidedata = None
2686 2691 destrevlog.addrevision(
2687 2692 text,
2688 2693 tr,
2689 2694 linkrev,
2690 2695 p1,
2691 2696 p2,
2692 2697 cachedelta=cachedelta,
2693 2698 node=node,
2694 2699 flags=flags,
2695 2700 deltacomputer=deltacomputer,
2696 2701 sidedata=sidedata,
2697 2702 )
2698 2703 else:
2699 2704 if destrevlog._lazydelta:
2700 2705 dp = self.deltaparent(rev)
2701 2706 if dp != nullrev:
2702 2707 cachedelta = (dp, bytes(self._chunk(rev)))
2703 2708
2704 2709 if not cachedelta:
2705 2710 rawtext = self.rawdata(rev)
2706 2711
2707 2712 ifh = destrevlog.opener(
2708 2713 destrevlog.indexfile, b'a+', checkambig=False
2709 2714 )
2710 2715 dfh = None
2711 2716 if not destrevlog._inline:
2712 2717 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2713 2718 try:
2714 2719 destrevlog._addrevision(
2715 2720 node,
2716 2721 rawtext,
2717 2722 tr,
2718 2723 linkrev,
2719 2724 p1,
2720 2725 p2,
2721 2726 flags,
2722 2727 cachedelta,
2723 2728 ifh,
2724 2729 dfh,
2725 2730 deltacomputer=deltacomputer,
2726 2731 )
2727 2732 finally:
2728 2733 if dfh:
2729 2734 dfh.close()
2730 2735 ifh.close()
2731 2736
2732 2737 if addrevisioncb:
2733 2738 addrevisioncb(self, rev, node)
2734 2739
2735 2740 def censorrevision(self, tr, censornode, tombstone=b''):
2736 2741 if (self.version & 0xFFFF) == REVLOGV0:
2737 2742 raise error.RevlogError(
2738 2743 _(b'cannot censor with version %d revlogs') % self.version
2739 2744 )
2740 2745
2741 2746 censorrev = self.rev(censornode)
2742 2747 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2743 2748
2744 2749 if len(tombstone) > self.rawsize(censorrev):
2745 2750 raise error.Abort(
2746 2751 _(b'censor tombstone must be no longer than censored data')
2747 2752 )
2748 2753
2749 2754 # Rewriting the revlog in place is hard. Our strategy for censoring is
2750 2755 # to create a new revlog, copy all revisions to it, then replace the
2751 2756 # revlogs on transaction close.
2752 2757
2753 2758 newindexfile = self.indexfile + b'.tmpcensored'
2754 2759 newdatafile = self.datafile + b'.tmpcensored'
2755 2760
2756 2761 # This is a bit dangerous. We could easily have a mismatch of state.
2757 2762 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2758 2763 newrl.version = self.version
2759 2764 newrl._generaldelta = self._generaldelta
2760 2765 newrl._io = self._io
2761 2766
2762 2767 for rev in self.revs():
2763 2768 node = self.node(rev)
2764 2769 p1, p2 = self.parents(node)
2765 2770
2766 2771 if rev == censorrev:
2767 2772 newrl.addrawrevision(
2768 2773 tombstone,
2769 2774 tr,
2770 2775 self.linkrev(censorrev),
2771 2776 p1,
2772 2777 p2,
2773 2778 censornode,
2774 2779 REVIDX_ISCENSORED,
2775 2780 )
2776 2781
2777 2782 if newrl.deltaparent(rev) != nullrev:
2778 2783 raise error.Abort(
2779 2784 _(
2780 2785 b'censored revision stored as delta; '
2781 2786 b'cannot censor'
2782 2787 ),
2783 2788 hint=_(
2784 2789 b'censoring of revlogs is not '
2785 2790 b'fully implemented; please report '
2786 2791 b'this bug'
2787 2792 ),
2788 2793 )
2789 2794 continue
2790 2795
2791 2796 if self.iscensored(rev):
2792 2797 if self.deltaparent(rev) != nullrev:
2793 2798 raise error.Abort(
2794 2799 _(
2795 2800 b'cannot censor due to censored '
2796 2801 b'revision having delta stored'
2797 2802 )
2798 2803 )
2799 2804 rawtext = self._chunk(rev)
2800 2805 else:
2801 2806 rawtext = self.rawdata(rev)
2802 2807
2803 2808 newrl.addrawrevision(
2804 2809 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2805 2810 )
2806 2811
2807 2812 tr.addbackup(self.indexfile, location=b'store')
2808 2813 if not self._inline:
2809 2814 tr.addbackup(self.datafile, location=b'store')
2810 2815
2811 2816 self.opener.rename(newrl.indexfile, self.indexfile)
2812 2817 if not self._inline:
2813 2818 self.opener.rename(newrl.datafile, self.datafile)
2814 2819
2815 2820 self.clearcaches()
2816 2821 self._loadindex()
2817 2822
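The rewrite-and-swap strategy above is driven through a single call; a hedged sketch, where `rl`, `tr`, and `badnode` are assumed names:

    # Illustrative only: `rl` is an assumed censorable revlog, `tr` an open
    # transaction, and `badnode` the nodeid of the revision to censor.
    rl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')
    # The tombstone must not be longer than the censored data, and the
    # censored revision must be stored as a full snapshot, not a delta.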
2818 2823 def verifyintegrity(self, state):
2819 2824 """Verifies the integrity of the revlog.
2820 2825
2821 2826 Yields ``revlogproblem`` instances describing problems that are
2822 2827 found.
2823 2828 """
2824 2829 dd, di = self.checksize()
2825 2830 if dd:
2826 2831 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2827 2832 if di:
2828 2833 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2829 2834
2830 2835 version = self.version & 0xFFFF
2831 2836
2832 2837 # The verifier tells us what version revlog we should be.
2833 2838 if version != state[b'expectedversion']:
2834 2839 yield revlogproblem(
2835 2840 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2836 2841 % (self.indexfile, version, state[b'expectedversion'])
2837 2842 )
2838 2843
2839 2844 state[b'skipread'] = set()
2840 2845
2841 2846 for rev in self:
2842 2847 node = self.node(rev)
2843 2848
2844 2849 # Verify contents. 4 cases to care about:
2845 2850 #
2846 2851 # common: the most common case
2847 2852 # rename: with a rename
2848 2853 # meta: file content starts with b'\1\n', the metadata
2849 2854 # header defined in filelog.py, but without a rename
2850 2855 # ext: content stored externally
2851 2856 #
2852 2857 # More formally, their differences are shown below:
2853 2858 #
2854 2859 # | common | rename | meta | ext
2855 2860 # -------------------------------------------------------
2856 2861 # flags() | 0 | 0 | 0 | not 0
2857 2862 # renamed() | False | True | False | ?
2858 2863 # rawtext[0:2]=='\1\n'| False | True | True | ?
2859 2864 #
2860 2865 # "rawtext" means the raw text stored in revlog data, which
2861 2866 # could be retrieved by "rawdata(rev)". "text"
2862 2867 # mentioned below is "revision(rev)".
2863 2868 #
2864 2869 # There are 3 different lengths stored physically:
2865 2870 # 1. L1: rawsize, stored in revlog index
2866 2871 # 2. L2: len(rawtext), stored in revlog data
2867 2872 # 3. L3: len(text), stored in revlog data if flags==0, or
2868 2873 # possibly somewhere else if flags!=0
2869 2874 #
2870 2875 # L1 should be equal to L2. L3 could be different from them.
2871 2876 # "text" may or may not affect commit hash depending on flag
2872 2877 # processors (see flagutil.addflagprocessor).
2873 2878 #
2874 2879 # | common | rename | meta | ext
2875 2880 # -------------------------------------------------
2876 2881 # rawsize() | L1 | L1 | L1 | L1
2877 2882 # size() | L1 | L2-LM | L1(*) | L1 (?)
2878 2883 # len(rawtext) | L2 | L2 | L2 | L2
2879 2884 # len(text) | L2 | L2 | L2 | L3
2880 2885 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2881 2886 #
2882 2887 # LM: length of metadata, depending on rawtext
2883 2888 # (*): not ideal, see comment in filelog.size
2884 2889 # (?): could be "- len(meta)" if the resolved content has
2885 2890 # rename metadata
2886 2891 #
2887 2892 # Checks needed to be done:
2888 2893 # 1. length check: L1 == L2, in all cases.
2889 2894 # 2. hash check: depending on flag processor, we may need to
2890 2895 # use either "text" (external), or "rawtext" (in revlog).
2891 2896
2892 2897 try:
2893 2898 skipflags = state.get(b'skipflags', 0)
2894 2899 if skipflags:
2895 2900 skipflags &= self.flags(rev)
2896 2901
2897 2902 if skipflags:
2898 2903 state[b'skipread'].add(node)
2899 2904 else:
2900 2905 # Side-effect: read content and verify hash.
2901 2906 self.revision(node)
2902 2907
2903 2908 l1 = self.rawsize(rev)
2904 2909 l2 = len(self.rawdata(node))
2905 2910
2906 2911 if l1 != l2:
2907 2912 yield revlogproblem(
2908 2913 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2909 2914 node=node,
2910 2915 )
2911 2916
2912 2917 except error.CensoredNodeError:
2913 2918 if state[b'erroroncensored']:
2914 2919 yield revlogproblem(
2915 2920 error=_(b'censored file data'), node=node
2916 2921 )
2917 2922 state[b'skipread'].add(node)
2918 2923 except Exception as e:
2919 2924 yield revlogproblem(
2920 2925 error=_(b'unpacking %s: %s')
2921 2926 % (short(node), stringutil.forcebytestr(e)),
2922 2927 node=node,
2923 2928 )
2924 2929 state[b'skipread'].add(node)
2925 2930
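A hedged sketch of consuming the generator; the `state` keys below are the ones read in the method body, while the surrounding values and the `rl` name are assumptions:

    # Illustrative only: `rl` is an assumed revlog instance.
    state = {
        b'expectedversion': rl.version & 0xFFFF,  # assumed: check against itself
        b'skipflags': 0,
        b'erroroncensored': True,
    }
    for problem in rl.verifyintegrity(state):
        if problem.error:
            print(b'error: ' + problem.error)
        elif problem.warning:
            print(b'warning: ' + problem.warning)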
2926 2931 def storageinfo(
2927 2932 self,
2928 2933 exclusivefiles=False,
2929 2934 sharedfiles=False,
2930 2935 revisionscount=False,
2931 2936 trackedsize=False,
2932 2937 storedsize=False,
2933 2938 ):
2934 2939 d = {}
2935 2940
2936 2941 if exclusivefiles:
2937 2942 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2938 2943 if not self._inline:
2939 2944 d[b'exclusivefiles'].append((self.opener, self.datafile))
2940 2945
2941 2946 if sharedfiles:
2942 2947 d[b'sharedfiles'] = []
2943 2948
2944 2949 if revisionscount:
2945 2950 d[b'revisionscount'] = len(self)
2946 2951
2947 2952 if trackedsize:
2948 2953 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2949 2954
2950 2955 if storedsize:
2951 2956 d[b'storedsize'] = sum(
2952 2957 self.opener.stat(path).st_size for path in self.files()
2953 2958 )
2954 2959
2955 2960 return d
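A brief, hedged usage sketch; `rl` is an assumed revlog and the commented result is purely illustrative:

    # Illustrative only: `rl` is assumed to be an existing revlog.
    info = rl.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    # e.g. {b'revisionscount': 12, b'trackedsize': 40960, b'storedsize': 8192}
    print(info[b'revisionscount'], info[b'trackedsize'], info[b'storedsize'])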
@@ -1,286 +1,285 b''
1 1 # unionrepo.py - repository class for viewing union of repository changesets
2 2 #
3 3 # Derived from bundlerepo.py
4 4 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
5 5 # Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Repository class for "in-memory pull" of one local repository to another,
11 11 allowing operations like diff and log with revsets.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 from .i18n import _
17 17 from .pycompat import getattr
18 18
19 19 from . import (
20 20 changelog,
21 21 cmdutil,
22 22 encoding,
23 23 error,
24 24 filelog,
25 25 localrepo,
26 26 manifest,
27 27 mdiff,
28 28 pathutil,
29 29 revlog,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33
34 34
35 35 class unionrevlog(revlog.revlog):
36 36 def __init__(self, opener, indexfile, revlog2, linkmapper):
37 37 # How it works:
38 38 # To retrieve a revision, we just need to know the node id so we can
39 39 # look it up in revlog2.
40 40 #
41 41 # To differentiate a rev in the second revlog from a rev in the revlog,
42 42 # we check revision against repotiprev.
43 43 opener = vfsmod.readonlyvfs(opener)
44 44 revlog.revlog.__init__(self, opener, indexfile)
45 45 self.revlog2 = revlog2
46 46
47 47 n = len(self)
48 48 self.repotiprev = n - 1
49 49 self.bundlerevs = set() # used by 'bundle()' revset expression
50 50 for rev2 in self.revlog2:
51 51 rev = self.revlog2.index[rev2]
52 52 # rev numbers - in revlog2, very different from self.rev
53 53 _start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev
54 54 flags = _start & 0xFFFF
55 55
56 56 if linkmapper is None: # link is to same revlog
57 57 assert linkrev == rev2 # we never link back
58 58 link = n
59 59 else: # rev must be mapped from repo2 cl to unified cl by linkmapper
60 60 link = linkmapper(linkrev)
61 61
62 62 if linkmapper is not None: # not the same revlog; the base rev is mapped too
63 63 base = linkmapper(base)
64 64
65 65 if node in self.nodemap:
66 66 # this happens for the common revlog revisions
67 67 self.bundlerevs.add(self.nodemap[node])
68 68 continue
69 69
70 70 p1node = self.revlog2.node(p1rev)
71 71 p2node = self.revlog2.node(p2rev)
72 72
73 73 # TODO: it's probably wrong to set compressed length to None, but
74 74 # I have no idea if csize is valid in the base revlog context.
75 75 e = (
76 76 flags,
77 77 None,
78 78 rsize,
79 79 base,
80 80 link,
81 81 self.rev(p1node),
82 82 self.rev(p2node),
83 83 node,
84 84 )
85 85 self.index.append(e)
86 self.nodemap[node] = n
87 86 self.bundlerevs.add(n)
88 87 n += 1
89 88
90 89 def _chunk(self, rev):
91 90 if rev <= self.repotiprev:
92 91 return revlog.revlog._chunk(self, rev)
93 92 return self.revlog2._chunk(self.node(rev))
94 93
95 94 def revdiff(self, rev1, rev2):
96 95 """return or calculate a delta between two revisions"""
97 96 if rev1 > self.repotiprev and rev2 > self.repotiprev:
98 97 return self.revlog2.revdiff(
99 98 self.revlog2.rev(self.node(rev1)),
100 99 self.revlog2.rev(self.node(rev2)),
101 100 )
102 101 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
103 102 return super(unionrevlog, self).revdiff(rev1, rev2)
104 103
105 104 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
106 105
107 106 def _revisiondata(self, nodeorrev, _df=None, raw=False):
108 107 if isinstance(nodeorrev, int):
109 108 rev = nodeorrev
110 109 node = self.node(rev)
111 110 else:
112 111 node = nodeorrev
113 112 rev = self.rev(node)
114 113
115 114 if rev > self.repotiprev:
116 115 # work around manifestrevlog NOT being a revlog
117 116 revlog2 = getattr(self.revlog2, '_revlog', self.revlog2)
118 117 func = revlog2._revisiondata
119 118 else:
120 119 func = super(unionrevlog, self)._revisiondata
121 120 return func(node, _df=_df, raw=raw)
122 121
123 122 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
124 123 raise NotImplementedError
125 124
126 125 def addgroup(
127 126 self,
128 127 deltas,
129 128 linkmapper,
130 129 transaction,
131 130 addrevisioncb=None,
132 131 maybemissingparents=False,
133 132 ):
134 133 raise NotImplementedError
135 134
136 135 def strip(self, minlink, transaction):
137 136 raise NotImplementedError
138 137
139 138 def checksize(self):
140 139 raise NotImplementedError
141 140
142 141
143 142 class unionchangelog(unionrevlog, changelog.changelog):
144 143 def __init__(self, opener, opener2):
145 144 changelog.changelog.__init__(self, opener)
146 145 linkmapper = None
147 146 changelog2 = changelog.changelog(opener2)
148 147 unionrevlog.__init__(
149 148 self, opener, self.indexfile, changelog2, linkmapper
150 149 )
151 150
152 151
153 152 class unionmanifest(unionrevlog, manifest.manifestrevlog):
154 153 def __init__(self, opener, opener2, linkmapper):
155 154 manifest.manifestrevlog.__init__(self, opener)
156 155 manifest2 = manifest.manifestrevlog(opener2)
157 156 unionrevlog.__init__(
158 157 self, opener, self.indexfile, manifest2, linkmapper
159 158 )
160 159
161 160
162 161 class unionfilelog(filelog.filelog):
163 162 def __init__(self, opener, path, opener2, linkmapper, repo):
164 163 filelog.filelog.__init__(self, opener, path)
165 164 filelog2 = filelog.filelog(opener2, path)
166 165 self._revlog = unionrevlog(
167 166 opener, self.indexfile, filelog2._revlog, linkmapper
168 167 )
169 168 self._repo = repo
170 169 self.repotiprev = self._revlog.repotiprev
171 170 self.revlog2 = self._revlog.revlog2
172 171
173 172 def iscensored(self, rev):
174 173 """Check if a revision is censored."""
175 174 if rev <= self.repotiprev:
176 175 return filelog.filelog.iscensored(self, rev)
177 176 node = self.node(rev)
178 177 return self.revlog2.iscensored(self.revlog2.rev(node))
179 178
180 179
181 180 class unionpeer(localrepo.localpeer):
182 181 def canpush(self):
183 182 return False
184 183
185 184
186 185 class unionrepository(object):
187 186 """Represents the union of data in 2 repositories.
188 187
189 188 Instances are not usable if constructed directly. Use ``instance()``
190 189 or ``makeunionrepository()`` to create a usable instance.
191 190 """
192 191
193 192 def __init__(self, repo2, url):
194 193 self.repo2 = repo2
195 194 self._url = url
196 195
197 196 self.ui.setconfig(b'phases', b'publish', False, b'unionrepo')
198 197
199 198 @localrepo.unfilteredpropertycache
200 199 def changelog(self):
201 200 return unionchangelog(self.svfs, self.repo2.svfs)
202 201
203 202 @localrepo.unfilteredpropertycache
204 203 def manifestlog(self):
205 204 rootstore = unionmanifest(
206 205 self.svfs, self.repo2.svfs, self.unfiltered()._clrev
207 206 )
208 207 return manifest.manifestlog(
209 208 self.svfs, self, rootstore, self.narrowmatch()
210 209 )
211 210
212 211 def _clrev(self, rev2):
213 212 """map from repo2 changelog rev to temporary rev in self.changelog"""
214 213 node = self.repo2.changelog.node(rev2)
215 214 return self.changelog.rev(node)
216 215
217 216 def url(self):
218 217 return self._url
219 218
220 219 def file(self, f):
221 220 return unionfilelog(
222 221 self.svfs, f, self.repo2.svfs, self.unfiltered()._clrev, self
223 222 )
224 223
225 224 def close(self):
226 225 self.repo2.close()
227 226
228 227 def cancopy(self):
229 228 return False
230 229
231 230 def peer(self):
232 231 return unionpeer(self)
233 232
234 233 def getcwd(self):
235 234 return encoding.getcwd() # always outside the repo
236 235
237 236
238 237 def instance(ui, path, create, intents=None, createopts=None):
239 238 if create:
240 239 raise error.Abort(_(b'cannot create new union repository'))
241 240 parentpath = ui.config(b"bundle", b"mainreporoot")
242 241 if not parentpath:
243 242 # try to find the correct path to the working directory repo
244 243 parentpath = cmdutil.findrepo(encoding.getcwd())
245 244 if parentpath is None:
246 245 parentpath = b''
247 246 if parentpath:
248 247 # Try to make the full path relative so we get a nice, short URL.
249 248 # In particular, we don't want temp dir names in test outputs.
250 249 cwd = encoding.getcwd()
251 250 if parentpath == cwd:
252 251 parentpath = b''
253 252 else:
254 253 cwd = pathutil.normasprefix(cwd)
255 254 if parentpath.startswith(cwd):
256 255 parentpath = parentpath[len(cwd) :]
257 256 if path.startswith(b'union:'):
258 257 s = path.split(b":", 1)[1].split(b"+", 1)
259 258 if len(s) == 1:
260 259 repopath, repopath2 = parentpath, s[0]
261 260 else:
262 261 repopath, repopath2 = s
263 262 else:
264 263 repopath, repopath2 = parentpath, path
265 264
266 265 return makeunionrepository(ui, repopath, repopath2)
267 266
268 267
269 268 def makeunionrepository(ui, repopath1, repopath2):
270 269 """Make a union repository object from 2 local repo paths."""
271 270 repo1 = localrepo.instance(ui, repopath1, create=False)
272 271 repo2 = localrepo.instance(ui, repopath2, create=False)
273 272
274 273 url = b'union:%s+%s' % (
275 274 util.expandpath(repopath1),
276 275 util.expandpath(repopath2),
277 276 )
278 277
279 278 class derivedunionrepository(unionrepository, repo1.__class__):
280 279 pass
281 280
282 281 repo = repo1
283 282 repo.__class__ = derivedunionrepository
284 283 unionrepository.__init__(repo1, repo2, url)
285 284
286 285 return repo
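As a rough, hedged usage sketch of the helper above (the repository paths and the ui setup are assumptions, not taken from this module):

    # Illustrative only: paths are made up; a real caller usually reaches this
    # code through a 'union:repo1+repo2' URL handled by instance() above.
    from mercurial import ui as uimod
    from mercurial import unionrepo

    ui = uimod.ui.load()
    repo = unionrepo.makeunionrepository(ui, b'/path/to/repo1', b'/path/to/repo2')
    print(len(repo.changelog))   # revisions from both repositories, read-only
    repo.close()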