##// END OF EJS Templates
obsolete: fix n^2 marker computation behavior...
obsolete: fix n^2 marker computation behavior. Previously, if you ran obsolete.createmarkers with a bunch of markers that did not have successors (as when you do a prune), it exhibited O(n^2) behavior because the loop would read the changelog (to get ctx.parents()) and then add a marker on every iteration. Adding a marker invalidated the computehidden cache, and reading the changelog recomputed it. This resulted in pruning 150 commits taking 150+ seconds in a large repo. The fix is to separate the reading part of the loop from the writing part.

File last commit:

r27917:97e0dc6d stable
r27984:e60e13a8 default
Show More
repoview.py
353 lines | 13.2 KiB | text/x-python | PythonLexer
Pierre-Yves David
clfilter: add actual repo filtering mechanism...
r18100 # repoview.py - Filtered view of a localrepo object
#
# Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
# Logilab SA <contact@logilab.fr>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Gregory Szorc
repoview: use absolute_import
r25972 from __future__ import absolute_import
Pierre-Yves David
clfilter: add actual repo filtering mechanism...
r18100 import copy
Gregory Szorc
repoview: use absolute_import
r25972 import heapq
David Soria Parra
repoview: add caching bits...
r22150 import struct
Gregory Szorc
repoview: use absolute_import
r25972
from .node import nullrev
from . import (
error,
obsolete,
phases,
tags as tagsmod,
util,
)
Pierre-Yves David
clfilter: introduces a hidden filter...
r18242
Pierre-Yves David
repoview: extract hideable revision computation in a dedicated function...
def hideablerevs(repo):
    """Return the revisions that are candidates for hiding.

    The candidates are whatever ``obsolete.getrevs`` reports for the
    'obsolete' set of ``repo``.

    This is kept as a standalone function so that extensions can wrap it.
    """
    candidates = obsolete.getrevs(repo, 'obsolete')
    return candidates
Durham Goode
repoview: improve compute staticblockers perf...
def _getstatichidden(repo):
    """Compute the revisions to hide, disregarding dynamic blockers.

    To keep a consistent graph, a revision cannot be hidden while one of
    its descendants stays visible.  Starting from the hideable revisions,
    this function removes every revision that has a visible descendant and
    returns what is left as a set.

    A second pass, done elsewhere, applies "dynamic blockers" such as
    bookmarks or working directory parents.

    ``repo`` must be unfiltered (its changelog must have no filtered revs).
    """
    assert not repo.changelog.filteredrevs
    hidden = set(hideablerevs(repo))
    if not hidden:
        return hidden

    phaseof = repo._phasecache.phase
    parentsof = repo.changelog.parentrevs
    # Heads whose phase is falsy (public) are skipped: they are guaranteed
    # not to be hidden.  Revisions are stored negated so that the min-heap
    # hands back the highest revision number first.
    pending = [-head for head in repo.changelog.headrevs()
               if phaseof(repo, head)]
    heapq.heapify(pending)
    # No need to seed this with the heads: they have no children, so they
    # can never be reached again as somebody's parent.
    visited = set()
    while pending:
        rev = -heapq.heappop(pending)
        # All children of 'rev' have been processed at this point.  If none
        # of them removed 'rev' from 'hidden', 'rev' is going to be hidden.
        visible = rev not in hidden
        for parent in parentsof(rev):
            if parent == nullrev:
                continue
            if visible:
                # a visible revision forces its parents to be visible too
                hidden.discard(parent)
            if parent in visited:
                # never queue the same revision twice
                continue
            visited.add(parent)
            # The phase consulted is the one of 'rev' (the child): parents
            # of a public revision are not queued for processing.
            if phaseof(repo, rev):
                heapq.heappush(pending, -parent)
    return hidden
David Soria Parra
repoview: split _gethiddenblockers...
r22149
def _getdynamicblockers(repo):
    """Return the non-cacheable revisions that prevent hiding changesets.

    The result is the set of revision numbers of:

    - the parents of the working directory,
    - the nodes bookmarks point to,
    - the nodes of local tags that are known to the changelog.

    These are likely to change but unlikely to create hidden blockers.
    They are not cached, so be careful when adding extra computation here.
    """
    cl = repo.changelog
    blockers = set()
    for wdparent in repo[None].parents():
        blockers.add(wdparent.rev())
    for bmnode in repo._bookmarks.values():
        blockers.add(cl.rev(bmnode))

    localtags = {}
    tagsmod.readlocaltags(repo.ui, repo, localtags, {})
    if localtags:
        torev, known = cl.rev, cl.nodemap
        for entry in localtags.values():
            # the first item of a tag entry is the node it points to
            node = entry[0]
            if node in known:
                blockers.add(torev(node))
    return blockers
David Soria Parra
repoview: add caching bits...
# Version number written as the first two bytes of the hidden cache file;
# a cache whose stored version differs is ignored when read back.
cacheversion = 1
# Path of the hidden cache file, opened through ``repo.vfs``.
cachefile = 'cache/hidden'
def cachehash(repo, hideable):
    """Return a sha1 digest identifying the state a hidden cache is valid for.

    The digest covers the concatenated repository heads and the hash of the
    ``hideable`` revisions (as a frozenset).  It is stored in the cache
    file; on read, a cache whose stored digest differs from a freshly
    computed one is discarded.
    """
    hasher = util.sha1()
    heads = ''.join(repo.heads())
    hasher.update(heads)
    hideablekey = str(hash(frozenset(hideable)))
    hasher.update(hideablekey)
    return hasher.digest()
Pierre-Yves David
repoview: extract actual hidden cache writing in its own function...
def _writehiddencache(cachefile, cachehash, hidden):
    """Write the hidden revisions to an already opened cache file.

    ``cachefile`` is a writable file object, ``cachehash`` the validation
    digest to store and ``hidden`` the collection of hidden revisions.

    Layout: a big-endian unsigned short holding ``cacheversion``, the
    digest, then the sorted revisions as big-endian 4-byte integers.
    """
    # Pack the payload before touching the file so that a packing error
    # leaves the file untouched.
    revs = sorted(hidden)
    payload = struct.pack('>%ii' % len(hidden), *revs)
    header = struct.pack(">H", cacheversion)
    cachefile.write(header)
    cachefile.write(cachehash)
    cachefile.write(payload)
David Soria Parra
repoview: add caching bits...
def trywritehiddencache(repo, hideable, hidden):
    """Write the cache of hidden changesets to disk, on a best-effort basis.

    Nothing is written if the wlock cannot be obtained without waiting.
    I/O errors and a held lock are reported through ``repo.ui.debug`` and
    otherwise ignored.

    The cache consists of a 22 byte header:
      2 byte version number of the cache
     20 byte sha1 to validate the cache
    followed by n*4 bytes of hidden revs.
    """
    wlock = None
    try:
        wlock = repo.wlock(wait=False)
        fh = None
        try:
            # write cache to file
            newhash = cachehash(repo, hideable)
            fh = repo.vfs.open(cachefile, 'w+b', atomictemp=True)
            _writehiddencache(fh, newhash, hidden)
        finally:
            # Close inside the locked section so that a failing close() is
            # reported like any other write error and can never prevent
            # the wlock from being released below.
            # NOTE(review): the file is closed even when writing failed;
            # with atomictemp=True, confirm that close() cannot publish a
            # partially written cache.
            if fh is not None:
                fh.close()
    except (IOError, OSError):
        repo.ui.debug('error writing hidden changesets cache\n')
    except error.LockHeld:
        repo.ui.debug('cannot obtain lock to write hidden changesets cache\n')
    finally:
        if wlock is not None:
            wlock.release()
def tryreadcache(repo, hideable):
    """Read the hidden cache if it exists and is valid.

    Returns a frozenset of hidden revisions, or None when the cache file
    is missing, was written with another ``cacheversion``, carries a digest
    that does not match ``cachehash(repo, hideable)``, is corrupted, or
    cannot be read.
    """
    fh = None
    try:
        if not repo.vfs.exists(cachefile):
            return None
        fh = repo.vfs.open(cachefile, 'rb')
        version, = struct.unpack(">H", fh.read(2))
        oldhash = fh.read(20)
        newhash = cachehash(repo, hideable)
        if (cacheversion, oldhash) != (version, newhash):
            return None
        # cache is valid, so we can start reading the hidden revs
        data = fh.read()
        # Explicit floor division: each rev takes 4 bytes.  A trailing
        # partial record makes unpack() raise struct.error, handled below.
        count = len(data) // 4
        return frozenset(struct.unpack('>%ii' % count, data))
    except struct.error:
        repo.ui.debug('corrupted hidden cache\n')
        # No need to fix the content as it will get rewritten
        return None
    except (IOError, OSError):
        repo.ui.debug('cannot read hidden cache\n')
        return None
    finally:
        if fh is not None:
            fh.close()
Pierre-Yves David
clfilter: introduces a hidden filter...
def computehidden(repo):
    """Compute the set of hidden revisions to filter.

    During most operations hidden revisions should be filtered.

    The static part of the computation is read from the on-disk cache when
    possible, and recomputed then written back otherwise.  Revisions kept
    alive by dynamic blockers are removed from the result afterwards.
    """
    assert not repo.changelog.filteredrevs

    hideable = hideablerevs(repo)
    if not hideable:
        return frozenset()

    cl = repo.changelog
    hidden = tryreadcache(repo, hideable)
    if hidden is None:
        hidden = frozenset(_getstatichidden(repo))
        trywritehiddencache(repo, hideable, hidden)

    # check if we have wd parents, bookmarks or tags pointing to hidden
    # changesets and remove those, together with their ancestors.
    dynamic = hidden & _getdynamicblockers(repo)
    if dynamic:
        blocked = cl.ancestors(dynamic, inclusive=True)
        hidden = frozenset(r for r in hidden if r not in blocked)
    return hidden
Pierre-Yves David
clfilter: introduces a hidden filter...
r18242
Pierre-Yves David
clfilter: introduce a "unserver" filtering mode...
def computeunserved(repo):
    """Compute the set of revisions to filter when acting as a server.

    Secret and hidden changesets should not pretend to be here: the result
    is the 'visible' filter set plus every revision whose phase is secret
    or higher.
    """
    assert not repo.changelog.filteredrevs
    hiddens = filterrevs(repo, 'visible')
    # fast path in the simple case to avoid the impact of non optimised code
    if not phases.hassecret(repo):
        return hiddens

    cl = repo.changelog
    secret = phases.secret
    getphase = repo._phasecache.phase
    # no revision below the lowest secret root is scanned
    first = min(cl.rev(n) for n in repo._phasecache.phaseroots[secret])
    revs = cl.revs(start=first)
    secrets = set(r for r in revs if getphase(repo, r) >= secret)
    return frozenset(hiddens | secrets)
Pierre-Yves David
clfilter: add actual repo filtering mechanism...
r18100
Pierre-Yves David
clfilter: add mutable filtering...
def computemutable(repo):
    """Compute the set of mutable revisions.

    Returns the frozenset of revisions from the 'base' filter set whose
    phase is truthy (this module treats a falsy phase as public).  When no
    phase roots are recorded beyond the first phase, the empty frozenset
    is returned without looking at any revision.
    """
    assert not repo.changelog.filteredrevs
    # fast check to avoid revset call on huge repo
    if not any(repo._phasecache.phaseroots[1:]):
        return frozenset()
    getphase = repo._phasecache.phase
    maymutable = filterrevs(repo, 'base')
    return frozenset(r for r in maymutable if getphase(repo, r))
Pierre-Yves David
clfilter: add impactable filter...
def computeimpactable(repo):
    """Compute everything impactable by a mutable revision.

    The immutable filter still has some chance to get invalidated.  This
    will happen when:

    - you garbage collect hidden changesets,
    - the public phase is moved backward,
    - something is changed in the filtering (this could be fixed).

    This filters out any mutable changeset and any public changeset that
    may be impacted by something happening to a mutable revision.

    This is achieved by filtering every revision whose number is equal to
    or higher than that of the first mutable changeset.
    """
    assert not repo.changelog.filteredrevs
    cl = repo.changelog
    firstmutable = len(cl)
    for roots in repo._phasecache.phaseroots[1:]:
        if not roots:
            continue
        lowest = min(cl.rev(r) for r in roots)
        firstmutable = min(firstmutable, lowest)
    # protect from nullrev root
    if firstmutable < 0:
        firstmutable = 0
    return frozenset(xrange(firstmutable, len(cl)))
Pierre-Yves David
clfilter: add actual repo filtering mechanism...
r18100 # function to compute filtered set
Pierre-Yves David
filter: add a comment so that people do not forget to update subsettable...
r20196 #
Mads Kiilerich
comments: fix minor spelling issues found with spell checker
r20549 # When adding a new filter you MUST update the table at:
Pierre-Yves David
filter: add a comment so that people do not forget to update subsettable...
r20196 # mercurial.branchmap.subsettable
# Otherwise your filter will have to recompute all its branches cache
# from scratch (very slow).
Kevin Bullock
filtering: rename filters to their antonyms...
# Maps a filter name to the function computing the set of revisions that
# this filter removes from view.
filtertable = {
    'visible': computehidden,
    'served': computeunserved,
    'immutable': computemutable,
    'base': computeimpactable,
}
Pierre-Yves David
clfilter: add actual repo filtering mechanism...
r18100
Kevin Bullock
filtering: rename filters to their antonyms...
def filterrevs(repo, filtername):
    """Return the set of filtered revisions for the filter ``filtername``.

    The set is computed on the unfiltered repository the first time it is
    requested and memoized in ``repo.filteredrevcache`` afterwards.
    """
    if filtername in repo.filteredrevcache:
        return repo.filteredrevcache[filtername]
    compute = filtertable[filtername]
    revs = compute(repo.unfiltered())
    repo.filteredrevcache[filtername] = revs
    return repo.filteredrevcache[filtername]
Pierre-Yves David
clfilter: add actual repo filtering mechanism...
r18100
class repoview(object):
    """Provide a read/write view of a repo through a filtered changelog

    This object is used to access a filtered version of a repository without
    altering the original repository object itself. We can not alter the
    original object for two main reasons:

    - It prevents the use of a repo with multiple filters at the same time. In
      particular when multiple threads are involved.
    - It makes scope of the filtering harder to control.

    This object behaves very closely to the original repository. All attribute
    operations are done on the original repository:

    - An access to `repoview.someattr` actually returns `repo.someattr`,
    - A write to `repoview.someattr` actually sets value of `repo.someattr`,
    - A deletion of `repoview.someattr` actually drops `someattr`
      from `repo.__dict__`.

    The only exception is the `changelog` property. It is overridden to return
    a shallow copy of `repo.changelog` with some revisions filtered. The
    `filtername` attribute of the view controls the revisions that need to be
    filtered. (The fact the changelog is copied is an implementation detail.)

    Unlike attributes, this object intercepts all method calls. This means that
    all methods are run on the `repoview` object with the filtered `changelog`
    property. For this purpose the simple `repoview` class must be mixed with
    the actual class of the repository. This ensures that the resulting
    `repoview` object has the very same methods as the repo object. This
    leads to the property below.

        repoview.method() --> repo.__class__.method(repoview)

    The inheritance has to be done dynamically because `repo` can be of any
    subclass of `localrepo`. Eg: `bundlerepo` or `statichttprepo`.
    """

    def __init__(self, repo, filtername):
        # object.__setattr__ is required here: the __setattr__ defined on
        # this class forwards every write to the proxied repository.
        object.__setattr__(self, '_unfilteredrepo', repo)
        object.__setattr__(self, 'filtername', filtername)
        object.__setattr__(self, '_clcachekey', None)
        object.__setattr__(self, '_clcache', None)

    # Not a propertycache on purpose: the filtered changelog is cached by
    # hand below, keyed on the state of the unfiltered changelog.
    @property
    def changelog(self):
        """return a filtered version of the changelog

        The filtered copy is reused as long as the cache key computed from
        the unfiltered changelog and the filtered revisions is unchanged.

        this changelog must not be used for writing"""
        unfi = self._unfilteredrepo
        unfichangelog = unfi.changelog
        # bypass call to changelog.method
        unfiindex = unfichangelog.index
        # NOTE(review): presumably the index carries one extra trailing
        # entry, hence the "- 1" -- confirm against the index format.
        unfilen = len(unfiindex) - 1
        unfinode = unfiindex[unfilen - 1][7]
        revs = filterrevs(unfi, self.filtername)
        cl = self._clcache
        newkey = (unfilen, unfinode, hash(revs), unfichangelog._delayed)
        if cl is not None and newkey != self._clcachekey:
            # the cached copy is stale
            cl = None
        # could have been made None by the previous if
        if cl is None:
            cl = copy.copy(unfichangelog)
            cl.filteredrevs = revs
            object.__setattr__(self, '_clcache', cl)
            object.__setattr__(self, '_clcachekey', newkey)
        return cl

    def unfiltered(self):
        """Return an unfiltered version of a repo"""
        return self._unfilteredrepo

    def filtered(self, name):
        """Return a filtered version of a repository"""
        if name == self.filtername:
            return self
        return self.unfiltered().filtered(name)

    # every attribute access is forwarded to the proxied repo
    def __getattr__(self, attr):
        return getattr(self._unfilteredrepo, attr)

    def __setattr__(self, attr, value):
        return setattr(self._unfilteredrepo, attr, value)

    def __delattr__(self, attr):
        return delattr(self._unfilteredrepo, attr)

    # The `requirements` attribute is initialized during __init__. But
    # __getattr__ won't be called as it also exists on the class. We need
    # explicit forwarding to main repo here
    @property
    def requirements(self):
        return self._unfilteredrepo.requirements