##// END OF EJS Templates
rust-status: traverse working directory in parallel...
rust-status: traverse working directory in parallel Using `rayon` for this task ensures that we are using the same work-stealing threadpool for everything. This change introduces `crossbeam` as an explicit dependency, although it is already a dependency of `rayon`. It provides better structures for multi-threaded tasks than the stdlib. Differential Revision: https://phab.mercurial-scm.org/D8251

File last commit:

r44968:ec54b3d2 default
r45026:fe7d2cf0 default
Show More
gitlog.py
466 lines | 14.2 KiB | text/x-python | PythonLexer
from __future__ import absolute_import
from mercurial.i18n import _
from mercurial import (
ancestor,
changelog as hgchangelog,
dagop,
encoding,
error,
manifest,
node as nodemod,
pycompat,
)
from mercurial.interfaces import (
repository,
util as interfaceutil,
)
from mercurial.utils import stringutil
from . import (
gitutil,
index,
manifest as gitmanifest,
)
pygit2 = gitutil.get_pygit2()
class baselog(object): # revlog.revlog):
"""Common implementations between changelog and manifestlog."""
def __init__(self, gr, db):
self.gitrepo = gr
self._db = db
def __len__(self):
return int(
self._db.execute('SELECT COUNT(*) FROM changelog').fetchone()[0]
)
def rev(self, n):
if n == nodemod.nullid:
return -1
t = self._db.execute(
'SELECT rev FROM changelog WHERE node = ?', (gitutil.togitnode(n),)
).fetchone()
if t is None:
raise error.LookupError(n, b'00changelog.i', _(b'no node %d'))
return t[0]
def node(self, r):
if r == nodemod.nullrev:
return nodemod.nullid
t = self._db.execute(
'SELECT node FROM changelog WHERE rev = ?', (r,)
).fetchone()
if t is None:
raise error.LookupError(r, b'00changelog.i', _(b'no node'))
return nodemod.bin(t[0])
def hasnode(self, n):
t = self._db.execute(
'SELECT node FROM changelog WHERE node = ?', (n,)
).fetchone()
return t is not None
class baselogindex(object):
def __init__(self, log):
self._log = log
def has_node(self, n):
return self._log.rev(n) != -1
def __len__(self):
return len(self._log)
def __getitem__(self, idx):
p1rev, p2rev = self._log.parentrevs(idx)
# TODO: it's messy that the index leaks so far out of the
# storage layer that we have to implement things like reading
# this raw tuple, which exposes revlog internals.
return (
# Pretend offset is just the index, since we don't really care.
idx,
# Same with lengths
idx, # length
idx, # rawsize
-1, # delta base
idx, # linkrev TODO is this right?
p1rev,
p2rev,
self._log.node(idx),
)
# TODO: an interface for the changelog type?
class changelog(baselog):
def __contains__(self, rev):
try:
self.node(rev)
return True
except error.LookupError:
return False
def __iter__(self):
return iter(pycompat.xrange(len(self)))
@property
def filteredrevs(self):
# TODO: we should probably add a refs/hg/ namespace for hidden
# heads etc, but that's an idea for later.
return set()
@property
def index(self):
return baselogindex(self)
@property
def nodemap(self):
r = {
nodemod.bin(v[0]): v[1]
for v in self._db.execute('SELECT node, rev FROM changelog')
}
r[nodemod.nullid] = nodemod.nullrev
return r
def tip(self):
t = self._db.execute(
'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1'
).fetchone()
if t:
return nodemod.bin(t[0])
return nodemod.nullid
def revs(self, start=0, stop=None):
if stop is None:
stop = self.tip()
t = self._db.execute(
'SELECT rev FROM changelog '
'WHERE rev >= ? AND rev <= ? '
'ORDER BY REV ASC',
(start, stop),
)
return (int(r[0]) for r in t)
def _partialmatch(self, id):
if nodemod.wdirhex.startswith(id):
raise error.WdirUnsupported
candidates = [
nodemod.bin(x[0])
for x in self._db.execute(
'SELECT node FROM changelog WHERE node LIKE ?', (id + b'%',)
)
]
if nodemod.nullhex.startswith(id):
candidates.append(nodemod.nullid)
if len(candidates) > 1:
raise error.AmbiguousPrefixLookupError(
id, b'00changelog.i', _(b'ambiguous identifier')
)
if candidates:
return candidates[0]
return None
def flags(self, rev):
return 0
def shortest(self, node, minlength=1):
nodehex = nodemod.hex(node)
for attempt in pycompat.xrange(minlength, len(nodehex) + 1):
candidate = nodehex[:attempt]
matches = int(
self._db.execute(
'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
(pycompat.sysstr(candidate + b'%'),),
).fetchone()[0]
)
if matches == 1:
return candidate
return nodehex
def headrevs(self, revs=None):
realheads = [
int(x[0])
for x in self._db.execute(
'SELECT rev FROM changelog '
'INNER JOIN heads ON changelog.node = heads.node'
)
]
if revs:
return sorted([r for r in revs if r in realheads])
return sorted(realheads)
def changelogrevision(self, nodeorrev):
# Ensure we have a node id
if isinstance(nodeorrev, int):
n = self.node(nodeorrev)
else:
n = nodeorrev
# handle looking up nullid
if n == nodemod.nullid:
return hgchangelog._changelogrevision(extra={})
hn = gitutil.togitnode(n)
# We've got a real commit!
files = [
r[0]
for r in self._db.execute(
'SELECT filename FROM changedfiles '
'WHERE node = ? and filenode != ?',
(hn, gitutil.nullgit),
)
]
filesremoved = [
r[0]
for r in self._db.execute(
'SELECT filename FROM changedfiles '
'WHERE node = ? and filenode = ?',
(hn, nodemod.nullhex),
)
]
c = self.gitrepo[hn]
return hgchangelog._changelogrevision(
manifest=n, # pretend manifest the same as the commit node
user=b'%s <%s>'
% (c.author.name.encode('utf8'), c.author.email.encode('utf8')),
date=(c.author.time, -c.author.offset * 60),
files=files,
# TODO filesadded in the index
filesremoved=filesremoved,
description=c.message.encode('utf8'),
# TODO do we want to handle extra? how?
extra={b'branch': b'default'},
)
def ancestors(self, revs, stoprev=0, inclusive=False):
revs = list(revs)
tip = self.rev(self.tip())
for r in revs:
if r > tip:
raise IndexError(b'Invalid rev %r' % r)
return ancestor.lazyancestors(
self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive
)
# Cleanup opportunity: this is *identical* to the revlog.py version
def descendants(self, revs):
return dagop.descendantrevs(revs, self.revs, self.parentrevs)
def reachableroots(self, minroot, heads, roots, includepath=False):
return dagop._reachablerootspure(
self.parentrevs, minroot, roots, heads, includepath
)
# Cleanup opportunity: this is *identical* to the revlog.py version
def isancestor(self, a, b):
a, b = self.rev(a), self.rev(b)
return self.isancestorrev(a, b)
# Cleanup opportunity: this is *identical* to the revlog.py version
def isancestorrev(self, a, b):
if a == nodemod.nullrev:
return True
elif a == b:
return True
elif a > b:
return False
return bool(self.reachableroots(a, [b], [a], includepath=False))
def parentrevs(self, rev):
n = self.node(rev)
hn = gitutil.togitnode(n)
c = self.gitrepo[hn]
p1 = p2 = nodemod.nullrev
if c.parents:
p1 = self.rev(c.parents[0].id.raw)
if len(c.parents) > 2:
raise error.Abort(b'TODO octopus merge handling')
if len(c.parents) == 2:
p2 = self.rev(c.parents[1].id.raw)
return p1, p2
# Private method is used at least by the tags code.
_uncheckedparentrevs = parentrevs
def commonancestorsheads(self, a, b):
# TODO the revlog verson of this has a C path, so we probably
# need to optimize this...
a, b = self.rev(a), self.rev(b)
return [
self.node(n)
for n in ancestor.commonancestorsheads(self.parentrevs, a, b)
]
def branchinfo(self, rev):
"""Git doesn't do named branches, so just put everything on default."""
return b'default', False
def delayupdate(self, tr):
# TODO: I think we can elide this because we're just dropping
# an object in the git repo?
pass
def add(
self,
manifest,
files,
desc,
transaction,
p1,
p2,
user,
date=None,
extra=None,
p1copies=None,
p2copies=None,
filesadded=None,
filesremoved=None,
):
parents = []
hp1, hp2 = gitutil.togitnode(p1), gitutil.togitnode(p2)
if p1 != nodemod.nullid:
parents.append(hp1)
if p2 and p2 != nodemod.nullid:
parents.append(hp2)
assert date is not None
timestamp, tz = date
sig = pygit2.Signature(
encoding.unifromlocal(stringutil.person(user)),
encoding.unifromlocal(stringutil.email(user)),
timestamp,
-(tz // 60),
)
oid = self.gitrepo.create_commit(
None, sig, sig, desc, gitutil.togitnode(manifest), parents
)
# Set up an internal reference to force the commit into the
# changelog. Hypothetically, we could even use this refs/hg/
# namespace to allow for anonymous heads on git repos, which
# would be neat.
self.gitrepo.references.create(
'refs/hg/internal/latest-commit', oid, force=True
)
# Reindex now to pick up changes. We omit the progress
# callback because this will be very quick.
index._index_repo(self.gitrepo, self._db)
return oid.raw
class manifestlog(baselog):
def __getitem__(self, node):
return self.get(b'', node)
def get(self, relpath, node):
if node == nodemod.nullid:
# TODO: this should almost certainly be a memgittreemanifestctx
return manifest.memtreemanifestctx(self, relpath)
commit = self.gitrepo[gitutil.togitnode(node)]
t = commit.tree
if relpath:
parts = relpath.split(b'/')
for p in parts:
te = t[p]
t = self.gitrepo[te.id]
return gitmanifest.gittreemanifestctx(self.gitrepo, t)
@interfaceutil.implementer(repository.ifilestorage)
class filelog(baselog):
def __init__(self, gr, db, path):
super(filelog, self).__init__(gr, db)
assert isinstance(path, bytes)
self.path = path
def read(self, node):
if node == nodemod.nullid:
return b''
return self.gitrepo[gitutil.togitnode(node)].data
def lookup(self, node):
if len(node) not in (20, 40):
node = int(node)
if isinstance(node, int):
assert False, b'todo revnums for nodes'
if len(node) == 40:
node = nodemod.bin(node)
hnode = gitutil.togitnode(node)
if hnode in self.gitrepo:
return node
raise error.LookupError(self.path, node, _(b'no match found'))
def cmp(self, node, text):
"""Returns True if text is different than content at `node`."""
return self.read(node) != text
def add(self, text, meta, transaction, link, p1=None, p2=None):
assert not meta # Should we even try to handle this?
return self.gitrepo.create_blob(text).raw
def __iter__(self):
for clrev in self._db.execute(
'''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
''',
(pycompat.fsdecode(self.path), gitutil.nullgit),
):
yield clrev[0]
def linkrev(self, fr):
return fr
def rev(self, node):
row = self._db.execute(
'''
SELECT rev FROM changelog
INNER JOIN changedfiles ON changelog.node = changedfiles.node
WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''',
(pycompat.fsdecode(self.path), gitutil.togitnode(node)),
).fetchone()
if row is None:
raise error.LookupError(self.path, node, _(b'no such node'))
return int(row[0])
def node(self, rev):
maybe = self._db.execute(
'''SELECT filenode FROM changedfiles
INNER JOIN changelog ON changelog.node = changedfiles.node
WHERE changelog.rev = ? AND filename = ?
''',
(rev, pycompat.fsdecode(self.path)),
).fetchone()
if maybe is None:
raise IndexError('gitlog %r out of range %d' % (self.path, rev))
return nodemod.bin(maybe[0])
def parents(self, node):
gn = gitutil.togitnode(node)
gp = pycompat.fsdecode(self.path)
ps = []
for p in self._db.execute(
'''SELECT p1filenode, p2filenode FROM changedfiles
WHERE filenode = ? AND filename = ?
''',
(gn, gp),
).fetchone():
if p is None:
commit = self._db.execute(
"SELECT node FROM changedfiles "
"WHERE filenode = ? AND filename = ?",
(gn, gp),
).fetchone()[0]
# This filelog is missing some data. Build the
# filelog, then recurse (which will always find data).
if pycompat.ispy3:
commit = commit.decode('ascii')
index.fill_in_filelog(self.gitrepo, self._db, commit, gp, gn)
return self.parents(node)
else:
ps.append(nodemod.bin(p))
return ps
def renamed(self, node):
# TODO: renames/copies
return False