remotefilectx.py
535 lines
| 19.1 KiB
| text/x-python
|
PythonLexer
Augie Fackler
|
r40530 | # remotefilectx.py - filectx/workingfilectx implementations for remotefilelog | ||
# | ||||
# Copyright 2013 Facebook, Inc. | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Matt Harbison
|
from __future__ import annotations

import collections
import time

from mercurial.node import bin, hex, nullrev
from mercurial import (
    ancestor,
    context,
    error,
    phases,
    util,
)
from mercurial.utils import stringutil
from . import shallowutil
# Local alias for util.propertycache; used in this file as a decorator on
# remotefilectx._changeid and remotefilectx._linkrev.
propertycache = util.propertycache
# NOTE(review): this constant is not referenced anywhere else in the visible
# part of this file -- presumably left over from the removed "fastlog" code
# path mentioned in _adjustlinknode; confirm before relying on or removing it.
FASTLOG_TIMEOUT_IN_SECS = 0.5
Augie Fackler
|
r43346 | |||
Augie Fackler
|
class remotefilectx(context.filectx):
    """filectx implementation for files stored in a remotefilelog.

    History questions (parents, copies, link revisions) are answered from an
    "ancestor map": a mapping from file node to a
    ``(p1, p2, linknode, copyfrom)`` tuple obtained from the filelog (see
    ``ancestormap()``).
    """

    def __init__(
        self,
        repo,
        path,
        changeid=None,
        fileid=None,
        filelog=None,
        changectx=None,
        ancestormap=None,
    ):
        """Create a file context.

        ``fileid`` is normalized before being handed to the base class:
        ``nullrev`` becomes ``repo.nullid`` and a 40-character value is
        treated as a hex node and converted to binary.  ``ancestormap`` may
        be a pre-computed ancestor map; when omitted it is loaded lazily by
        ``ancestormap()``.
        """
        if fileid == nullrev:
            fileid = repo.nullid
        if fileid and len(fileid) == 40:
            fileid = bin(fileid)
        super().__init__(repo, path, changeid, fileid, filelog, changectx)
        self._ancestormap = ancestormap

    def size(self):
        """Return the size the filelog reports for this file node."""
        return self._filelog.size(self._filenode)

    @propertycache
    def _changeid(self):
        """Changeset revision this file context is associated with.

        Resolved from, in order of preference: an explicit ``_changeid``, an
        attached ``_changectx``, a known descendant revision (adjusting the
        linknode so it is reachable from that descendant), or the plain
        linkrev.
        """
        attrs = self.__dict__
        if '_changeid' in attrs:
            return self._changeid
        elif '_changectx' in attrs:
            return self._changectx.rev()
        elif '_descendantrev' in attrs:
            # this file context was created from a revision with a known
            # descendant, we can (lazily) correct for linkrev aliases
            linknode = self._adjustlinknode(
                self._path, self._filelog, self._filenode, self._descendantrev
            )
            return self._repo.unfiltered().changelog.rev(linknode)
        else:
            return self.linkrev()

    def filectx(self, fileid, changeid=None):
        """opens an arbitrary revision of the file without
        opening a new filelog"""
        return remotefilectx(
            self._repo,
            self._path,
            fileid=fileid,
            filelog=self._filelog,
            changeid=changeid,
        )

    def linkrev(self):
        """Return the (cached) link revision of this file revision."""
        return self._linkrev

    @propertycache
    def _linkrev(self):
        """Compute the changelog revision this file revision links to.

        Returns ``nullrev`` for the null file node.  Otherwise the linknode
        recorded in the ancestor map is looked up in the changelog; if it is
        not known there, every changeset is scanned (newest first) for one
        that touched this path with this exact file node.  Returns ``None``
        if nothing matches.
        """
        if self._filenode == self._repo.nullid:
            return nullrev

        ancestormap = self.ancestormap()
        _p1, _p2, linknode, _copyfrom = ancestormap[self._filenode]
        rev = self._repo.changelog.index.get_rev(linknode)
        if rev is not None:
            return rev

        # Search all commits for the appropriate linkrev (slow, but uncommon)
        path = self._path
        fileid = self._filenode
        cl = self._repo.unfiltered().changelog
        mfl = self._repo.manifestlog

        # The stop value is -1 (exclusive) so that revision 0, the root
        # changeset, is examined too.
        for rev in range(len(cl) - 1, -1, -1):
            node = cl.node(rev)
            # get changeset data (we avoid object creation)
            data = cl.read(node)
            if path in data[3]:  # checking the 'files' field.
                # The file has been touched, check if the hash is what we're
                # looking for.
                #
                # The change has to be against a parent, otherwise we might be
                # missing linkrev worthy changes.
                m = mfl[data[0]].read_delta_parents(exact=False)
                if fileid == m.get(path):
                    return rev

        # Couldn't find the linkrev. This should generally not happen, and will
        # likely cause a crash.
        return None

    def introrev(self):
        """return the rev of the changeset which introduced this file revision

        This method is different from linkrev because it take into account the
        changeset the filectx was created from. It ensures the returned
        revision is one of its ancestors. This prevents bugs from
        'linkrev-shadowing' when a file revision is used by multiple
        changesets.
        """
        lkr = self.linkrev()
        attrs = vars(self)
        noctx = not ('_changeid' in attrs or '_changectx' in attrs)
        if noctx or self.rev() == lkr:
            return lkr
        linknode = self._adjustlinknode(
            self._path,
            self._filelog,
            self._filenode,
            self.rev(),
            inclusive=True,
        )
        return self._repo.changelog.rev(linknode)

    def renamed(self):
        """check if file was actually renamed in this changeset revision

        If rename logged in file revision, we report copy for changeset only
        if file revisions linkrev points back to the changeset in question
        or both changeset parents contain different file revisions.

        Returns a ``(copyfrom, p1)`` tuple, or ``None`` when there is no copy
        to report.
        """
        ancestormap = self.ancestormap()

        p1, _p2, _linknode, copyfrom = ancestormap[self._filenode]
        if not copyfrom:
            return None

        renamed = (copyfrom, p1)
        if self.rev() == self.linkrev():
            return renamed

        name = self.path()
        fnode = self._filenode
        for p in self._changectx.parents():
            try:
                if fnode == p.filenode(name):
                    return None
            except error.LookupError:
                # this parent does not have the file; keep checking
                pass
        return renamed

    def copysource(self):
        """Return the copy source path, or a falsy value if not a copy."""
        copy = self.renamed()
        return copy and copy[0]

    def ancestormap(self):
        """Return the ancestor map, loading it from the filelog on first use.

        Each value is a ``(p1, p2, linknode, copyfrom)`` tuple keyed by file
        node.
        """
        if not self._ancestormap:
            self._ancestormap = self.filelog().ancestormap(self._filenode)

        return self._ancestormap

    def parents(self):
        """Return the non-null file parents as ``remotefilectx`` objects.

        The first parent uses the copy source path when the ancestor map
        records one.  Each parent shares this context's ancestor map and
        remembers this context's revision as ``_descendantrev`` so its
        changeset can later be resolved relative to it.
        """
        repo = self._repo
        ancestormap = self.ancestormap()

        p1, p2, _linknode, copyfrom = ancestormap[self._filenode]
        results = []
        candidates = ((p1, copyfrom or self._path), (p2, self._path))
        for pnode, ppath in candidates:
            if pnode == repo.nullid:
                continue
            pctx = remotefilectx(
                repo,
                ppath,
                fileid=pnode,
                filelog=repo.file(ppath),
                ancestormap=ancestormap,
            )
            pctx._descendantrev = self.rev()
            results.append(pctx)

        return results

    def _nodefromancrev(self, ancrev, cl, mfl, path, fnode):
        """returns the node for <path> in <ancrev> if content matches <fnode>"""
        ancctx = cl.read(ancrev)  # This avoids object creation.
        manifestnode, files = ancctx[0], ancctx[3]
        # If the file was touched in this ancestor, and the content is similar
        # to the one we are searching for.
        if path in files:
            m = mfl[manifestnode].read_delta_parents(exact=False)
            if fnode == m.get(path):
                return cl.node(ancrev)
        return None

    def _adjustlinknode(self, path, filelog, fnode, srcrev, inclusive=False):
        """return the first ancestor of <srcrev> introducing <fnode>

        If the linkrev of the file revision does not point to an ancestor of
        srcrev, we'll walk down the ancestors until we find one introducing
        this file revision.

        :path: the file path
        :filelog: the filelog of this path (not used by this implementation)
        :fnode: the nodeid of the file revision
        :srcrev: the changeset revision we search ancestors from; None means
                 the working directory, in which case its parents are used
        :inclusive: if true, the src revision will also be checked

        Note: This is based on adjustlinkrev in core, but it's quite different.

        adjustlinkrev depends on the fact that the linkrev is the bottom most
        node, and uses that as a stopping point for the ancestor traversal. We
        can't do that here because the linknode is not guaranteed to be the
        bottom most one.

        In our code here, we actually know what a bunch of potential ancestor
        linknodes are, so instead of stopping the cheap-ancestor-traversal when
        we get to a linkrev, we stop when we see any of the known linknodes.
        """
        repo = self._repo
        cl = repo.unfiltered().changelog
        mfl = repo.manifestlog
        ancestormap = self.ancestormap()
        linknode = ancestormap[fnode][2]

        if srcrev is None:
            # wctx case, used by workingfilectx during mergecopy
            revs = [p.rev() for p in self._repo[None].parents()]
            inclusive = True  # we skipped the real (revless) source
        else:
            revs = [srcrev]

        if self._verifylinknode(revs, linknode):
            return linknode

        commonlogkwargs = {
            'revs': b' '.join([hex(cl.node(rev)) for rev in revs]),
            'fnode': hex(fnode),
            'filepath': path,
            'user': shallowutil.getusername(repo.ui),
            'reponame': shallowutil.getreponame(repo.ui),
        }

        repo.ui.log(b'linkrevfixup', b'adjusting linknode\n', **commonlogkwargs)

        pc = repo._phasecache
        seenpublic = False
        iteranc = cl.ancestors(revs, inclusive=inclusive)
        for ancrev in iteranc:
            # First, check locally-available history.
            lnode = self._nodefromancrev(ancrev, cl, mfl, path, fnode)
            if lnode is not None:
                return lnode

            # adjusting linknode can be super-slow. To mitigate the issue
            # we use two heuristics: calling fastlog and forcing remotefilelog
            # prefetch
            if not seenpublic and pc.phase(repo, ancrev) == phases.public:
                # TODO: there used to be a codepath to fetch linknodes
                # from a server as a fast path, but it appeared to
                # depend on an API FB added to their phabricator.
                lnode = self._forceprefetch(
                    repo, path, fnode, revs, commonlogkwargs
                )
                if lnode:
                    return lnode
                # only attempt the forced prefetch once per traversal
                seenpublic = True

        return linknode

    def _forceprefetch(self, repo, path, fnode, revs, commonlogkwargs):
        """Force-download the blob for (path, fnode) and re-read its linknode.

        Returns the refreshed linknode when ``_verifylinknode`` accepts it
        for ``revs``; otherwise ``None``.  This is best effort: any exception
        raised while prefetching is logged and swallowed.  The outcome and
        elapsed time (in milliseconds) are always logged to ``linkrevfixup``.
        """
        # This next part is super non-obvious, so big comment block time!
        #
        # It is possible to get extremely bad performance here when a fairly
        # common set of circumstances occur when this extension is combined
        # with a server-side commit rewriting extension like pushrebase.
        #
        # First, an engineer creates Commit A and pushes it to the server.
        # While the server's data structure will have the correct linkrev
        # for the files touched in Commit A, the client will have the
        # linkrev of the local commit, which is "invalid" because it's not
        # an ancestor of the main line of development.
        #
        # The client will never download the remotefilelog with the correct
        # linkrev as long as nobody else touches that file, since the file
        # data and history hasn't changed since Commit A.
        #
        # After a long time (or a short time in a heavily used repo), if the
        # same engineer returns to change the same file, some commands --
        # such as amends of commits with file moves, logs, diffs, etc  --
        # can trigger this _adjustlinknode code. In those cases, finding
        # the correct rev can become quite expensive, as the correct
        # revision is far back in history and we need to walk back through
        # history to find it.
        #
        # In order to improve this situation, we force a prefetch of the
        # remotefilelog data blob for the file we were called on. We do this
        # at most once, when we first see a public commit in the history we
        # are traversing.
        #
        # Forcing the prefetch means we will download the remote blob even
        # if we have the "correct" blob in the local store. Since the union
        # store checks the remote store first, this means we are much more
        # likely to get the correct linkrev at this point.
        #
        # In rare circumstances (such as the server having a suboptimal
        # linkrev for our use case), we will fall back to the old slow path.
        #
        # We may want to add additional heuristics here in the future if
        # the slow path is used too much. One promising possibility is using
        # obsolescence markers to find a more-likely-correct linkrev.

        logmsg = b''
        start = time.time()
        try:
            repo.fileservice.prefetch([(path, hex(fnode))], force=True)

            # Now that we've downloaded a new blob from the server,
            # we need to rebuild the ancestor map to recompute the
            # linknodes.
            self._ancestormap = None
            linknode = self.ancestormap()[fnode][2]  # 2 is linknode
            if self._verifylinknode(revs, linknode):
                logmsg = b'remotefilelog prefetching succeeded'
                return linknode
            logmsg = b'remotefilelog prefetching not found'
            return None
        except Exception as e:
            # bytes %-formatting only accepts bytes-like objects (or objects
            # defining __bytes__); an arbitrary exception would raise
            # TypeError here, so convert it explicitly.
            logmsg = b'remotefilelog prefetching failed (%s)' % (
                stringutil.forcebytestr(e)
            )
            return None
        finally:
            elapsed = time.time() - start
            repo.ui.log(
                b'linkrevfixup',
                logmsg + b'\n',
                elapsed=elapsed * 1000,
                **commonlogkwargs,
            )

    def _verifylinknode(self, revs, linknode):
        """
        Check if a linknode is correct one for the current history.

        That is, return True if the linkrev is the ancestor of any of the
        passed in revs, otherwise return False.

        `revs` is a list that usually has one element -- usually the wdir parent
        or the user-passed rev we're looking back from. It may contain two revs
        when there is a merge going on, or zero revs when a root node with no
        parents is being created.
        """
        if not revs:
            return False
        try:
            # Use the C fastpath to check if the given linknode is correct.
            cl = self._repo.unfiltered().changelog
            return any(cl.isancestor(linknode, cl.node(r)) for r in revs)
        except error.LookupError:
            # The linknode read from the blob may have been stripped or
            # otherwise not present in the repository anymore. Do not fail hard
            # in this case. Instead, return false and continue the search for
            # the correct linknode.
            return False

    def ancestors(self, followfirst=False):
        """Yield the file-level ancestors of this context, excluding itself.

        Ancestors are discovered by walking ``parents()`` (only the first
        parent of each context when ``followfirst`` is true), de-duplicated
        by file node, and yielded in descending linkrev order.
        """
        found = []
        queue = collections.deque((self,))
        seen = set()
        while queue:
            current = queue.pop()
            if current.filenode() in seen:
                continue
            seen.add(current.filenode())

            found.append(current)

            parents = current.parents()
            first = True
            for p in parents:
                if first or not followfirst:
                    queue.append(p)
                first = False

        # Remove self
        found.pop(0)

        # Sort by linkrev
        # The copy tracing algorithm depends on these coming out in order
        yield from sorted(found, reverse=True, key=lambda x: x.linkrev())

    def ancestor(self, fc2, actx):
        """Return the common-ancestor file context of ``self`` and ``fc2``.

        ``actx`` is the ancestor changeset context.  When the path lookup in
        ``actx`` is unambiguous it is used directly; otherwise the ancestor
        maps of both files are traversed.  Returns ``None`` if no common
        ancestor is found.
        """
        # the easy case: no (relevant) renames
        if fc2.path() == self.path() and self.path() in actx:
            return actx[self.path()]

        # the next easiest cases: unambiguous predecessor (name trumps
        # history)
        if self.path() in actx and fc2.path() not in actx:
            return actx[self.path()]
        if fc2.path() in actx and self.path() not in actx:
            return actx[fc2.path()]

        # do a full traversal
        amap = self.ancestormap()
        bmap = fc2.ancestormap()

        def parents(x):
            # x is a (path, filenode) pair; the first parent follows the
            # recorded copy source, if any.
            f, n = x
            p = amap.get(n) or bmap.get(n)
            if not p:
                return []

            return [(p[3] or f, p[0]), (f, p[1])]

        a = (self.path(), self.filenode())
        b = (fc2.path(), fc2.filenode())
        result = ancestor.genericancestor(a, b, parents)
        if result:
            f, n = result
            r = remotefilectx(self._repo, f, fileid=n, ancestormap=amap)
            return r

        return None

    def annotate(self, *args, **kwargs):
        """Prefetch the file revisions annotate will need, then annotate.

        Accepts an extra ``prefetchskip`` keyword argument (removed before
        delegating to the base class): parents of contexts contained in it
        are not visited while collecting revisions to prefetch.
        """
        introctx = self
        prefetchskip = kwargs.pop('prefetchskip', None)
        if prefetchskip:
            # use introrev so prefetchskip can be accurately tested
            introrev = self.introrev()
            if self.rev() != introrev:
                introctx = remotefilectx(
                    self._repo,
                    self._path,
                    changeid=introrev,
                    fileid=self._filenode,
                    filelog=self._filelog,
                    ancestormap=self._ancestormap,
                )

        # like self.ancestors, but append to "fetch" and skip visiting parents
        # of nodes in "prefetchskip".
        fetch = []
        seen = set()
        queue = collections.deque((introctx,))
        seen.add(introctx.node())
        while queue:
            current = queue.pop()
            if current.filenode() != self.filenode():
                # this is a "joint point". fastannotate needs contents of
                # "joint point"s to calculate diffs for side branches.
                fetch.append((current.path(), hex(current.filenode())))
            if prefetchskip and current in prefetchskip:
                continue
            for parent in current.parents():
                if parent.node() not in seen:
                    seen.add(parent.node())
                    queue.append(parent)

        self._repo.ui.debug(
            b'remotefilelog: prefetching %d files '
            b'for annotate\n' % len(fetch)
        )
        if fetch:
            self._repo.fileservice.prefetch(fetch)
        return super().annotate(*args, **kwargs)

    # Return empty set so that the hg serve and thg don't stack trace
    def children(self):
        """Always return an empty list of children."""
        return []
Augie Fackler
|
r43346 | |||
Augie Fackler
|
class remoteworkingfilectx(context.workingfilectx, remotefilectx):
    """Working-directory file context that uses remotefilelog ancestor maps."""

    def __init__(self, repo, path, filelog=None, workingctx=None):
        """Start with no ancestor map; it is built lazily by ancestormap()."""
        self._ancestormap = None
        super().__init__(repo, path, filelog, workingctx)

    def parents(self):
        """Return the parents computed by ``remotefilectx.parents``."""
        return remotefilectx.parents(self)

    def ancestormap(self):
        """Return (building on first use) the ancestor map for this file.

        The map is the union of the ancestor maps of the file's non-null
        parents, plus an entry keyed by ``None`` describing the working copy
        itself as ``(p1node, p2node, nullid, copyfrom)``.
        """
        if self._ancestormap:
            return self._ancestormap

        repo = self._repo
        nullid = repo.nullid
        path = self._path
        wparents = self._changectx._parents
        copied = self.renamed()

        # First parent: the copy source when there is one, otherwise whatever
        # the first working-directory parent's manifest has for this path.
        if copied:
            first = copied
        else:
            first = (path, wparents[0]._manifest.get(path, nullid))

        # Second parent only exists when the working directory has two
        # parents.
        if len(wparents) > 1:
            second = (path, wparents[1]._manifest.get(path, nullid))
        else:
            second = (path, nullid)

        merged = {}
        for fname, fnode in (first, second):
            if fnode == nullid:
                continue
            fctx = repo.filectx(fname, fileid=fnode)
            merged.update(fctx.filelog().ancestormap(fnode))

        source = copied[0] if copied else b''
        merged[None] = (first[1], second[1], nullid, source)
        self._ancestormap = merged

        return self._ancestormap