@@ -1,856 +1,858 @@
 # Copyright 2016-present Facebook. All Rights Reserved.
 #
 # context: context needed to annotate a file
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
 import collections
 import contextlib
-import hashlib
 import os
 
 from mercurial.i18n import _
 from mercurial.pycompat import (
     getattr,
     open,
     setattr,
 )
 from mercurial import (
     error,
     linelog as linelogmod,
     lock as lockmod,
     mdiff,
     node,
     pycompat,
     scmutil,
     util,
 )
-from mercurial.utils import stringutil
+from mercurial.utils import (
+    hashutil,
+    stringutil,
+)
 
 from . import (
     error as faerror,
     revmap as revmapmod,
 )
 
 # given path, get filelog, cached
 @util.lrucachefunc
 def _getflog(repo, path):
     return repo.file(path)
 
 
 # extracted from mercurial.context.basefilectx.annotate
 def _parents(f, follow=True):
     # Cut _descendantrev here to mitigate the penalty of lazy linkrev
     # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
     # from the topmost introrev (= srcrev) down to p.linkrev() if it
     # isn't an ancestor of the srcrev.
     f._changeid
     pl = f.parents()
 
     # Don't return renamed parents if we aren't following.
     if not follow:
         pl = [p for p in pl if p.path() == f.path()]
 
     # renamed filectx won't have a filelog yet, so set it
     # from the cache to save time
     for p in pl:
         if not '_filelog' in p.__dict__:
             p._filelog = _getflog(f._repo, p.path())
 
     return pl
 
 
 # extracted from mercurial.context.basefilectx.annotate. slightly modified
 # so it takes a fctx instead of a pair of text and fctx.
 def _decorate(fctx):
     text = fctx.data()
     linecount = text.count(b'\n')
     if text and not text.endswith(b'\n'):
         linecount += 1
     return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
 
 
 # extracted from mercurial.context.basefilectx.annotate. slightly modified
 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
 # calculating diff here.
 def _pair(parent, child, blocks):
     for (a1, a2, b1, b2), t in blocks:
         # Changed blocks ('!') or blocks made only of blank lines ('~')
         # belong to the child.
         if t == b'=':
             child[0][b1:b2] = parent[0][a1:a2]
     return child
 
 
 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
 # could be reused
 _revsingle = util.lrucachefunc(scmutil.revsingle)
 
 
 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
     """(repo, str, str) -> fctx
 
     get the filectx object from repo, rev, path, in an efficient way.
 
     if resolverev is True, "rev" is a revision specified by the revset
     language, otherwise "rev" is a nodeid, or a revision number that can
     be consumed by repo.__getitem__.
 
     if adjustctx is not None, the returned fctx will point to a changeset
     that introduces the change (last modified the file). if adjustctx
     is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
     faster for big repos but is incorrect for some cases.
     """
     if resolverev and not isinstance(rev, int) and rev is not None:
         ctx = _revsingle(repo, rev)
     else:
         ctx = repo[rev]
 
     # If we don't need to adjust the linkrev, create the filectx using the
     # changectx instead of using ctx[path]. This means it already has the
     # changectx information, so blame -u will be able to look directly at the
     # commitctx object instead of having to resolve it by going through the
     # manifest. In a lazy-manifest world this can prevent us from downloading a
     # lot of data.
     if adjustctx is None:
         # ctx.rev() is None means it's the working copy, which is a special
         # case.
         if ctx.rev() is None:
             fctx = ctx[path]
         else:
             fctx = repo.filectx(path, changeid=ctx.rev())
     else:
         fctx = ctx[path]
         if adjustctx == b'linkrev':
             introrev = fctx.linkrev()
         else:
             introrev = fctx.introrev()
         if introrev != ctx.rev():
             fctx._changeid = introrev
             fctx._changectx = repo[introrev]
     return fctx
 
 
 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
 def encodedir(path):
     return (
         path.replace(b'.hg/', b'.hg.hg/')
         .replace(b'.l/', b'.l.hg/')
         .replace(b'.m/', b'.m.hg/')
         .replace(b'.lock/', b'.lock.hg/')
     )
 
 
 def hashdiffopts(diffopts):
     diffoptstr = stringutil.pprint(
         sorted((k, getattr(diffopts, k)) for k in mdiff.diffopts.defaults)
     )
-    return node.hex(hashlib.sha1(diffoptstr).digest())[:6]
+    return node.hex(hashutil.sha1(diffoptstr).digest())[:6]
 
 
 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
 
 
 class annotateopts(object):
     """like mercurial.mdiff.diffopts, but is for annotate
 
     followrename: follow renames, like "hg annotate -f"
     followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
     """
 
     defaults = {
         b'diffopts': None,
         b'followrename': True,
         b'followmerge': True,
     }
 
     def __init__(self, **opts):
         opts = pycompat.byteskwargs(opts)
         for k, v in pycompat.iteritems(self.defaults):
             setattr(self, k, opts.get(k, v))
 
     @util.propertycache
     def shortstr(self):
         """represent opts in a short string, suitable for a directory name"""
         result = b''
         if not self.followrename:
             result += b'r0'
         if not self.followmerge:
             result += b'm0'
         if self.diffopts is not None:
             assert isinstance(self.diffopts, mdiff.diffopts)
             diffopthash = hashdiffopts(self.diffopts)
             if diffopthash != _defaultdiffopthash:
                 result += b'i' + diffopthash
         return result or b'default'
 
 
 defaultopts = annotateopts()
 
 
 class _annotatecontext(object):
     """do not use this class directly as it does not use lock to protect
     writes. use "with annotatecontext(...)" instead.
     """
 
     def __init__(self, repo, path, linelogpath, revmappath, opts):
         self.repo = repo
         self.ui = repo.ui
         self.path = path
         self.opts = opts
         self.linelogpath = linelogpath
         self.revmappath = revmappath
         self._linelog = None
         self._revmap = None
         self._node2path = {}  # {str: str}
 
     @property
     def linelog(self):
         if self._linelog is None:
             if os.path.exists(self.linelogpath):
                 with open(self.linelogpath, b'rb') as f:
                     try:
                         self._linelog = linelogmod.linelog.fromdata(f.read())
                     except linelogmod.LineLogError:
                         self._linelog = linelogmod.linelog()
             else:
                 self._linelog = linelogmod.linelog()
         return self._linelog
 
     @property
     def revmap(self):
         if self._revmap is None:
             self._revmap = revmapmod.revmap(self.revmappath)
         return self._revmap
 
     def close(self):
         if self._revmap is not None:
             self._revmap.flush()
             self._revmap = None
         if self._linelog is not None:
             with open(self.linelogpath, b'wb') as f:
                 f.write(self._linelog.encode())
             self._linelog = None
 
     __del__ = close
 
     def rebuild(self):
         """delete linelog and revmap, useful for rebuilding"""
         self.close()
         self._node2path.clear()
         _unlinkpaths([self.revmappath, self.linelogpath])
 
     @property
     def lastnode(self):
         """return last node in revmap, or None if revmap is empty"""
         if self._revmap is None:
             # fast path, read revmap without loading its full content
             return revmapmod.getlastnode(self.revmappath)
         else:
             return self._revmap.rev2hsh(self._revmap.maxrev)
 
     def isuptodate(self, master, strict=True):
         """return True if the revmap / linelog is up-to-date, or the file
         does not exist in the master revision. False otherwise.
 
         it tries to be fast and could return false negatives, because of the
         use of linkrev instead of introrev.
 
         useful for both server and client to decide whether to update
         fastannotate cache or not.
 
         if strict is True, even if fctx exists in the revmap, but is not the
         last node, isuptodate will return False. it's good for performance - no
         expensive check was done.
 
         if strict is False, if fctx exists in the revmap, this function may
         return True. this is useful for the client to skip downloading the
         cache if the client's master is behind the server's.
         """
         lastnode = self.lastnode
         try:
             f = self._resolvefctx(master, resolverev=True)
             # choose linkrev instead of introrev as the check is meant to be
             # *fast*.
             linknode = self.repo.changelog.node(f.linkrev())
             if not strict and lastnode and linknode != lastnode:
                 # check if f.node() is in the revmap. note: this loads the
                 # revmap and can be slow.
                 return self.revmap.hsh2rev(linknode) is not None
             # avoid resolving old manifest, or slow adjustlinkrev to be fast,
             # false negatives are acceptable in this case.
             return linknode == lastnode
         except LookupError:
             # master does not have the file, or the revmap is ahead
             return True
 
     def annotate(self, rev, master=None, showpath=False, showlines=False):
         """incrementally update the cache so it includes revisions in the main
         branch till 'master'. and run annotate on 'rev', which may or may not be
         included in the main branch.
 
         if master is None, do not update linelog.
 
         the first value returned is the annotate result, it is [(node, linenum)]
         by default. [(node, linenum, path)] if showpath is True.
 
         if showlines is True, a second value will be returned, it is a list of
         corresponding line contents.
         """
 
         # the fast path test requires commit hash, convert rev number to hash,
         # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
         # command could give us a revision number even if the user passes a
         # commit hash.
         if isinstance(rev, int):
             rev = node.hex(self.repo.changelog.node(rev))
 
         # fast path: if rev is in the main branch already
         directly, revfctx = self.canannotatedirectly(rev)
         if directly:
             if self.ui.debugflag:
                 self.ui.debug(
                     b'fastannotate: %s: using fast path '
                     b'(resolved fctx: %s)\n'
                     % (
                         self.path,
                         stringutil.pprint(util.safehasattr(revfctx, b'node')),
                     )
                 )
             return self.annotatedirectly(revfctx, showpath, showlines)
 
         # resolve master
         masterfctx = None
         if master:
             try:
                 masterfctx = self._resolvefctx(
                     master, resolverev=True, adjustctx=True
                 )
             except LookupError:  # master does not have the file
                 pass
             else:
                 if masterfctx in self.revmap:  # no need to update linelog
                     masterfctx = None
 
         #  ... - @ <- rev (can be an arbitrary changeset,
         #   /               not necessarily a descendant
         #  master -> o         of master)
         #            |
         #     a merge -> o    'o': new changesets in the main branch
         #                |\   '#': revisions in the main branch that
         #                o *      exist in linelog / revmap
         #                | .  '*': changesets in side branches, or
         # last master -> # .      descendants of master
         #                | .
         #                # *  joint: '#', and is a parent of a '*'
         #                |/
         #     a joint -> # ^^^^ --- side branches
         #                |
         #                ^ --- main branch (in linelog)
 
         # these DFSes are similar to the traditional annotate algorithm.
         # we cannot really reuse the code for perf reason.
 
         # 1st DFS calculates merges, joint points, and needed.
         # "needed" is a simple reference counting dict to free items in
         # "hist", reducing its memory usage otherwise could be huge.
         initvisit = [revfctx]
         if masterfctx:
             if masterfctx.rev() is None:
                 raise error.Abort(
                     _(b'cannot update linelog to wdir()'),
                     hint=_(b'set fastannotate.mainbranch'),
                 )
             initvisit.append(masterfctx)
         visit = initvisit[:]
         pcache = {}
         needed = {revfctx: 1}
         hist = {}  # {fctx: ([(llrev or fctx, linenum)], text)}
         while visit:
             f = visit.pop()
             if f in pcache or f in hist:
                 continue
             if f in self.revmap:  # in the old main branch, it's a joint
                 llrev = self.revmap.hsh2rev(f.node())
                 self.linelog.annotate(llrev)
                 result = self.linelog.annotateresult
                 hist[f] = (result, f.data())
                 continue
             pl = self._parentfunc(f)
             pcache[f] = pl
             for p in pl:
                 needed[p] = needed.get(p, 0) + 1
                 if p not in pcache:
                     visit.append(p)
 
         # 2nd (simple) DFS calculates new changesets in the main branch
         # ('o' nodes in # the above graph), so we know when to update linelog.
         newmainbranch = set()
         f = masterfctx
         while f and f not in self.revmap:
             newmainbranch.add(f)
             pl = pcache[f]
             if pl:
                 f = pl[0]
             else:
                 f = None
                 break
 
         # f, if present, is the position where the last build stopped at, and
         # should be the "master" last time. check to see if we can continue
         # building the linelog incrementally. (we cannot if diverged)
         if masterfctx is not None:
             self._checklastmasterhead(f)
 
         if self.ui.debugflag:
             if newmainbranch:
                 self.ui.debug(
                     b'fastannotate: %s: %d new changesets in the main'
                     b' branch\n' % (self.path, len(newmainbranch))
                 )
             elif not hist:  # no joints, no updates
                 self.ui.debug(
                     b'fastannotate: %s: linelog cannot help in '
                     b'annotating this revision\n' % self.path
                 )
 
         # prepare annotateresult so we can update linelog incrementally
         self.linelog.annotate(self.linelog.maxrev)
 
         # 3rd DFS does the actual annotate
         visit = initvisit[:]
         progress = self.ui.makeprogress(
             b'building cache', total=len(newmainbranch)
         )
         while visit:
             f = visit[-1]
             if f in hist:
                 visit.pop()
                 continue
 
             ready = True
             pl = pcache[f]
             for p in pl:
                 if p not in hist:
                     ready = False
                     visit.append(p)
             if not ready:
                 continue
 
             visit.pop()
             blocks = None  # mdiff blocks, used for appending linelog
             ismainbranch = f in newmainbranch
             # curr is the same as the traditional annotate algorithm,
             # if we only care about linear history (do not follow merge),
             # then curr is not actually used.
             assert f not in hist
             curr = _decorate(f)
             for i, p in enumerate(pl):
                 bs = list(self._diffblocks(hist[p][1], curr[1]))
                 if i == 0 and ismainbranch:
                     blocks = bs
                 curr = _pair(hist[p], curr, bs)
                 if needed[p] == 1:
                     del hist[p]
                     del needed[p]
                 else:
                     needed[p] -= 1
 
             hist[f] = curr
             del pcache[f]
 
             if ismainbranch:  # need to write to linelog
                 progress.increment()
                 bannotated = None
                 if len(pl) == 2 and self.opts.followmerge:  # merge
                     bannotated = curr[0]
                 if blocks is None:  # no parents, add an empty one
                     blocks = list(self._diffblocks(b'', curr[1]))
                 self._appendrev(f, blocks, bannotated)
             elif showpath:  # not append linelog, but we need to record path
                 self._node2path[f.node()] = f.path()
 
         progress.complete()
 
         result = [
             ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
             for fr, l in hist[revfctx][0]
         ]  # [(node, linenumber)]
         return self._refineannotateresult(result, revfctx, showpath, showlines)
 
     def canannotatedirectly(self, rev):
         """(str) -> bool, fctx or node.
         return (True, f) if we can annotate without updating the linelog, pass
         f to annotatedirectly.
         return (False, f) if we need extra calculation. f is the fctx resolved
         from rev.
         """
         result = True
         f = None
         if not isinstance(rev, int) and rev is not None:
             hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
             if hsh is not None and (hsh, self.path) in self.revmap:
                 f = hsh
         if f is None:
             adjustctx = b'linkrev' if self._perfhack else True
             f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
             result = f in self.revmap
             if not result and self._perfhack:
                 # redo the resolution without perfhack - as we are going to
                 # do write operations, we need a correct fctx.
                 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
         return result, f
 
     def annotatealllines(self, rev, showpath=False, showlines=False):
         """(rev : str) -> [(node : str, linenum : int, path : str)]
 
         the result has the same format with annotate, but include all (including
         deleted) lines up to rev. call this after calling annotate(rev, ...) for
         better performance and accuracy.
         """
         revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
 
         # find a chain from rev to anything in the mainbranch
         if revfctx not in self.revmap:
             chain = [revfctx]
             a = b''
             while True:
                 f = chain[-1]
                 pl = self._parentfunc(f)
                 if not pl:
                     break
                 if pl[0] in self.revmap:
                     a = pl[0].data()
                     break
                 chain.append(pl[0])
 
             # both self.linelog and self.revmap is backed by filesystem. now
             # we want to modify them but do not want to write changes back to
             # files. so we create in-memory objects and copy them. it's like
             # a "fork".
             linelog = linelogmod.linelog()
             linelog.copyfrom(self.linelog)
             linelog.annotate(linelog.maxrev)
             revmap = revmapmod.revmap()
             revmap.copyfrom(self.revmap)
 
             for f in reversed(chain):
                 b = f.data()
                 blocks = list(self._diffblocks(a, b))
                 self._doappendrev(linelog, revmap, f, blocks)
                 a = b
         else:
             # fastpath: use existing linelog, revmap as we don't write to them
             linelog = self.linelog
             revmap = self.revmap
 
         lines = linelog.getalllines()
         hsh = revfctx.node()
         llrev = revmap.hsh2rev(hsh)
         result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
         # cannot use _refineannotateresult since we need custom logic for
         # resolving line contents
         if showpath:
             result = self._addpathtoresult(result, revmap)
         if showlines:
             linecontents = self._resolvelines(result, revmap, linelog)
             result = (result, linecontents)
         return result
 
     def _resolvelines(self, annotateresult, revmap, linelog):
         """(annotateresult) -> [line]. designed for annotatealllines.
         this is probably the most inefficient code in the whole fastannotate
         directory. but we have made a decision that the linelog does not
         store line contents. so getting them requires random accesses to
         the revlog data, since they can be many, it can be very slow.
         """
         # [llrev]
         revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
         result = [None] * len(annotateresult)
         # {(rev, linenum): [lineindex]}
         key2idxs = collections.defaultdict(list)
         for i in pycompat.xrange(len(result)):
             key2idxs[(revs[i], annotateresult[i][1])].append(i)
         while key2idxs:
             # find an unresolved line and its linelog rev to annotate
             hsh = None
             try:
                 for (rev, _linenum), idxs in pycompat.iteritems(key2idxs):
                     if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                         continue
                     hsh = annotateresult[idxs[0]][0]
                     break
             except StopIteration:  # no more unresolved lines
                 return result
             if hsh is None:
                 # the remaining key2idxs are not in main branch, resolving them
                 # using the hard way...
                 revlines = {}
                 for (rev, linenum), idxs in pycompat.iteritems(key2idxs):
                     if rev not in revlines:
                         hsh = annotateresult[idxs[0]][0]
                         if self.ui.debugflag:
                             self.ui.debug(
                                 b'fastannotate: reading %s line #%d '
                                 b'to resolve lines %r\n'
                                 % (node.short(hsh), linenum, idxs)
                             )
                         fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
                         lines = mdiff.splitnewlines(fctx.data())
                         revlines[rev] = lines
                     for idx in idxs:
                         result[idx] = revlines[rev][linenum]
                 assert all(x is not None for x in result)
                 return result
 
             # run the annotate and the lines should match to the file content
             self.ui.debug(
                 b'fastannotate: annotate %s to resolve lines\n'
                 % node.short(hsh)
             )
             linelog.annotate(rev)
             fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
             annotated = linelog.annotateresult
             lines = mdiff.splitnewlines(fctx.data())
             if len(lines) != len(annotated):
                 raise faerror.CorruptedFileError(b'unexpected annotated lines')
             # resolve lines from the annotate result
             for i, line in enumerate(lines):
                 k = annotated[i]
                 if k in key2idxs:
                     for idx in key2idxs[k]:
                         result[idx] = line
                     del key2idxs[k]
         return result
 
     def annotatedirectly(self, f, showpath, showlines):
         """like annotate, but when we know that f is in linelog.
         f can be either a 20-char str (node) or a fctx. this is for perf - in
         the best case, the user provides a node and we don't need to read the
         filelog or construct any filecontext.
         """
         if isinstance(f, bytes):
             hsh = f
         else:
             hsh = f.node()
         llrev = self.revmap.hsh2rev(hsh)
         if not llrev:
             raise faerror.CorruptedFileError(
                 b'%s is not in revmap' % node.hex(hsh)
             )
         if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
             raise faerror.CorruptedFileError(
                 b'%s is not in revmap mainbranch' % node.hex(hsh)
             )
         self.linelog.annotate(llrev)
         result = [
             (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
         ]
         return self._refineannotateresult(result, f, showpath, showlines)
 
     def _refineannotateresult(self, result, f, showpath, showlines):
         """add the missing path or line contents, they can be expensive.
         f could be either node or fctx.
         """
         if showpath:
             result = self._addpathtoresult(result)
         if showlines:
             if isinstance(f, bytes):  # f: node or fctx
                 llrev = self.revmap.hsh2rev(f)
                 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
             else:
                 fctx = f
             lines = mdiff.splitnewlines(fctx.data())
             if len(lines) != len(result):  # linelog is probably corrupted
                 raise faerror.CorruptedFileError()
             result = (result, lines)
         return result
 
     def _appendrev(self, fctx, blocks, bannotated=None):
         self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
 
     def _diffblocks(self, a, b):
         return mdiff.allblocks(a, b, self.opts.diffopts)
 
     @staticmethod
     def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
         """append a revision to linelog and revmap"""
 
         def getllrev(f):
             """(fctx) -> int"""
             # f should not be a linelog revision
             if isinstance(f, int):
                 raise error.ProgrammingError(b'f should not be an int')
             # f is a fctx, allocate linelog rev on demand
             hsh = f.node()
             rev = revmap.hsh2rev(hsh)
             if rev is None:
                 rev = revmap.append(hsh, sidebranch=True, path=f.path())
             return rev
 
         # append sidebranch revisions to revmap
         siderevs = []
         siderevmap = {}  # node: int
         if bannotated is not None:
             for (a1, a2, b1, b2), op in blocks:
                 if op != b'=':
                     # f could be either linelong rev, or fctx.
                     siderevs += [
                         f
                         for f, l in bannotated[b1:b2]
                         if not isinstance(f, int)
                     ]
             siderevs = set(siderevs)
             if fctx in siderevs:  # mainnode must be appended seperately
                 siderevs.remove(fctx)
             for f in siderevs:
                 siderevmap[f] = getllrev(f)
 
         # the changeset in the main branch, could be a merge
         llrev = revmap.append(fctx.node(), path=fctx.path())
         siderevmap[fctx] = llrev
 
         for (a1, a2, b1, b2), op in reversed(blocks):
             if op == b'=':
                 continue
             if bannotated is None:
                 linelog.replacelines(llrev, a1, a2, b1, b2)
             else:
                 blines = [
                     ((r if isinstance(r, int) else siderevmap[r]), l)
                     for r, l in bannotated[b1:b2]
                 ]
                 linelog.replacelines_vec(llrev, a1, a2, blines)
 
     def _addpathtoresult(self, annotateresult, revmap=None):
         """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
         if revmap is None:
             revmap = self.revmap
 
         def _getpath(nodeid):
             path = self._node2path.get(nodeid)
             if path is None:
                 path = revmap.rev2path(revmap.hsh2rev(nodeid))
                 self._node2path[nodeid] = path
             return path
 
         return [(n, l, _getpath(n)) for n, l in annotateresult]
 
     def _checklastmasterhead(self, fctx):
         """check if fctx is the master's head last time, raise if not"""
         if fctx is None:
             llrev = 0
         else:
             llrev = self.revmap.hsh2rev(fctx.node())
             if not llrev:
                 raise faerror.CannotReuseError()
         if self.linelog.maxrev != llrev:
             raise faerror.CannotReuseError()
 
     @util.propertycache
     def _parentfunc(self):
         """-> (fctx) -> [fctx]"""
         followrename = self.opts.followrename
         followmerge = self.opts.followmerge
 
         def parents(f):
             pl = _parents(f, follow=followrename)
             if not followmerge:
                 pl = pl[:1]
             return pl
 
         return parents
 
     @util.propertycache
     def _perfhack(self):
         return self.ui.configbool(b'fastannotate', b'perfhack')
 
     def _resolvefctx(self, rev, path=None, **kwds):
         return resolvefctx(self.repo, rev, (path or self.path), **kwds)
 
 
 def _unlinkpaths(paths):
     """silent, best-effort unlink"""
     for path in paths:
         try:
             util.unlink(path)
         except OSError:
             pass
 
 
 class pathhelper(object):
     """helper for getting paths for lockfile, linelog and revmap"""
 
     def __init__(self, repo, path, opts=defaultopts):
         # different options use different directories
         self._vfspath = os.path.join(
             b'fastannotate', opts.shortstr, encodedir(path)
         )
         self._repo = repo
 
     @property
     def dirname(self):
         return os.path.dirname(self._repo.vfs.join(self._vfspath))
 
     @property
     def linelogpath(self):
         return self._repo.vfs.join(self._vfspath + b'.l')
 
     def lock(self):
         return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
|
805 | return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock') | |
804 |
|
806 | |||
805 | @property |
|
807 | @property | |
806 | def revmappath(self): |
|
808 | def revmappath(self): | |
807 | return self._repo.vfs.join(self._vfspath + b'.m') |
|
809 | return self._repo.vfs.join(self._vfspath + b'.m') | |
808 |
|
810 | |||
809 |
|
811 | |||
810 | @contextlib.contextmanager |
|
812 | @contextlib.contextmanager | |
811 | def annotatecontext(repo, path, opts=defaultopts, rebuild=False): |
|
813 | def annotatecontext(repo, path, opts=defaultopts, rebuild=False): | |
812 | """context needed to perform (fast) annotate on a file |
|
814 | """context needed to perform (fast) annotate on a file | |
813 |
|
815 | |||
814 | an annotatecontext of a single file consists of two structures: the |
|
816 | an annotatecontext of a single file consists of two structures: the | |
815 | linelog and the revmap. this function takes care of locking. only 1 |
|
817 | linelog and the revmap. this function takes care of locking. only 1 | |
816 | process is allowed to write that file's linelog and revmap at a time. |
|
818 | process is allowed to write that file's linelog and revmap at a time. | |
817 |
|
819 | |||
818 | when something goes wrong, this function will assume the linelog and the |
|
820 | when something goes wrong, this function will assume the linelog and the | |
819 | revmap are in a bad state, and remove them from disk. |
|
821 | revmap are in a bad state, and remove them from disk. | |
820 |
|
822 | |||
821 | use this function in the following way: |
|
823 | use this function in the following way: | |
822 |
|
824 | |||
823 | with annotatecontext(...) as actx: |
|
825 | with annotatecontext(...) as actx: | |
824 | actx. .... |
|
826 | actx. .... | |
825 | """ |
|
827 | """ | |
826 | helper = pathhelper(repo, path, opts) |
|
828 | helper = pathhelper(repo, path, opts) | |
827 | util.makedirs(helper.dirname) |
|
829 | util.makedirs(helper.dirname) | |
828 | revmappath = helper.revmappath |
|
830 | revmappath = helper.revmappath | |
829 | linelogpath = helper.linelogpath |
|
831 | linelogpath = helper.linelogpath | |
830 | actx = None |
|
832 | actx = None | |
831 | try: |
|
833 | try: | |
832 | with helper.lock(): |
|
834 | with helper.lock(): | |
833 | actx = _annotatecontext(repo, path, linelogpath, revmappath, opts) |
|
835 | actx = _annotatecontext(repo, path, linelogpath, revmappath, opts) | |
834 | if rebuild: |
|
836 | if rebuild: | |
835 | actx.rebuild() |
|
837 | actx.rebuild() | |
836 | yield actx |
|
838 | yield actx | |
837 | except Exception: |
|
839 | except Exception: | |
838 | if actx is not None: |
|
840 | if actx is not None: | |
839 | actx.rebuild() |
|
841 | actx.rebuild() | |
840 | repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path) |
|
842 | repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path) | |
841 | raise |
|
843 | raise | |
842 | finally: |
|
844 | finally: | |
843 | if actx is not None: |
|
845 | if actx is not None: | |
844 | actx.close() |
|
846 | actx.close() | |
845 |
|
847 | |||
846 |
|
848 | |||
847 | def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False): |
|
849 | def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False): | |
848 | """like annotatecontext but get the context from a fctx. convenient when |
|
850 | """like annotatecontext but get the context from a fctx. convenient when | |
849 | used in fctx.annotate |
|
851 | used in fctx.annotate | |
850 | """ |
|
852 | """ | |
851 | repo = fctx._repo |
|
853 | repo = fctx._repo | |
852 | path = fctx._path |
|
854 | path = fctx._path | |
853 | if repo.ui.configbool(b'fastannotate', b'forcefollow', True): |
|
855 | if repo.ui.configbool(b'fastannotate', b'forcefollow', True): | |
854 | follow = True |
|
856 | follow = True | |
855 | aopts = annotateopts(diffopts=diffopts, followrename=follow) |
|
857 | aopts = annotateopts(diffopts=diffopts, followrename=follow) | |
856 | return annotatecontext(repo, path, aopts, rebuild) |
|
858 | return annotatecontext(repo, path, aopts, rebuild) |
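For orientation before the next file: the annotatecontext() manager documented above is the entry point of this module. A minimal usage sketch, assuming the module lives at hgext.fastannotate.context and that the returned object exposes an annotate() method as suggested by the docstring's "actx. ...." placeholder (the path is illustrative):

    from hgext.fastannotate import context as facontext

    def fastannotate_file(repo, path=b'a.txt'):
        # annotatecontext() takes the per-file lock, opens the linelog and
        # revmap caches, and removes them if anything fails inside the block
        with facontext.annotatecontext(repo, path) as actx:
            return actx.annotate(b'.')  # assumed method on the context object

The rebuild=False parameter in the signature above can be set to True to throw away and regenerate a cache that is suspected to be stale, which is also what the error path does automatically.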
@@ -1,988 +1,990 b'' | |||||
1 | # __init__.py - fsmonitor initialization and overrides |
|
1 | # __init__.py - fsmonitor initialization and overrides | |
2 | # |
|
2 | # | |
3 | # Copyright 2013-2016 Facebook, Inc. |
|
3 | # Copyright 2013-2016 Facebook, Inc. | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | '''Faster status operations with the Watchman file monitor (EXPERIMENTAL) |
|
8 | '''Faster status operations with the Watchman file monitor (EXPERIMENTAL) | |
9 |
|
9 | |||
10 | Integrates the file-watching program Watchman with Mercurial to produce faster |
|
10 | Integrates the file-watching program Watchman with Mercurial to produce faster | |
11 | status results. |
|
11 | status results. | |
12 |
|
12 | |||
13 | On a particular Linux system, for a real-world repository with over 400,000 |
|
13 | On a particular Linux system, for a real-world repository with over 400,000 | |
14 | files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same |
|
14 | files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same | |
15 | system, with fsmonitor it takes about 0.3 seconds. |
|
15 | system, with fsmonitor it takes about 0.3 seconds. | |
16 |
|
16 | |||
17 | fsmonitor requires no configuration -- it will tell Watchman about your |
|
17 | fsmonitor requires no configuration -- it will tell Watchman about your | |
18 | repository as necessary. You'll need to install Watchman from |
|
18 | repository as necessary. You'll need to install Watchman from | |
19 | https://facebook.github.io/watchman/ and make sure it is in your PATH. |
|
19 | https://facebook.github.io/watchman/ and make sure it is in your PATH. | |
20 |
|
20 | |||
21 | fsmonitor is incompatible with the largefiles and eol extensions, and |
|
21 | fsmonitor is incompatible with the largefiles and eol extensions, and | |
22 | will disable itself if any of those are active. |
|
22 | will disable itself if any of those are active. | |
23 |
|
23 | |||
24 | The following configuration options exist: |
|
24 | The following configuration options exist: | |
25 |
|
25 | |||
26 | :: |
|
26 | :: | |
27 |
|
27 | |||
28 | [fsmonitor] |
|
28 | [fsmonitor] | |
29 | mode = {off, on, paranoid} |
|
29 | mode = {off, on, paranoid} | |
30 |
|
30 | |||
31 | When `mode = off`, fsmonitor will disable itself (similar to not loading the |
|
31 | When `mode = off`, fsmonitor will disable itself (similar to not loading the | |
32 | extension at all). When `mode = on`, fsmonitor will be enabled (the default). |
|
32 | extension at all). When `mode = on`, fsmonitor will be enabled (the default). | |
33 | When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem, |
|
33 | When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem, | |
34 | and ensure that the results are consistent. |
|
34 | and ensure that the results are consistent. | |
35 |
|
35 | |||
36 | :: |
|
36 | :: | |
37 |
|
37 | |||
38 | [fsmonitor] |
|
38 | [fsmonitor] | |
39 | timeout = (float) |
|
39 | timeout = (float) | |
40 |
|
40 | |||
41 | A value, in seconds, that determines how long fsmonitor will wait for Watchman |
|
41 | A value, in seconds, that determines how long fsmonitor will wait for Watchman | |
42 | to return results. Defaults to `2.0`. |
|
42 | to return results. Defaults to `2.0`. | |
43 |
|
43 | |||
44 | :: |
|
44 | :: | |
45 |
|
45 | |||
46 | [fsmonitor] |
|
46 | [fsmonitor] | |
47 | blacklistusers = (list of userids) |
|
47 | blacklistusers = (list of userids) | |
48 |
|
48 | |||
49 | A list of usernames for which fsmonitor will disable itself altogether. |
|
49 | A list of usernames for which fsmonitor will disable itself altogether. | |
50 |
|
50 | |||
51 | :: |
|
51 | :: | |
52 |
|
52 | |||
53 | [fsmonitor] |
|
53 | [fsmonitor] | |
54 | walk_on_invalidate = (boolean) |
|
54 | walk_on_invalidate = (boolean) | |
55 |
|
55 | |||
56 | Whether or not to walk the whole repo ourselves when our cached state has been |
|
56 | Whether or not to walk the whole repo ourselves when our cached state has been | |
57 | invalidated, for example when Watchman has been restarted or .hgignore rules |
|
57 | invalidated, for example when Watchman has been restarted or .hgignore rules | |
58 | have been changed. Walking the repo in that case can result in competing for |
|
58 | have been changed. Walking the repo in that case can result in competing for | |
59 | I/O with Watchman. For large repos it is recommended to set this value to |
|
59 | I/O with Watchman. For large repos it is recommended to set this value to | |
60 | false. You may wish to set this to true if you have a very fast filesystem |
|
60 | false. You may wish to set this to true if you have a very fast filesystem | |
61 | that can outpace the IPC overhead of getting the result data for the full repo |
|
61 | that can outpace the IPC overhead of getting the result data for the full repo | |
62 | from Watchman. Defaults to false. |
|
62 | from Watchman. Defaults to false. | |
63 |
|
63 | |||
64 | :: |
|
64 | :: | |
65 |
|
65 | |||
66 | [fsmonitor] |
|
66 | [fsmonitor] | |
67 | warn_when_unused = (boolean) |
|
67 | warn_when_unused = (boolean) | |
68 |
|
68 | |||
69 | Whether to print a warning during certain operations when fsmonitor would be |
|
69 | Whether to print a warning during certain operations when fsmonitor would be | |
70 | beneficial to performance but isn't enabled. |
|
70 | beneficial to performance but isn't enabled. | |
71 |
|
71 | |||
72 | :: |
|
72 | :: | |
73 |
|
73 | |||
74 | [fsmonitor] |
|
74 | [fsmonitor] | |
75 | warn_update_file_count = (integer) |
|
75 | warn_update_file_count = (integer) | |
76 |
|
76 | |||
77 | If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will |
|
77 | If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will | |
78 | be printed during working directory updates if this many files will be |
|
78 | be printed during working directory updates if this many files will be | |
79 | created. |
|
79 | created. | |
80 | ''' |
|
80 | ''' | |
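Taken together, the options documented above might be combined in an hgrc fragment like the following (a sketch, not a recommendation; the warn_update_file_count value is an arbitrary illustration, and only settings that differ from the defaults actually need to be written out):

    [extensions]
    fsmonitor =

    [fsmonitor]
    mode = on
    timeout = 2.0
    walk_on_invalidate = false
    warn_when_unused = true
    warn_update_file_count = 50000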
81 |
|
81 | |||
82 | # Platforms Supported |
|
82 | # Platforms Supported | |
83 | # =================== |
|
83 | # =================== | |
84 | # |
|
84 | # | |
85 | # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably, |
|
85 | # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably, | |
86 | # even under severe loads. |
|
86 | # even under severe loads. | |
87 | # |
|
87 | # | |
88 | # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor |
|
88 | # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor | |
89 | # turned on, on case-insensitive HFS+. There has been a reasonable amount of |
|
89 | # turned on, on case-insensitive HFS+. There has been a reasonable amount of | |
90 | # user testing under normal loads. |
|
90 | # user testing under normal loads. | |
91 | # |
|
91 | # | |
92 | # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but |
|
92 | # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but | |
93 | # very little testing has been done. |
|
93 | # very little testing has been done. | |
94 | # |
|
94 | # | |
95 | # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet. |
|
95 | # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet. | |
96 | # |
|
96 | # | |
97 | # Known Issues |
|
97 | # Known Issues | |
98 | # ============ |
|
98 | # ============ | |
99 | # |
|
99 | # | |
100 | # * fsmonitor will disable itself if any of the following extensions are |
|
100 | # * fsmonitor will disable itself if any of the following extensions are | |
101 | # enabled: largefiles, inotify, eol; or if the repository has subrepos. |
|
101 | # enabled: largefiles, inotify, eol; or if the repository has subrepos. | |
102 | # * fsmonitor will produce incorrect results if nested repos that are not |
|
102 | # * fsmonitor will produce incorrect results if nested repos that are not | |
103 | # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`. |
|
103 | # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`. | |
104 | # |
|
104 | # | |
105 | # The issues related to nested repos and subrepos are probably not fundamental |
|
105 | # The issues related to nested repos and subrepos are probably not fundamental | |
106 | # ones. Patches to fix them are welcome. |
|
106 | # ones. Patches to fix them are welcome. | |
107 |
|
107 | |||
108 | from __future__ import absolute_import |
|
108 | from __future__ import absolute_import | |
109 |
|
109 | |||
110 | import codecs |
|
110 | import codecs | |
111 | import hashlib |
|
|||
112 | import os |
|
111 | import os | |
113 | import stat |
|
112 | import stat | |
114 | import sys |
|
113 | import sys | |
115 | import tempfile |
|
114 | import tempfile | |
116 | import weakref |
|
115 | import weakref | |
117 |
|
116 | |||
118 | from mercurial.i18n import _ |
|
117 | from mercurial.i18n import _ | |
119 | from mercurial.node import hex |
|
118 | from mercurial.node import hex | |
120 | from mercurial.pycompat import open |
|
119 | from mercurial.pycompat import open | |
121 | from mercurial import ( |
|
120 | from mercurial import ( | |
122 | context, |
|
121 | context, | |
123 | encoding, |
|
122 | encoding, | |
124 | error, |
|
123 | error, | |
125 | extensions, |
|
124 | extensions, | |
126 | localrepo, |
|
125 | localrepo, | |
127 | merge, |
|
126 | merge, | |
128 | pathutil, |
|
127 | pathutil, | |
129 | pycompat, |
|
128 | pycompat, | |
130 | registrar, |
|
129 | registrar, | |
131 | scmutil, |
|
130 | scmutil, | |
132 | util, |
|
131 | util, | |
133 | ) |
|
132 | ) | |
134 | from mercurial import match as matchmod |
|
133 | from mercurial import match as matchmod | |
135 | from mercurial.utils import stringutil |
|
134 | from mercurial.utils import ( | |
|
135 | hashutil, | |||
|
136 | stringutil, | |||
|
137 | ) | |||
136 |
|
138 | |||
137 | from . import ( |
|
139 | from . import ( | |
138 | pywatchman, |
|
140 | pywatchman, | |
139 | state, |
|
141 | state, | |
140 | watchmanclient, |
|
142 | watchmanclient, | |
141 | ) |
|
143 | ) | |
142 |
|
144 | |||
143 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for |
|
145 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for | |
144 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should |
|
146 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should | |
145 | # be specifying the version(s) of Mercurial they are tested with, or |
|
147 | # be specifying the version(s) of Mercurial they are tested with, or | |
146 | # leave the attribute unspecified. |
|
148 | # leave the attribute unspecified. | |
147 | testedwith = b'ships-with-hg-core' |
|
149 | testedwith = b'ships-with-hg-core' | |
148 |
|
150 | |||
149 | configtable = {} |
|
151 | configtable = {} | |
150 | configitem = registrar.configitem(configtable) |
|
152 | configitem = registrar.configitem(configtable) | |
151 |
|
153 | |||
152 | configitem( |
|
154 | configitem( | |
153 | b'fsmonitor', b'mode', default=b'on', |
|
155 | b'fsmonitor', b'mode', default=b'on', | |
154 | ) |
|
156 | ) | |
155 | configitem( |
|
157 | configitem( | |
156 | b'fsmonitor', b'walk_on_invalidate', default=False, |
|
158 | b'fsmonitor', b'walk_on_invalidate', default=False, | |
157 | ) |
|
159 | ) | |
158 | configitem( |
|
160 | configitem( | |
159 | b'fsmonitor', b'timeout', default=b'2', |
|
161 | b'fsmonitor', b'timeout', default=b'2', | |
160 | ) |
|
162 | ) | |
161 | configitem( |
|
163 | configitem( | |
162 | b'fsmonitor', b'blacklistusers', default=list, |
|
164 | b'fsmonitor', b'blacklistusers', default=list, | |
163 | ) |
|
165 | ) | |
164 | configitem( |
|
166 | configitem( | |
165 | b'fsmonitor', b'watchman_exe', default=b'watchman', |
|
167 | b'fsmonitor', b'watchman_exe', default=b'watchman', | |
166 | ) |
|
168 | ) | |
167 | configitem( |
|
169 | configitem( | |
168 | b'fsmonitor', b'verbose', default=True, experimental=True, |
|
170 | b'fsmonitor', b'verbose', default=True, experimental=True, | |
169 | ) |
|
171 | ) | |
170 | configitem( |
|
172 | configitem( | |
171 | b'experimental', b'fsmonitor.transaction_notify', default=False, |
|
173 | b'experimental', b'fsmonitor.transaction_notify', default=False, | |
172 | ) |
|
174 | ) | |
173 |
|
175 | |||
174 | # This extension is incompatible with the following blacklisted extensions |
|
176 | # This extension is incompatible with the following blacklisted extensions | |
175 | # and will disable itself when encountering one of these: |
|
177 | # and will disable itself when encountering one of these: | |
176 | _blacklist = [b'largefiles', b'eol'] |
|
178 | _blacklist = [b'largefiles', b'eol'] | |
177 |
|
179 | |||
178 |
|
180 | |||
179 | def debuginstall(ui, fm): |
|
181 | def debuginstall(ui, fm): | |
180 | fm.write( |
|
182 | fm.write( | |
181 | b"fsmonitor-watchman", |
|
183 | b"fsmonitor-watchman", | |
182 | _(b"fsmonitor checking for watchman binary... (%s)\n"), |
|
184 | _(b"fsmonitor checking for watchman binary... (%s)\n"), | |
183 | ui.configpath(b"fsmonitor", b"watchman_exe"), |
|
185 | ui.configpath(b"fsmonitor", b"watchman_exe"), | |
184 | ) |
|
186 | ) | |
185 | root = tempfile.mkdtemp() |
|
187 | root = tempfile.mkdtemp() | |
186 | c = watchmanclient.client(ui, root) |
|
188 | c = watchmanclient.client(ui, root) | |
187 | err = None |
|
189 | err = None | |
188 | try: |
|
190 | try: | |
189 | v = c.command(b"version") |
|
191 | v = c.command(b"version") | |
190 | fm.write( |
|
192 | fm.write( | |
191 | b"fsmonitor-watchman-version", |
|
193 | b"fsmonitor-watchman-version", | |
192 | _(b" watchman binary version %s\n"), |
|
194 | _(b" watchman binary version %s\n"), | |
193 | pycompat.bytestr(v["version"]), |
|
195 | pycompat.bytestr(v["version"]), | |
194 | ) |
|
196 | ) | |
195 | except watchmanclient.Unavailable as e: |
|
197 | except watchmanclient.Unavailable as e: | |
196 | err = stringutil.forcebytestr(e) |
|
198 | err = stringutil.forcebytestr(e) | |
197 | fm.condwrite( |
|
199 | fm.condwrite( | |
198 | err, |
|
200 | err, | |
199 | b"fsmonitor-watchman-error", |
|
201 | b"fsmonitor-watchman-error", | |
200 | _(b" watchman binary missing or broken: %s\n"), |
|
202 | _(b" watchman binary missing or broken: %s\n"), | |
201 | err, |
|
203 | err, | |
202 | ) |
|
204 | ) | |
203 | return 1 if err else 0 |
|
205 | return 1 if err else 0 | |
204 |
|
206 | |||
205 |
|
207 | |||
206 | def _handleunavailable(ui, state, ex): |
|
208 | def _handleunavailable(ui, state, ex): | |
207 | """Exception handler for Watchman interaction exceptions""" |
|
209 | """Exception handler for Watchman interaction exceptions""" | |
208 | if isinstance(ex, watchmanclient.Unavailable): |
|
210 | if isinstance(ex, watchmanclient.Unavailable): | |
209 | # experimental config: fsmonitor.verbose |
|
211 | # experimental config: fsmonitor.verbose | |
210 | if ex.warn and ui.configbool(b'fsmonitor', b'verbose'): |
|
212 | if ex.warn and ui.configbool(b'fsmonitor', b'verbose'): | |
211 | if b'illegal_fstypes' not in stringutil.forcebytestr(ex): |
|
213 | if b'illegal_fstypes' not in stringutil.forcebytestr(ex): | |
212 | ui.warn(stringutil.forcebytestr(ex) + b'\n') |
|
214 | ui.warn(stringutil.forcebytestr(ex) + b'\n') | |
213 | if ex.invalidate: |
|
215 | if ex.invalidate: | |
214 | state.invalidate() |
|
216 | state.invalidate() | |
215 | # experimental config: fsmonitor.verbose |
|
217 | # experimental config: fsmonitor.verbose | |
216 | if ui.configbool(b'fsmonitor', b'verbose'): |
|
218 | if ui.configbool(b'fsmonitor', b'verbose'): | |
217 | ui.log( |
|
219 | ui.log( | |
218 | b'fsmonitor', |
|
220 | b'fsmonitor', | |
219 | b'Watchman unavailable: %s\n', |
|
221 | b'Watchman unavailable: %s\n', | |
220 | stringutil.forcebytestr(ex.msg), |
|
222 | stringutil.forcebytestr(ex.msg), | |
221 | ) |
|
223 | ) | |
222 | else: |
|
224 | else: | |
223 | ui.log( |
|
225 | ui.log( | |
224 | b'fsmonitor', |
|
226 | b'fsmonitor', | |
225 | b'Watchman exception: %s\n', |
|
227 | b'Watchman exception: %s\n', | |
226 | stringutil.forcebytestr(ex), |
|
228 | stringutil.forcebytestr(ex), | |
227 | ) |
|
229 | ) | |
228 |
|
230 | |||
229 |
|
231 | |||
230 | def _hashignore(ignore): |
|
232 | def _hashignore(ignore): | |
231 | """Calculate hash for ignore patterns and filenames |
|
233 | """Calculate hash for ignore patterns and filenames | |
232 |
|
234 | |||
233 | If this information changes between Mercurial invocations, we can't |
|
235 | If this information changes between Mercurial invocations, we can't | |
234 | rely on Watchman information anymore and have to re-scan the working |
|
236 | rely on Watchman information anymore and have to re-scan the working | |
235 | copy. |
|
237 | copy. | |
236 |
|
238 | |||
237 | """ |
|
239 | """ | |
238 | sha1 = hashlib.sha1() |
|
240 | sha1 = hashutil.sha1() | |
239 | sha1.update(pycompat.byterepr(ignore)) |
|
241 | sha1.update(pycompat.byterepr(ignore)) | |
240 | return pycompat.sysbytes(sha1.hexdigest()) |
|
242 | return pycompat.sysbytes(sha1.hexdigest()) | |
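The hunk above is the mechanical part of this change: only the constructor call moves from hashlib to hashutil, while the update() and hexdigest() lines are untouched, so the ignore-state digest is presumably computed exactly as before. A standalone sketch of the same idea using hashlib directly, outside Mercurial (the input bytes are illustrative):

    import hashlib

    def hash_ignore_state(ignore_repr):
        # hash a stable byte representation of the ignore rules; if the digest
        # changes between runs, cached Watchman results can no longer be trusted
        return hashlib.sha1(ignore_repr).hexdigest()

    print(hash_ignore_state(b"<repr of the ignore matcher>"))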
241 |
|
243 | |||
242 |
|
244 | |||
243 | _watchmanencoding = pywatchman.encoding.get_local_encoding() |
|
245 | _watchmanencoding = pywatchman.encoding.get_local_encoding() | |
244 | _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() |
|
246 | _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() | |
245 | _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding) |
|
247 | _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding) | |
246 |
|
248 | |||
247 |
|
249 | |||
248 | def _watchmantofsencoding(path): |
|
250 | def _watchmantofsencoding(path): | |
249 | """Fix path to match watchman and local filesystem encoding |
|
251 | """Fix path to match watchman and local filesystem encoding | |
250 |
|
252 | |||
251 | watchman's paths encoding can differ from filesystem encoding. For example, |
|
253 | watchman's paths encoding can differ from filesystem encoding. For example, | |
252 | on Windows, it's always utf-8. |
|
254 | on Windows, it's always utf-8. | |
253 | """ |
|
255 | """ | |
254 | try: |
|
256 | try: | |
255 | decoded = path.decode(_watchmanencoding) |
|
257 | decoded = path.decode(_watchmanencoding) | |
256 | except UnicodeDecodeError as e: |
|
258 | except UnicodeDecodeError as e: | |
257 | raise error.Abort( |
|
259 | raise error.Abort( | |
258 | stringutil.forcebytestr(e), hint=b'watchman encoding error' |
|
260 | stringutil.forcebytestr(e), hint=b'watchman encoding error' | |
259 | ) |
|
261 | ) | |
260 |
|
262 | |||
261 | try: |
|
263 | try: | |
262 | encoded = decoded.encode(_fsencoding, 'strict') |
|
264 | encoded = decoded.encode(_fsencoding, 'strict') | |
263 | except UnicodeEncodeError as e: |
|
265 | except UnicodeEncodeError as e: | |
264 | raise error.Abort(stringutil.forcebytestr(e)) |
|
266 | raise error.Abort(stringutil.forcebytestr(e)) | |
265 |
|
267 | |||
266 | return encoded |
|
268 | return encoded | |
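A standalone illustration of the re-encoding step performed above, outside Mercurial (the file name is illustrative; on most Unix systems both encodings are UTF-8 and the round trip is a no-op):

    import sys

    watchman_path = "donn\xe9es.txt".encode("utf-8")  # as watchman might hand it back
    fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
    # decode with watchman's encoding, re-encode with the local filesystem's;
    # a failure here corresponds to the error.Abort branches above
    local_path = watchman_path.decode("utf-8").encode(fs_encoding, "strict")
    print(local_path)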
267 |
|
269 | |||
268 |
|
270 | |||
269 | def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): |
|
271 | def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): | |
270 | '''Replacement for dirstate.walk, hooking into Watchman. |
|
272 | '''Replacement for dirstate.walk, hooking into Watchman. | |
271 |
|
273 | |||
272 | Whenever full is False, ignored is False, and the Watchman client is |
|
274 | Whenever full is False, ignored is False, and the Watchman client is | |
273 | available, use Watchman combined with saved state to possibly return only a |
|
275 | available, use Watchman combined with saved state to possibly return only a | |
274 | subset of files.''' |
|
276 | subset of files.''' | |
275 |
|
277 | |||
276 | def bail(reason): |
|
278 | def bail(reason): | |
277 | self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason) |
|
279 | self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason) | |
278 | return orig(match, subrepos, unknown, ignored, full=True) |
|
280 | return orig(match, subrepos, unknown, ignored, full=True) | |
279 |
|
281 | |||
280 | if full: |
|
282 | if full: | |
281 | return bail(b'full rewalk requested') |
|
283 | return bail(b'full rewalk requested') | |
282 | if ignored: |
|
284 | if ignored: | |
283 | return bail(b'listing ignored files') |
|
285 | return bail(b'listing ignored files') | |
284 | if not self._watchmanclient.available(): |
|
286 | if not self._watchmanclient.available(): | |
285 | return bail(b'client unavailable') |
|
287 | return bail(b'client unavailable') | |
286 | state = self._fsmonitorstate |
|
288 | state = self._fsmonitorstate | |
287 | clock, ignorehash, notefiles = state.get() |
|
289 | clock, ignorehash, notefiles = state.get() | |
288 | if not clock: |
|
290 | if not clock: | |
289 | if state.walk_on_invalidate: |
|
291 | if state.walk_on_invalidate: | |
290 | return bail(b'no clock') |
|
292 | return bail(b'no clock') | |
291 | # Initial NULL clock value, see |
|
293 | # Initial NULL clock value, see | |
292 | # https://facebook.github.io/watchman/docs/clockspec.html |
|
294 | # https://facebook.github.io/watchman/docs/clockspec.html | |
293 | clock = b'c:0:0' |
|
295 | clock = b'c:0:0' | |
294 | notefiles = [] |
|
296 | notefiles = [] | |
295 |
|
297 | |||
296 | ignore = self._ignore |
|
298 | ignore = self._ignore | |
297 | dirignore = self._dirignore |
|
299 | dirignore = self._dirignore | |
298 | if unknown: |
|
300 | if unknown: | |
299 | if _hashignore(ignore) != ignorehash and clock != b'c:0:0': |
|
301 | if _hashignore(ignore) != ignorehash and clock != b'c:0:0': | |
300 | # ignore list changed -- can't rely on Watchman state any more |
|
302 | # ignore list changed -- can't rely on Watchman state any more | |
301 | if state.walk_on_invalidate: |
|
303 | if state.walk_on_invalidate: | |
302 | return bail(b'ignore rules changed') |
|
304 | return bail(b'ignore rules changed') | |
303 | notefiles = [] |
|
305 | notefiles = [] | |
304 | clock = b'c:0:0' |
|
306 | clock = b'c:0:0' | |
305 | else: |
|
307 | else: | |
306 | # always ignore |
|
308 | # always ignore | |
307 | ignore = util.always |
|
309 | ignore = util.always | |
308 | dirignore = util.always |
|
310 | dirignore = util.always | |
309 |
|
311 | |||
310 | matchfn = match.matchfn |
|
312 | matchfn = match.matchfn | |
311 | matchalways = match.always() |
|
313 | matchalways = match.always() | |
312 | dmap = self._map |
|
314 | dmap = self._map | |
313 | if util.safehasattr(dmap, b'_map'): |
|
315 | if util.safehasattr(dmap, b'_map'): | |
314 | # for better performance, directly access the inner dirstate map if the |
|
316 | # for better performance, directly access the inner dirstate map if the | |
315 | # standard dirstate implementation is in use. |
|
317 | # standard dirstate implementation is in use. | |
316 | dmap = dmap._map |
|
318 | dmap = dmap._map | |
317 | nonnormalset = self._map.nonnormalset |
|
319 | nonnormalset = self._map.nonnormalset | |
318 |
|
320 | |||
319 | copymap = self._map.copymap |
|
321 | copymap = self._map.copymap | |
320 | getkind = stat.S_IFMT |
|
322 | getkind = stat.S_IFMT | |
321 | dirkind = stat.S_IFDIR |
|
323 | dirkind = stat.S_IFDIR | |
322 | regkind = stat.S_IFREG |
|
324 | regkind = stat.S_IFREG | |
323 | lnkkind = stat.S_IFLNK |
|
325 | lnkkind = stat.S_IFLNK | |
324 | join = self._join |
|
326 | join = self._join | |
325 | normcase = util.normcase |
|
327 | normcase = util.normcase | |
326 | fresh_instance = False |
|
328 | fresh_instance = False | |
327 |
|
329 | |||
328 | exact = skipstep3 = False |
|
330 | exact = skipstep3 = False | |
329 | if match.isexact(): # match.exact |
|
331 | if match.isexact(): # match.exact | |
330 | exact = True |
|
332 | exact = True | |
331 | dirignore = util.always # skip step 2 |
|
333 | dirignore = util.always # skip step 2 | |
332 | elif match.prefix(): # match.match, no patterns |
|
334 | elif match.prefix(): # match.match, no patterns | |
333 | skipstep3 = True |
|
335 | skipstep3 = True | |
334 |
|
336 | |||
335 | if not exact and self._checkcase: |
|
337 | if not exact and self._checkcase: | |
336 | # note that even though we could receive directory entries, we're only |
|
338 | # note that even though we could receive directory entries, we're only | |
337 | # interested in checking if a file with the same name exists. So only |
|
339 | # interested in checking if a file with the same name exists. So only | |
338 | # normalize files if possible. |
|
340 | # normalize files if possible. | |
339 | normalize = self._normalizefile |
|
341 | normalize = self._normalizefile | |
340 | skipstep3 = False |
|
342 | skipstep3 = False | |
341 | else: |
|
343 | else: | |
342 | normalize = None |
|
344 | normalize = None | |
343 |
|
345 | |||
344 | # step 1: find all explicit files |
|
346 | # step 1: find all explicit files | |
345 | results, work, dirsnotfound = self._walkexplicit(match, subrepos) |
|
347 | results, work, dirsnotfound = self._walkexplicit(match, subrepos) | |
346 |
|
348 | |||
347 | skipstep3 = skipstep3 and not (work or dirsnotfound) |
|
349 | skipstep3 = skipstep3 and not (work or dirsnotfound) | |
348 | work = [d for d in work if not dirignore(d[0])] |
|
350 | work = [d for d in work if not dirignore(d[0])] | |
349 |
|
351 | |||
350 | if not work and (exact or skipstep3): |
|
352 | if not work and (exact or skipstep3): | |
351 | for s in subrepos: |
|
353 | for s in subrepos: | |
352 | del results[s] |
|
354 | del results[s] | |
353 | del results[b'.hg'] |
|
355 | del results[b'.hg'] | |
354 | return results |
|
356 | return results | |
355 |
|
357 | |||
356 | # step 2: query Watchman |
|
358 | # step 2: query Watchman | |
357 | try: |
|
359 | try: | |
358 | # Use the user-configured timeout for the query. |
|
360 | # Use the user-configured timeout for the query. | |
359 | # Add a little slack over the top of the user query to allow for |
|
361 | # Add a little slack over the top of the user query to allow for | |
360 | # overheads while transferring the data |
|
362 | # overheads while transferring the data | |
361 | self._watchmanclient.settimeout(state.timeout + 0.1) |
|
363 | self._watchmanclient.settimeout(state.timeout + 0.1) | |
362 | result = self._watchmanclient.command( |
|
364 | result = self._watchmanclient.command( | |
363 | b'query', |
|
365 | b'query', | |
364 | { |
|
366 | { | |
365 | b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'], |
|
367 | b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'], | |
366 | b'since': clock, |
|
368 | b'since': clock, | |
367 | b'expression': [ |
|
369 | b'expression': [ | |
368 | b'not', |
|
370 | b'not', | |
369 | [ |
|
371 | [ | |
370 | b'anyof', |
|
372 | b'anyof', | |
371 | [b'dirname', b'.hg'], |
|
373 | [b'dirname', b'.hg'], | |
372 | [b'name', b'.hg', b'wholename'], |
|
374 | [b'name', b'.hg', b'wholename'], | |
373 | ], |
|
375 | ], | |
374 | ], |
|
376 | ], | |
375 | b'sync_timeout': int(state.timeout * 1000), |
|
377 | b'sync_timeout': int(state.timeout * 1000), | |
376 | b'empty_on_fresh_instance': state.walk_on_invalidate, |
|
378 | b'empty_on_fresh_instance': state.walk_on_invalidate, | |
377 | }, |
|
379 | }, | |
378 | ) |
|
380 | ) | |
379 | except Exception as ex: |
|
381 | except Exception as ex: | |
380 | _handleunavailable(self._ui, state, ex) |
|
382 | _handleunavailable(self._ui, state, ex) | |
381 | self._watchmanclient.clearconnection() |
|
383 | self._watchmanclient.clearconnection() | |
382 | return bail(b'exception during run') |
|
384 | return bail(b'exception during run') | |
383 | else: |
|
385 | else: | |
384 | # We need to propagate the last observed clock up so that we |
|
386 | # We need to propagate the last observed clock up so that we | |
385 | # can use it for our next query |
|
387 | # can use it for our next query | |
386 | state.setlastclock(pycompat.sysbytes(result[b'clock'])) |
|
388 | state.setlastclock(pycompat.sysbytes(result[b'clock'])) | |
387 | if result[b'is_fresh_instance']: |
|
389 | if result[b'is_fresh_instance']: | |
388 | if state.walk_on_invalidate: |
|
390 | if state.walk_on_invalidate: | |
389 | state.invalidate() |
|
391 | state.invalidate() | |
390 | return bail(b'fresh instance') |
|
392 | return bail(b'fresh instance') | |
391 | fresh_instance = True |
|
393 | fresh_instance = True | |
392 | # Ignore any prior notable files from the state info |
|
394 | # Ignore any prior notable files from the state info | |
393 | notefiles = [] |
|
395 | notefiles = [] | |
394 |
|
396 | |||
395 | # for file paths which require normalization and we encounter a case |
|
397 | # for file paths which require normalization and we encounter a case | |
396 | # collision, we store our own foldmap |
|
398 | # collision, we store our own foldmap | |
397 | if normalize: |
|
399 | if normalize: | |
398 | foldmap = dict((normcase(k), k) for k in results) |
|
400 | foldmap = dict((normcase(k), k) for k in results) | |
399 |
|
401 | |||
400 | switch_slashes = pycompat.ossep == b'\\' |
|
402 | switch_slashes = pycompat.ossep == b'\\' | |
401 | # The order of the results is, strictly speaking, undefined. |
|
403 | # The order of the results is, strictly speaking, undefined. | |
402 | # For case changes on a case insensitive filesystem we may receive |
|
404 | # For case changes on a case insensitive filesystem we may receive | |
403 | # two entries, one with exists=True and another with exists=False. |
|
405 | # two entries, one with exists=True and another with exists=False. | |
404 | # The exists=True entries in the same response should be interpreted |
|
406 | # The exists=True entries in the same response should be interpreted | |
405 | # as being happens-after the exists=False entries due to the way that |
|
407 | # as being happens-after the exists=False entries due to the way that | |
406 | # Watchman tracks files. We use this property to reconcile deletes |
|
408 | # Watchman tracks files. We use this property to reconcile deletes | |
407 | # for name case changes. |
|
409 | # for name case changes. | |
408 | for entry in result[b'files']: |
|
410 | for entry in result[b'files']: | |
409 | fname = entry[b'name'] |
|
411 | fname = entry[b'name'] | |
410 |
|
412 | |||
411 | # Watchman always gives us a str. Normalize to bytes on Python 3 |
|
413 | # Watchman always gives us a str. Normalize to bytes on Python 3 | |
412 | # using Watchman's encoding, if needed. |
|
414 | # using Watchman's encoding, if needed. | |
413 | if not isinstance(fname, bytes): |
|
415 | if not isinstance(fname, bytes): | |
414 | fname = fname.encode(_watchmanencoding) |
|
416 | fname = fname.encode(_watchmanencoding) | |
415 |
|
417 | |||
416 | if _fixencoding: |
|
418 | if _fixencoding: | |
417 | fname = _watchmantofsencoding(fname) |
|
419 | fname = _watchmantofsencoding(fname) | |
418 |
|
420 | |||
419 | if switch_slashes: |
|
421 | if switch_slashes: | |
420 | fname = fname.replace(b'\\', b'/') |
|
422 | fname = fname.replace(b'\\', b'/') | |
421 | if normalize: |
|
423 | if normalize: | |
422 | normed = normcase(fname) |
|
424 | normed = normcase(fname) | |
423 | fname = normalize(fname, True, True) |
|
425 | fname = normalize(fname, True, True) | |
424 | foldmap[normed] = fname |
|
426 | foldmap[normed] = fname | |
425 | fmode = entry[b'mode'] |
|
427 | fmode = entry[b'mode'] | |
426 | fexists = entry[b'exists'] |
|
428 | fexists = entry[b'exists'] | |
427 | kind = getkind(fmode) |
|
429 | kind = getkind(fmode) | |
428 |
|
430 | |||
429 | if b'/.hg/' in fname or fname.endswith(b'/.hg'): |
|
431 | if b'/.hg/' in fname or fname.endswith(b'/.hg'): | |
430 | return bail(b'nested-repo-detected') |
|
432 | return bail(b'nested-repo-detected') | |
431 |
|
433 | |||
432 | if not fexists: |
|
434 | if not fexists: | |
433 | # if marked as deleted and we don't already have a change |
|
435 | # if marked as deleted and we don't already have a change | |
434 | # record, mark it as deleted. If we already have an entry |
|
436 | # record, mark it as deleted. If we already have an entry | |
435 | # for fname then it was either part of walkexplicit or was |
|
437 | # for fname then it was either part of walkexplicit or was | |
436 | # an earlier result that was a case change |
|
438 | # an earlier result that was a case change | |
437 | if ( |
|
439 | if ( | |
438 | fname not in results |
|
440 | fname not in results | |
439 | and fname in dmap |
|
441 | and fname in dmap | |
440 | and (matchalways or matchfn(fname)) |
|
442 | and (matchalways or matchfn(fname)) | |
441 | ): |
|
443 | ): | |
442 | results[fname] = None |
|
444 | results[fname] = None | |
443 | elif kind == dirkind: |
|
445 | elif kind == dirkind: | |
444 | if fname in dmap and (matchalways or matchfn(fname)): |
|
446 | if fname in dmap and (matchalways or matchfn(fname)): | |
445 | results[fname] = None |
|
447 | results[fname] = None | |
446 | elif kind == regkind or kind == lnkkind: |
|
448 | elif kind == regkind or kind == lnkkind: | |
447 | if fname in dmap: |
|
449 | if fname in dmap: | |
448 | if matchalways or matchfn(fname): |
|
450 | if matchalways or matchfn(fname): | |
449 | results[fname] = entry |
|
451 | results[fname] = entry | |
450 | elif (matchalways or matchfn(fname)) and not ignore(fname): |
|
452 | elif (matchalways or matchfn(fname)) and not ignore(fname): | |
451 | results[fname] = entry |
|
453 | results[fname] = entry | |
452 | elif fname in dmap and (matchalways or matchfn(fname)): |
|
454 | elif fname in dmap and (matchalways or matchfn(fname)): | |
453 | results[fname] = None |
|
455 | results[fname] = None | |
454 |
|
456 | |||
455 | # step 3: query notable files we don't already know about |
|
457 | # step 3: query notable files we don't already know about | |
456 | # XXX try not to iterate over the entire dmap |
|
458 | # XXX try not to iterate over the entire dmap | |
457 | if normalize: |
|
459 | if normalize: | |
458 | # any notable files that have changed case will already be handled |
|
460 | # any notable files that have changed case will already be handled | |
459 | # above, so just check membership in the foldmap |
|
461 | # above, so just check membership in the foldmap | |
460 | notefiles = set( |
|
462 | notefiles = set( | |
461 | ( |
|
463 | ( | |
462 | normalize(f, True, True) |
|
464 | normalize(f, True, True) | |
463 | for f in notefiles |
|
465 | for f in notefiles | |
464 | if normcase(f) not in foldmap |
|
466 | if normcase(f) not in foldmap | |
465 | ) |
|
467 | ) | |
466 | ) |
|
468 | ) | |
467 | visit = set( |
|
469 | visit = set( | |
468 | ( |
|
470 | ( | |
469 | f |
|
471 | f | |
470 | for f in notefiles |
|
472 | for f in notefiles | |
471 | if ( |
|
473 | if ( | |
472 | f not in results and matchfn(f) and (f in dmap or not ignore(f)) |
|
474 | f not in results and matchfn(f) and (f in dmap or not ignore(f)) | |
473 | ) |
|
475 | ) | |
474 | ) |
|
476 | ) | |
475 | ) |
|
477 | ) | |
476 |
|
478 | |||
477 | if not fresh_instance: |
|
479 | if not fresh_instance: | |
478 | if matchalways: |
|
480 | if matchalways: | |
479 | visit.update(f for f in nonnormalset if f not in results) |
|
481 | visit.update(f for f in nonnormalset if f not in results) | |
480 | visit.update(f for f in copymap if f not in results) |
|
482 | visit.update(f for f in copymap if f not in results) | |
481 | else: |
|
483 | else: | |
482 | visit.update( |
|
484 | visit.update( | |
483 | f for f in nonnormalset if f not in results and matchfn(f) |
|
485 | f for f in nonnormalset if f not in results and matchfn(f) | |
484 | ) |
|
486 | ) | |
485 | visit.update(f for f in copymap if f not in results and matchfn(f)) |
|
487 | visit.update(f for f in copymap if f not in results and matchfn(f)) | |
486 | else: |
|
488 | else: | |
487 | if matchalways: |
|
489 | if matchalways: | |
488 | visit.update( |
|
490 | visit.update( | |
489 | f for f, st in pycompat.iteritems(dmap) if f not in results |
|
491 | f for f, st in pycompat.iteritems(dmap) if f not in results | |
490 | ) |
|
492 | ) | |
491 | visit.update(f for f in copymap if f not in results) |
|
493 | visit.update(f for f in copymap if f not in results) | |
492 | else: |
|
494 | else: | |
493 | visit.update( |
|
495 | visit.update( | |
494 | f |
|
496 | f | |
495 | for f, st in pycompat.iteritems(dmap) |
|
497 | for f, st in pycompat.iteritems(dmap) | |
496 | if f not in results and matchfn(f) |
|
498 | if f not in results and matchfn(f) | |
497 | ) |
|
499 | ) | |
498 | visit.update(f for f in copymap if f not in results and matchfn(f)) |
|
500 | visit.update(f for f in copymap if f not in results and matchfn(f)) | |
499 |
|
501 | |||
500 | audit = pathutil.pathauditor(self._root, cached=True).check |
|
502 | audit = pathutil.pathauditor(self._root, cached=True).check | |
501 | auditpass = [f for f in visit if audit(f)] |
|
503 | auditpass = [f for f in visit if audit(f)] | |
502 | auditpass.sort() |
|
504 | auditpass.sort() | |
503 | auditfail = visit.difference(auditpass) |
|
505 | auditfail = visit.difference(auditpass) | |
504 | for f in auditfail: |
|
506 | for f in auditfail: | |
505 | results[f] = None |
|
507 | results[f] = None | |
506 |
|
508 | |||
507 | nf = iter(auditpass) |
|
509 | nf = iter(auditpass) | |
508 | for st in util.statfiles([join(f) for f in auditpass]): |
|
510 | for st in util.statfiles([join(f) for f in auditpass]): | |
509 | f = next(nf) |
|
511 | f = next(nf) | |
510 | if st or f in dmap: |
|
512 | if st or f in dmap: | |
511 | results[f] = st |
|
513 | results[f] = st | |
512 |
|
514 | |||
513 | for s in subrepos: |
|
515 | for s in subrepos: | |
514 | del results[s] |
|
516 | del results[s] | |
515 | del results[b'.hg'] |
|
517 | del results[b'.hg'] | |
516 | return results |
|
518 | return results | |
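For reference, the heart of the walk above is a single Watchman "since" query scoped to everything outside .hg. A rough standalone sketch of that query with pywatchman (a sketch only: the watch root and clock are placeholders, error handling is omitted, and result key types can vary with pywatchman's encoding settings):

    import pywatchman

    client = pywatchman.client(timeout=2.1)
    root = '/path/to/repo'                      # placeholder
    client.query('watch-project', root)         # make sure the root is watched
    result = client.query('query', root, {
        'fields': ['mode', 'mtime', 'size', 'exists', 'name'],
        'since': 'c:0:0',                       # NULL clock: report everything
        'expression': ['not', ['anyof', ['dirname', '.hg'],
                               ['name', '.hg', 'wholename']]],
    })
    changed = [f['name'] for f in result['files']]
    clock = result['clock']                     # saved for the next query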
517 |
|
519 | |||
518 |
|
520 | |||
519 | def overridestatus( |
|
521 | def overridestatus( | |
520 | orig, |
|
522 | orig, | |
521 | self, |
|
523 | self, | |
522 | node1=b'.', |
|
524 | node1=b'.', | |
523 | node2=None, |
|
525 | node2=None, | |
524 | match=None, |
|
526 | match=None, | |
525 | ignored=False, |
|
527 | ignored=False, | |
526 | clean=False, |
|
528 | clean=False, | |
527 | unknown=False, |
|
529 | unknown=False, | |
528 | listsubrepos=False, |
|
530 | listsubrepos=False, | |
529 | ): |
|
531 | ): | |
530 | listignored = ignored |
|
532 | listignored = ignored | |
531 | listclean = clean |
|
533 | listclean = clean | |
532 | listunknown = unknown |
|
534 | listunknown = unknown | |
533 |
|
535 | |||
534 | def _cmpsets(l1, l2): |
|
536 | def _cmpsets(l1, l2): | |
535 | try: |
|
537 | try: | |
536 | if b'FSMONITOR_LOG_FILE' in encoding.environ: |
|
538 | if b'FSMONITOR_LOG_FILE' in encoding.environ: | |
537 | fn = encoding.environ[b'FSMONITOR_LOG_FILE'] |
|
539 | fn = encoding.environ[b'FSMONITOR_LOG_FILE'] | |
538 | f = open(fn, b'wb') |
|
540 | f = open(fn, b'wb') | |
539 | else: |
|
541 | else: | |
540 | fn = b'fsmonitorfail.log' |
|
542 | fn = b'fsmonitorfail.log' | |
541 | f = self.vfs.open(fn, b'wb') |
|
543 | f = self.vfs.open(fn, b'wb') | |
542 | except (IOError, OSError): |
|
544 | except (IOError, OSError): | |
543 | self.ui.warn(_(b'warning: unable to write to %s\n') % fn) |
|
545 | self.ui.warn(_(b'warning: unable to write to %s\n') % fn) | |
544 | return |
|
546 | return | |
545 |
|
547 | |||
546 | try: |
|
548 | try: | |
547 | for i, (s1, s2) in enumerate(zip(l1, l2)): |
|
549 | for i, (s1, s2) in enumerate(zip(l1, l2)): | |
548 | if set(s1) != set(s2): |
|
550 | if set(s1) != set(s2): | |
549 | f.write(b'sets at position %d are unequal\n' % i) |
|
551 | f.write(b'sets at position %d are unequal\n' % i) | |
550 | f.write(b'watchman returned: %s\n' % s1) |
|
552 | f.write(b'watchman returned: %s\n' % s1) | |
551 | f.write(b'stat returned: %s\n' % s2) |
|
553 | f.write(b'stat returned: %s\n' % s2) | |
552 | finally: |
|
554 | finally: | |
553 | f.close() |
|
555 | f.close() | |
554 |
|
556 | |||
555 | if isinstance(node1, context.changectx): |
|
557 | if isinstance(node1, context.changectx): | |
556 | ctx1 = node1 |
|
558 | ctx1 = node1 | |
557 | else: |
|
559 | else: | |
558 | ctx1 = self[node1] |
|
560 | ctx1 = self[node1] | |
559 | if isinstance(node2, context.changectx): |
|
561 | if isinstance(node2, context.changectx): | |
560 | ctx2 = node2 |
|
562 | ctx2 = node2 | |
561 | else: |
|
563 | else: | |
562 | ctx2 = self[node2] |
|
564 | ctx2 = self[node2] | |
563 |
|
565 | |||
564 | working = ctx2.rev() is None |
|
566 | working = ctx2.rev() is None | |
565 | parentworking = working and ctx1 == self[b'.'] |
|
567 | parentworking = working and ctx1 == self[b'.'] | |
566 | match = match or matchmod.always() |
|
568 | match = match or matchmod.always() | |
567 |
|
569 | |||
568 | # Maybe we can use this opportunity to update Watchman's state. |
|
570 | # Maybe we can use this opportunity to update Watchman's state. | |
569 | # Mercurial uses workingcommitctx and/or memctx to represent the part of |
|
571 | # Mercurial uses workingcommitctx and/or memctx to represent the part of | |
570 | # the workingctx that is to be committed. So don't update the state in |
|
572 | # the workingctx that is to be committed. So don't update the state in | |
571 | # that case. |
|
573 | # that case. | |
572 | # HG_PENDING is set in the environment when the dirstate is being updated |
|
574 | # HG_PENDING is set in the environment when the dirstate is being updated | |
573 | # in the middle of a transaction; we must not update our state in that |
|
575 | # in the middle of a transaction; we must not update our state in that | |
574 | # case, or we risk forgetting about changes in the working copy. |
|
576 | # case, or we risk forgetting about changes in the working copy. | |
575 | updatestate = ( |
|
577 | updatestate = ( | |
576 | parentworking |
|
578 | parentworking | |
577 | and match.always() |
|
579 | and match.always() | |
578 | and not isinstance(ctx2, (context.workingcommitctx, context.memctx)) |
|
580 | and not isinstance(ctx2, (context.workingcommitctx, context.memctx)) | |
579 | and b'HG_PENDING' not in encoding.environ |
|
581 | and b'HG_PENDING' not in encoding.environ | |
580 | ) |
|
582 | ) | |
581 |
|
583 | |||
582 | try: |
|
584 | try: | |
583 | if self._fsmonitorstate.walk_on_invalidate: |
|
585 | if self._fsmonitorstate.walk_on_invalidate: | |
584 | # Use a short timeout to query the current clock. If that |
|
586 | # Use a short timeout to query the current clock. If that | |
585 | # takes too long then we assume that the service will be slow |
|
587 | # takes too long then we assume that the service will be slow | |
586 | # to answer our query. |
|
588 | # to answer our query. | |
587 | # walk_on_invalidate indicates that we prefer to walk the |
|
589 | # walk_on_invalidate indicates that we prefer to walk the | |
588 | # tree ourselves because we can ignore portions that Watchman |
|
590 | # tree ourselves because we can ignore portions that Watchman | |
589 | # cannot and we tend to be faster in the warmer buffer cache |
|
591 | # cannot and we tend to be faster in the warmer buffer cache | |
590 | # cases. |
|
592 | # cases. | |
591 | self._watchmanclient.settimeout(0.1) |
|
593 | self._watchmanclient.settimeout(0.1) | |
592 | else: |
|
594 | else: | |
593 | # Give Watchman more time to potentially complete its walk |
|
595 | # Give Watchman more time to potentially complete its walk | |
594 | # and return the initial clock. In this mode we assume that |
|
596 | # and return the initial clock. In this mode we assume that | |
595 | # the filesystem will be slower than parsing a potentially |
|
597 | # the filesystem will be slower than parsing a potentially | |
596 | # very large Watchman result set. |
|
598 | # very large Watchman result set. | |
597 | self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1) |
|
599 | self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1) | |
598 | startclock = self._watchmanclient.getcurrentclock() |
|
600 | startclock = self._watchmanclient.getcurrentclock() | |
599 | except Exception as ex: |
|
601 | except Exception as ex: | |
600 | self._watchmanclient.clearconnection() |
|
602 | self._watchmanclient.clearconnection() | |
601 | _handleunavailable(self.ui, self._fsmonitorstate, ex) |
|
603 | _handleunavailable(self.ui, self._fsmonitorstate, ex) | |
602 | # boo, Watchman failed. bail |
|
604 | # boo, Watchman failed. bail | |
603 | return orig( |
|
605 | return orig( | |
604 | node1, |
|
606 | node1, | |
605 | node2, |
|
607 | node2, | |
606 | match, |
|
608 | match, | |
607 | listignored, |
|
609 | listignored, | |
608 | listclean, |
|
610 | listclean, | |
609 | listunknown, |
|
611 | listunknown, | |
610 | listsubrepos, |
|
612 | listsubrepos, | |
611 | ) |
|
613 | ) | |
612 |
|
614 | |||
613 | if updatestate: |
|
615 | if updatestate: | |
614 | # We need info about unknown files. This may make things slower the |
|
616 | # We need info about unknown files. This may make things slower the | |
615 | # first time, but whatever. |
|
617 | # first time, but whatever. | |
616 | stateunknown = True |
|
618 | stateunknown = True | |
617 | else: |
|
619 | else: | |
618 | stateunknown = listunknown |
|
620 | stateunknown = listunknown | |
619 |
|
621 | |||
620 | if updatestate: |
|
622 | if updatestate: | |
621 | ps = poststatus(startclock) |
|
623 | ps = poststatus(startclock) | |
622 | self.addpostdsstatus(ps) |
|
624 | self.addpostdsstatus(ps) | |
623 |
|
625 | |||
624 | r = orig( |
|
626 | r = orig( | |
625 | node1, node2, match, listignored, listclean, stateunknown, listsubrepos |
|
627 | node1, node2, match, listignored, listclean, stateunknown, listsubrepos | |
626 | ) |
|
628 | ) | |
627 | modified, added, removed, deleted, unknown, ignored, clean = r |
|
629 | modified, added, removed, deleted, unknown, ignored, clean = r | |
628 |
|
630 | |||
629 | if not listunknown: |
|
631 | if not listunknown: | |
630 | unknown = [] |
|
632 | unknown = [] | |
631 |
|
633 | |||
632 | # don't do paranoid checks if we're not going to query Watchman anyway |
|
634 | # don't do paranoid checks if we're not going to query Watchman anyway | |
633 | full = listclean or match.traversedir is not None |
|
635 | full = listclean or match.traversedir is not None | |
634 | if self._fsmonitorstate.mode == b'paranoid' and not full: |
|
636 | if self._fsmonitorstate.mode == b'paranoid' and not full: | |
635 | # run status again and fall back to the old walk this time |
|
637 | # run status again and fall back to the old walk this time | |
636 | self.dirstate._fsmonitordisable = True |
|
638 | self.dirstate._fsmonitordisable = True | |
637 |
|
639 | |||
638 | # shut the UI up |
|
640 | # shut the UI up | |
639 | quiet = self.ui.quiet |
|
641 | quiet = self.ui.quiet | |
640 | self.ui.quiet = True |
|
642 | self.ui.quiet = True | |
641 | fout, ferr = self.ui.fout, self.ui.ferr |
|
643 | fout, ferr = self.ui.fout, self.ui.ferr | |
642 | self.ui.fout = self.ui.ferr = open(os.devnull, b'wb') |
|
644 | self.ui.fout = self.ui.ferr = open(os.devnull, b'wb') | |
643 |
|
645 | |||
644 | try: |
|
646 | try: | |
645 | rv2 = orig( |
|
647 | rv2 = orig( | |
646 | node1, |
|
648 | node1, | |
647 | node2, |
|
649 | node2, | |
648 | match, |
|
650 | match, | |
649 | listignored, |
|
651 | listignored, | |
650 | listclean, |
|
652 | listclean, | |
651 | listunknown, |
|
653 | listunknown, | |
652 | listsubrepos, |
|
654 | listsubrepos, | |
653 | ) |
|
655 | ) | |
654 | finally: |
|
656 | finally: | |
655 | self.dirstate._fsmonitordisable = False |
|
657 | self.dirstate._fsmonitordisable = False | |
656 | self.ui.quiet = quiet |
|
658 | self.ui.quiet = quiet | |
657 | self.ui.fout, self.ui.ferr = fout, ferr |
|
659 | self.ui.fout, self.ui.ferr = fout, ferr | |
658 |
|
660 | |||
659 | # clean isn't tested since it's set to True above |
|
661 | # clean isn't tested since it's set to True above | |
660 | with self.wlock(): |
|
662 | with self.wlock(): | |
661 | _cmpsets( |
|
663 | _cmpsets( | |
662 | [modified, added, removed, deleted, unknown, ignored, clean], |
|
664 | [modified, added, removed, deleted, unknown, ignored, clean], | |
663 | rv2, |
|
665 | rv2, | |
664 | ) |
|
666 | ) | |
665 | modified, added, removed, deleted, unknown, ignored, clean = rv2 |
|
667 | modified, added, removed, deleted, unknown, ignored, clean = rv2 | |
666 |
|
668 | |||
667 | return scmutil.status( |
|
669 | return scmutil.status( | |
668 | modified, added, removed, deleted, unknown, ignored, clean |
|
670 | modified, added, removed, deleted, unknown, ignored, clean | |
669 | ) |
|
671 | ) | |
670 |
|
672 | |||
671 |
|
673 | |||
672 | class poststatus(object): |
|
674 | class poststatus(object): | |
673 | def __init__(self, startclock): |
|
675 | def __init__(self, startclock): | |
674 | self._startclock = startclock |
|
676 | self._startclock = startclock | |
675 |
|
677 | |||
676 | def __call__(self, wctx, status): |
|
678 | def __call__(self, wctx, status): | |
677 | clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock |
|
679 | clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock | |
678 | hashignore = _hashignore(wctx.repo().dirstate._ignore) |
|
680 | hashignore = _hashignore(wctx.repo().dirstate._ignore) | |
679 | notefiles = ( |
|
681 | notefiles = ( | |
680 | status.modified |
|
682 | status.modified | |
681 | + status.added |
|
683 | + status.added | |
682 | + status.removed |
|
684 | + status.removed | |
683 | + status.deleted |
|
685 | + status.deleted | |
684 | + status.unknown |
|
686 | + status.unknown | |
685 | ) |
|
687 | ) | |
686 | wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles) |
|
688 | wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles) | |
687 |
|
689 | |||
688 |
|
690 | |||
689 | def makedirstate(repo, dirstate): |
|
691 | def makedirstate(repo, dirstate): | |
690 | class fsmonitordirstate(dirstate.__class__): |
|
692 | class fsmonitordirstate(dirstate.__class__): | |
691 | def _fsmonitorinit(self, repo): |
|
693 | def _fsmonitorinit(self, repo): | |
692 | # _fsmonitordisable is used in paranoid mode |
|
694 | # _fsmonitordisable is used in paranoid mode | |
693 | self._fsmonitordisable = False |
|
695 | self._fsmonitordisable = False | |
694 | self._fsmonitorstate = repo._fsmonitorstate |
|
696 | self._fsmonitorstate = repo._fsmonitorstate | |
695 | self._watchmanclient = repo._watchmanclient |
|
697 | self._watchmanclient = repo._watchmanclient | |
696 | self._repo = weakref.proxy(repo) |
|
698 | self._repo = weakref.proxy(repo) | |
697 |
|
699 | |||
698 | def walk(self, *args, **kwargs): |
|
700 | def walk(self, *args, **kwargs): | |
699 | orig = super(fsmonitordirstate, self).walk |
|
701 | orig = super(fsmonitordirstate, self).walk | |
700 | if self._fsmonitordisable: |
|
702 | if self._fsmonitordisable: | |
701 | return orig(*args, **kwargs) |
|
703 | return orig(*args, **kwargs) | |
702 | return overridewalk(orig, self, *args, **kwargs) |
|
704 | return overridewalk(orig, self, *args, **kwargs) | |
703 |
|
705 | |||
704 | def rebuild(self, *args, **kwargs): |
|
706 | def rebuild(self, *args, **kwargs): | |
705 | self._fsmonitorstate.invalidate() |
|
707 | self._fsmonitorstate.invalidate() | |
706 | return super(fsmonitordirstate, self).rebuild(*args, **kwargs) |
|
708 | return super(fsmonitordirstate, self).rebuild(*args, **kwargs) | |
707 |
|
709 | |||
708 | def invalidate(self, *args, **kwargs): |
|
710 | def invalidate(self, *args, **kwargs): | |
709 | self._fsmonitorstate.invalidate() |
|
711 | self._fsmonitorstate.invalidate() | |
710 | return super(fsmonitordirstate, self).invalidate(*args, **kwargs) |
|
712 | return super(fsmonitordirstate, self).invalidate(*args, **kwargs) | |
711 |
|
713 | |||
712 | dirstate.__class__ = fsmonitordirstate |
|
714 | dirstate.__class__ = fsmonitordirstate | |
713 | dirstate._fsmonitorinit(repo) |
|
715 | dirstate._fsmonitorinit(repo) | |
714 |
|
716 | |||
715 |
|
717 | |||
716 | def wrapdirstate(orig, self): |
|
718 | def wrapdirstate(orig, self): | |
717 | ds = orig(self) |
|
719 | ds = orig(self) | |
718 | # only override the dirstate when Watchman is available for the repo |
|
720 | # only override the dirstate when Watchman is available for the repo | |
719 | if util.safehasattr(self, b'_fsmonitorstate'): |
|
721 | if util.safehasattr(self, b'_fsmonitorstate'): | |
720 | makedirstate(self, ds) |
|
722 | makedirstate(self, ds) | |
721 | return ds |
|
723 | return ds | |
722 |
|
724 | |||
723 |
|
725 | |||
724 | def extsetup(ui): |
|
726 | def extsetup(ui): | |
725 | extensions.wrapfilecache( |
|
727 | extensions.wrapfilecache( | |
726 | localrepo.localrepository, b'dirstate', wrapdirstate |
|
728 | localrepo.localrepository, b'dirstate', wrapdirstate | |
727 | ) |
|
729 | ) | |
728 | if pycompat.isdarwin: |
|
730 | if pycompat.isdarwin: | |
729 | # An assist for avoiding the dangling-symlink fsevents bug |
|
731 | # An assist for avoiding the dangling-symlink fsevents bug | |
730 | extensions.wrapfunction(os, b'symlink', wrapsymlink) |
|
732 | extensions.wrapfunction(os, b'symlink', wrapsymlink) | |
731 |
|
733 | |||
732 | extensions.wrapfunction(merge, b'update', wrapupdate) |
|
734 | extensions.wrapfunction(merge, b'update', wrapupdate) | |
733 |
|
735 | |||
734 |
|
736 | |||
735 | def wrapsymlink(orig, source, link_name): |
|
737 | def wrapsymlink(orig, source, link_name): | |
736 | ''' if we create a dangling symlink, also touch the parent dir |
|
738 | ''' if we create a dangling symlink, also touch the parent dir | |
737 | to encourage fsevents notifications to work more correctly ''' |
|
739 | to encourage fsevents notifications to work more correctly ''' | |
738 | try: |
|
740 | try: | |
739 | return orig(source, link_name) |
|
741 | return orig(source, link_name) | |
740 | finally: |
|
742 | finally: | |
741 | try: |
|
743 | try: | |
742 | os.utime(os.path.dirname(link_name), None) |
|
744 | os.utime(os.path.dirname(link_name), None) | |
743 | except OSError: |
|
745 | except OSError: | |
744 | pass |
|
746 | pass | |
745 |
|
747 | |||
746 |
|
748 | |||
747 | class state_update(object): |
|
749 | class state_update(object): | |
748 | ''' This context manager is responsible for dispatching the state-enter |
|
750 | ''' This context manager is responsible for dispatching the state-enter | |
749 | and state-leave signals to the watchman service. The enter and leave |
|
751 | and state-leave signals to the watchman service. The enter and leave | |
750 | methods can be invoked manually (for scenarios where context manager |
|
752 | methods can be invoked manually (for scenarios where context manager | |
751 | semantics are not possible). If parameters oldnode and newnode are None, |
|
753 | semantics are not possible). If parameters oldnode and newnode are None, | |
752 | they will be populated based on current working copy in enter and |
|
754 | they will be populated based on current working copy in enter and | |
753 | leave, respectively. Similarly, if the distance is none, it will be |
|
755 | leave, respectively. Similarly, if the distance is none, it will be | |
754 | calculated based on the oldnode and newnode in the leave method.''' |
|
756 | calculated based on the oldnode and newnode in the leave method.''' | |
755 |
|
757 | |||
756 | def __init__( |
|
758 | def __init__( | |
757 | self, |
|
759 | self, | |
758 | repo, |
|
760 | repo, | |
759 | name, |
|
761 | name, | |
760 | oldnode=None, |
|
762 | oldnode=None, | |
761 | newnode=None, |
|
763 | newnode=None, | |
762 | distance=None, |
|
764 | distance=None, | |
763 | partial=False, |
|
765 | partial=False, | |
764 | ): |
|
766 | ): | |
765 | self.repo = repo.unfiltered() |
|
767 | self.repo = repo.unfiltered() | |
766 | self.name = name |
|
768 | self.name = name | |
767 | self.oldnode = oldnode |
|
769 | self.oldnode = oldnode | |
768 | self.newnode = newnode |
|
770 | self.newnode = newnode | |
769 | self.distance = distance |
|
771 | self.distance = distance | |
770 | self.partial = partial |
|
772 | self.partial = partial | |
771 | self._lock = None |
|
773 | self._lock = None | |
772 | self.need_leave = False |
|
774 | self.need_leave = False | |
773 |
|
775 | |||
774 | def __enter__(self): |
|
776 | def __enter__(self): | |
775 | self.enter() |
|
777 | self.enter() | |
776 |
|
778 | |||
777 | def enter(self): |
|
779 | def enter(self): | |
778 | # Make sure we have a wlock prior to sending notifications to watchman. |
|
780 | # Make sure we have a wlock prior to sending notifications to watchman. | |
779 | # We don't want to race with other actors. In the update case, |
|
781 | # We don't want to race with other actors. In the update case, | |
780 | # merge.update is going to take the wlock almost immediately. We are |
|
782 | # merge.update is going to take the wlock almost immediately. We are | |
781 | # effectively extending the lock around several short sanity checks. |
|
783 | # effectively extending the lock around several short sanity checks. | |
782 | if self.oldnode is None: |
|
784 | if self.oldnode is None: | |
783 | self.oldnode = self.repo[b'.'].node() |
|
785 | self.oldnode = self.repo[b'.'].node() | |
784 |
|
786 | |||
785 | if self.repo.currentwlock() is None: |
|
787 | if self.repo.currentwlock() is None: | |
786 | if util.safehasattr(self.repo, b'wlocknostateupdate'): |
|
788 | if util.safehasattr(self.repo, b'wlocknostateupdate'): | |
787 | self._lock = self.repo.wlocknostateupdate() |
|
789 | self._lock = self.repo.wlocknostateupdate() | |
788 | else: |
|
790 | else: | |
789 | self._lock = self.repo.wlock() |
|
791 | self._lock = self.repo.wlock() | |
790 | self.need_leave = self._state(b'state-enter', hex(self.oldnode)) |
|
792 | self.need_leave = self._state(b'state-enter', hex(self.oldnode)) | |
791 | return self |
|
793 | return self | |
792 |
|
794 | |||
793 | def __exit__(self, type_, value, tb): |
|
795 | def __exit__(self, type_, value, tb): | |
794 | abort = True if type_ else False |
|
796 | abort = True if type_ else False | |
795 | self.exit(abort=abort) |
|
797 | self.exit(abort=abort) | |
796 |
|
798 | |||
797 | def exit(self, abort=False): |
|
799 | def exit(self, abort=False): | |
798 | try: |
|
800 | try: | |
799 | if self.need_leave: |
|
801 | if self.need_leave: | |
800 | status = b'failed' if abort else b'ok' |
|
802 | status = b'failed' if abort else b'ok' | |
801 | if self.newnode is None: |
|
803 | if self.newnode is None: | |
802 | self.newnode = self.repo[b'.'].node() |
|
804 | self.newnode = self.repo[b'.'].node() | |
803 | if self.distance is None: |
|
805 | if self.distance is None: | |
804 | self.distance = calcdistance( |
|
806 | self.distance = calcdistance( | |
805 | self.repo, self.oldnode, self.newnode |
|
807 | self.repo, self.oldnode, self.newnode | |
806 | ) |
|
808 | ) | |
807 | self._state(b'state-leave', hex(self.newnode), status=status) |
|
809 | self._state(b'state-leave', hex(self.newnode), status=status) | |
808 | finally: |
|
810 | finally: | |
809 | self.need_leave = False |
|
811 | self.need_leave = False | |
810 | if self._lock: |
|
812 | if self._lock: | |
811 | self._lock.release() |
|
813 | self._lock.release() | |
812 |
|
814 | |||
813 | def _state(self, cmd, commithash, status=b'ok'): |
|
815 | def _state(self, cmd, commithash, status=b'ok'): | |
814 | if not util.safehasattr(self.repo, b'_watchmanclient'): |
|
816 | if not util.safehasattr(self.repo, b'_watchmanclient'): | |
815 | return False |
|
817 | return False | |
816 | try: |
|
818 | try: | |
817 | self.repo._watchmanclient.command( |
|
819 | self.repo._watchmanclient.command( | |
818 | cmd, |
|
820 | cmd, | |
819 | { |
|
821 | { | |
820 | b'name': self.name, |
|
822 | b'name': self.name, | |
821 | b'metadata': { |
|
823 | b'metadata': { | |
822 | # the target revision |
|
824 | # the target revision | |
823 | b'rev': commithash, |
|
825 | b'rev': commithash, | |
824 | # approximate number of commits between current and target |
|
826 | # approximate number of commits between current and target | |
825 | b'distance': self.distance if self.distance else 0, |
|
827 | b'distance': self.distance if self.distance else 0, | |
826 | # success/failure (only really meaningful for state-leave) |
|
828 | # success/failure (only really meaningful for state-leave) | |
827 | b'status': status, |
|
829 | b'status': status, | |
828 | # whether the working copy parent is changing |
|
830 | # whether the working copy parent is changing | |
829 | b'partial': self.partial, |
|
831 | b'partial': self.partial, | |
830 | }, |
|
832 | }, | |
831 | }, |
|
833 | }, | |
832 | ) |
|
834 | ) | |
833 | return True |
|
835 | return True | |
834 | except Exception as e: |
|
836 | except Exception as e: | |
835 | # Swallow any errors; fire and forget |
|
837 | # Swallow any errors; fire and forget | |
836 | self.repo.ui.log( |
|
838 | self.repo.ui.log( | |
837 | b'watchman', b'Exception %s while running %s\n', e, cmd |
|
839 | b'watchman', b'Exception %s while running %s\n', e, cmd | |
838 | ) |
|
840 | ) | |
839 | return False |
|
841 | return False | |
840 |
|
842 | |||
841 |
|
843 | |||
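As the docstring notes, state_update can also be driven without a with-block. A minimal sketch, assuming a repo object from the surrounding extension code and using only the enter()/exit() methods defined above:

    # hypothetical manual use of state_update (names taken from the class above)
    su = state_update(repo, name=b'hg.update', partial=False)
    su.enter()                # sends state-enter; takes the wlock if not already held
    try:
        pass                  # ... mutate the working copy here ...
    finally:
        su.exit(abort=False)  # sends state-leave and releases any lock it took
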
842 | # Estimate the distance between two nodes |
|
844 | # Estimate the distance between two nodes | |
843 | def calcdistance(repo, oldnode, newnode): |
|
845 | def calcdistance(repo, oldnode, newnode): | |
844 | anc = repo.changelog.ancestor(oldnode, newnode) |
|
846 | anc = repo.changelog.ancestor(oldnode, newnode) | |
845 | ancrev = repo[anc].rev() |
|
847 | ancrev = repo[anc].rev() | |
846 | distance = abs(repo[oldnode].rev() - ancrev) + abs( |
|
848 | distance = abs(repo[oldnode].rev() - ancrev) + abs( | |
847 | repo[newnode].rev() - ancrev |
|
849 | repo[newnode].rev() - ancrev | |
848 | ) |
|
850 | ) | |
849 | return distance |
|
851 | return distance | |
850 |
|
852 | |||
851 |
|
853 | |||
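calcdistance approximates how far apart the two working copy parents are: it finds their common ancestor and sums the revision offsets on both sides. A small worked example with assumed revision numbers:

    # assume repo[oldnode].rev() == 12, repo[newnode].rev() == 15,
    # and their common ancestor is revision 10
    ancrev = 10
    distance = abs(12 - ancrev) + abs(15 - ancrev)  # 2 + 5 == 7
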
852 | # Bracket working copy updates with calls to the watchman state-enter |
|
854 | # Bracket working copy updates with calls to the watchman state-enter | |
853 | # and state-leave commands. This allows clients to perform more intelligent |
|
855 | # and state-leave commands. This allows clients to perform more intelligent | |
854 | # settling during bulk file change scenarios |
|
856 | # settling during bulk file change scenarios | |
855 | # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling |
|
857 | # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling | |
856 | def wrapupdate( |
|
858 | def wrapupdate( | |
857 | orig, |
|
859 | orig, | |
858 | repo, |
|
860 | repo, | |
859 | node, |
|
861 | node, | |
860 | branchmerge, |
|
862 | branchmerge, | |
861 | force, |
|
863 | force, | |
862 | ancestor=None, |
|
864 | ancestor=None, | |
863 | mergeancestor=False, |
|
865 | mergeancestor=False, | |
864 | labels=None, |
|
866 | labels=None, | |
865 | matcher=None, |
|
867 | matcher=None, | |
866 | **kwargs |
|
868 | **kwargs | |
867 | ): |
|
869 | ): | |
868 |
|
870 | |||
869 | distance = 0 |
|
871 | distance = 0 | |
870 | partial = True |
|
872 | partial = True | |
871 | oldnode = repo[b'.'].node() |
|
873 | oldnode = repo[b'.'].node() | |
872 | newnode = repo[node].node() |
|
874 | newnode = repo[node].node() | |
873 | if matcher is None or matcher.always(): |
|
875 | if matcher is None or matcher.always(): | |
874 | partial = False |
|
876 | partial = False | |
875 | distance = calcdistance(repo.unfiltered(), oldnode, newnode) |
|
877 | distance = calcdistance(repo.unfiltered(), oldnode, newnode) | |
876 |
|
878 | |||
877 | with state_update( |
|
879 | with state_update( | |
878 | repo, |
|
880 | repo, | |
879 | name=b"hg.update", |
|
881 | name=b"hg.update", | |
880 | oldnode=oldnode, |
|
882 | oldnode=oldnode, | |
881 | newnode=newnode, |
|
883 | newnode=newnode, | |
882 | distance=distance, |
|
884 | distance=distance, | |
883 | partial=partial, |
|
885 | partial=partial, | |
884 | ): |
|
886 | ): | |
885 | return orig( |
|
887 | return orig( | |
886 | repo, |
|
888 | repo, | |
887 | node, |
|
889 | node, | |
888 | branchmerge, |
|
890 | branchmerge, | |
889 | force, |
|
891 | force, | |
890 | ancestor, |
|
892 | ancestor, | |
891 | mergeancestor, |
|
893 | mergeancestor, | |
892 | labels, |
|
894 | labels, | |
893 | matcher, |
|
895 | matcher, | |
894 | **kwargs |
|
896 | **kwargs | |
895 | ) |
|
897 | ) | |
896 |
|
898 | |||
897 |
|
899 | |||
898 | def repo_has_depth_one_nested_repo(repo): |
|
900 | def repo_has_depth_one_nested_repo(repo): | |
899 | for f in repo.wvfs.listdir(): |
|
901 | for f in repo.wvfs.listdir(): | |
900 | if os.path.isdir(os.path.join(repo.root, f, b'.hg')): |
|
902 | if os.path.isdir(os.path.join(repo.root, f, b'.hg')): | |
901 | msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n' |
|
903 | msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n' | |
902 | repo.ui.debug(msg % f) |
|
904 | repo.ui.debug(msg % f) | |
903 | return True |
|
905 | return True | |
904 | return False |
|
906 | return False | |
905 |
|
907 | |||
906 |
|
908 | |||
907 | def reposetup(ui, repo): |
|
909 | def reposetup(ui, repo): | |
908 | # We don't work with largefiles or inotify |
|
910 | # We don't work with largefiles or inotify | |
909 | exts = extensions.enabled() |
|
911 | exts = extensions.enabled() | |
910 | for ext in _blacklist: |
|
912 | for ext in _blacklist: | |
911 | if ext in exts: |
|
913 | if ext in exts: | |
912 | ui.warn( |
|
914 | ui.warn( | |
913 | _( |
|
915 | _( | |
914 | b'The fsmonitor extension is incompatible with the %s ' |
|
916 | b'The fsmonitor extension is incompatible with the %s ' | |
915 | b'extension and has been disabled.\n' |
|
917 | b'extension and has been disabled.\n' | |
916 | ) |
|
918 | ) | |
917 | % ext |
|
919 | % ext | |
918 | ) |
|
920 | ) | |
919 | return |
|
921 | return | |
920 |
|
922 | |||
921 | if repo.local(): |
|
923 | if repo.local(): | |
922 | # We don't work with subrepos either. |
|
924 | # We don't work with subrepos either. | |
923 | # |
|
925 | # | |
924 | # if repo[None].substate can cause a dirstate parse, which is too |
|
926 | # if repo[None].substate can cause a dirstate parse, which is too | |
925 | # slow. Instead, look for a file called hgsubstate, |
|
927 | # slow. Instead, look for a file called hgsubstate, | |
926 | if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'): |
|
928 | if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'): | |
927 | return |
|
929 | return | |
928 |
|
930 | |||
929 | if repo_has_depth_one_nested_repo(repo): |
|
931 | if repo_has_depth_one_nested_repo(repo): | |
930 | return |
|
932 | return | |
931 |
|
933 | |||
932 | fsmonitorstate = state.state(repo) |
|
934 | fsmonitorstate = state.state(repo) | |
933 | if fsmonitorstate.mode == b'off': |
|
935 | if fsmonitorstate.mode == b'off': | |
934 | return |
|
936 | return | |
935 |
|
937 | |||
936 | try: |
|
938 | try: | |
937 | client = watchmanclient.client(repo.ui, repo.root) |
|
939 | client = watchmanclient.client(repo.ui, repo.root) | |
938 | except Exception as ex: |
|
940 | except Exception as ex: | |
939 | _handleunavailable(ui, fsmonitorstate, ex) |
|
941 | _handleunavailable(ui, fsmonitorstate, ex) | |
940 | return |
|
942 | return | |
941 |
|
943 | |||
942 | repo._fsmonitorstate = fsmonitorstate |
|
944 | repo._fsmonitorstate = fsmonitorstate | |
943 | repo._watchmanclient = client |
|
945 | repo._watchmanclient = client | |
944 |
|
946 | |||
945 | dirstate, cached = localrepo.isfilecached(repo, b'dirstate') |
|
947 | dirstate, cached = localrepo.isfilecached(repo, b'dirstate') | |
946 | if cached: |
|
948 | if cached: | |
947 | # at this point since fsmonitorstate wasn't present, |
|
949 | # at this point since fsmonitorstate wasn't present, | |
948 | # repo.dirstate is not a fsmonitordirstate |
|
950 | # repo.dirstate is not a fsmonitordirstate | |
949 | makedirstate(repo, dirstate) |
|
951 | makedirstate(repo, dirstate) | |
950 |
|
952 | |||
951 | class fsmonitorrepo(repo.__class__): |
|
953 | class fsmonitorrepo(repo.__class__): | |
952 | def status(self, *args, **kwargs): |
|
954 | def status(self, *args, **kwargs): | |
953 | orig = super(fsmonitorrepo, self).status |
|
955 | orig = super(fsmonitorrepo, self).status | |
954 | return overridestatus(orig, self, *args, **kwargs) |
|
956 | return overridestatus(orig, self, *args, **kwargs) | |
955 |
|
957 | |||
956 | def wlocknostateupdate(self, *args, **kwargs): |
|
958 | def wlocknostateupdate(self, *args, **kwargs): | |
957 | return super(fsmonitorrepo, self).wlock(*args, **kwargs) |
|
959 | return super(fsmonitorrepo, self).wlock(*args, **kwargs) | |
958 |
|
960 | |||
959 | def wlock(self, *args, **kwargs): |
|
961 | def wlock(self, *args, **kwargs): | |
960 | l = super(fsmonitorrepo, self).wlock(*args, **kwargs) |
|
962 | l = super(fsmonitorrepo, self).wlock(*args, **kwargs) | |
961 | if not ui.configbool( |
|
963 | if not ui.configbool( | |
962 | b"experimental", b"fsmonitor.transaction_notify" |
|
964 | b"experimental", b"fsmonitor.transaction_notify" | |
963 | ): |
|
965 | ): | |
964 | return l |
|
966 | return l | |
965 | if l.held != 1: |
|
967 | if l.held != 1: | |
966 | return l |
|
968 | return l | |
967 | origrelease = l.releasefn |
|
969 | origrelease = l.releasefn | |
968 |
|
970 | |||
969 | def staterelease(): |
|
971 | def staterelease(): | |
970 | if origrelease: |
|
972 | if origrelease: | |
971 | origrelease() |
|
973 | origrelease() | |
972 | if l.stateupdate: |
|
974 | if l.stateupdate: | |
973 | l.stateupdate.exit() |
|
975 | l.stateupdate.exit() | |
974 | l.stateupdate = None |
|
976 | l.stateupdate = None | |
975 |
|
977 | |||
976 | try: |
|
978 | try: | |
977 | l.stateupdate = None |
|
979 | l.stateupdate = None | |
978 | l.stateupdate = state_update(self, name=b"hg.transaction") |
|
980 | l.stateupdate = state_update(self, name=b"hg.transaction") | |
979 | l.stateupdate.enter() |
|
981 | l.stateupdate.enter() | |
980 | l.releasefn = staterelease |
|
982 | l.releasefn = staterelease | |
981 | except Exception as e: |
|
983 | except Exception as e: | |
982 | # Swallow any errors; fire and forget |
|
984 | # Swallow any errors; fire and forget | |
983 | self.ui.log( |
|
985 | self.ui.log( | |
984 | b'watchman', b'Exception in state update %s\n', e |
|
986 | b'watchman', b'Exception in state update %s\n', e | |
985 | ) |
|
987 | ) | |
986 | return l |
|
988 | return l | |
987 |
|
989 | |||
988 | repo.__class__ = fsmonitorrepo |
|
990 | repo.__class__ = fsmonitorrepo |
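The wlock override above only wires up watchman hg.transaction state events when an experimental knob is enabled; otherwise the plain lock is returned. A hedged hgrc sketch using the exact key read by ui.configbool in wlock():

    [experimental]
    fsmonitor.transaction_notify = true
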
@@ -1,184 +1,186 b'' | |||||
1 | # This software may be used and distributed according to the terms of the |
|
1 | # This software may be used and distributed according to the terms of the | |
2 | # GNU General Public License version 2 or any later version. |
|
2 | # GNU General Public License version 2 or any later version. | |
3 |
|
3 | |||
4 | # based on bundleheads extension by Gregory Szorc <gps@mozilla.com> |
|
4 | # based on bundleheads extension by Gregory Szorc <gps@mozilla.com> | |
5 |
|
5 | |||
6 | from __future__ import absolute_import |
|
6 | from __future__ import absolute_import | |
7 |
|
7 | |||
8 | import abc |
|
8 | import abc | |
9 | import hashlib |
|
|||
10 | import os |
|
9 | import os | |
11 | import subprocess |
|
10 | import subprocess | |
12 | import tempfile |
|
11 | import tempfile | |
13 |
|
12 | |||
14 | from mercurial.pycompat import open |
|
13 | from mercurial.pycompat import open | |
15 | from mercurial import ( |
|
14 | from mercurial import ( | |
16 | node, |
|
15 | node, | |
17 | pycompat, |
|
16 | pycompat, | |
18 | ) |
|
17 | ) | |
19 | from mercurial.utils import procutil |
|
18 | from mercurial.utils import ( | |
|
19 | hashutil, | |||
|
20 | procutil, | |||
|
21 | ) | |||
20 |
|
22 | |||
21 | NamedTemporaryFile = tempfile.NamedTemporaryFile |
|
23 | NamedTemporaryFile = tempfile.NamedTemporaryFile | |
22 |
|
24 | |||
23 |
|
25 | |||
24 | class BundleWriteException(Exception): |
|
26 | class BundleWriteException(Exception): | |
25 | pass |
|
27 | pass | |
26 |
|
28 | |||
27 |
|
29 | |||
28 | class BundleReadException(Exception): |
|
30 | class BundleReadException(Exception): | |
29 | pass |
|
31 | pass | |
30 |
|
32 | |||
31 |
|
33 | |||
32 | class abstractbundlestore(object): # pytype: disable=ignored-metaclass |
|
34 | class abstractbundlestore(object): # pytype: disable=ignored-metaclass | |
33 | """Defines the interface for bundle stores. |
|
35 | """Defines the interface for bundle stores. | |
34 |
|
36 | |||
35 | A bundle store is an entity that stores raw bundle data. It is a simple |
|
37 | A bundle store is an entity that stores raw bundle data. It is a simple | |
36 | key-value store. However, the keys are chosen by the store. The keys can |
|
38 | key-value store. However, the keys are chosen by the store. The keys can | |
37 | be any Python object understood by the corresponding bundle index (see |
|
39 | be any Python object understood by the corresponding bundle index (see | |
38 | ``abstractbundleindex`` below). |
|
40 | ``abstractbundleindex`` below). | |
39 | """ |
|
41 | """ | |
40 |
|
42 | |||
41 | __metaclass__ = abc.ABCMeta |
|
43 | __metaclass__ = abc.ABCMeta | |
42 |
|
44 | |||
43 | @abc.abstractmethod |
|
45 | @abc.abstractmethod | |
44 | def write(self, data): |
|
46 | def write(self, data): | |
45 | """Write bundle data to the store. |
|
47 | """Write bundle data to the store. | |
46 |
|
48 | |||
47 | This function receives the raw data to be written as a str. |
|
49 | This function receives the raw data to be written as a str. | |
48 | Throws BundleWriteException |
|
50 | Throws BundleWriteException | |
49 | The key of the written data MUST be returned. |
|
51 | The key of the written data MUST be returned. | |
50 | """ |
|
52 | """ | |
51 |
|
53 | |||
52 | @abc.abstractmethod |
|
54 | @abc.abstractmethod | |
53 | def read(self, key): |
|
55 | def read(self, key): | |
54 | """Obtain bundle data for a key. |
|
56 | """Obtain bundle data for a key. | |
55 |
|
57 | |||
56 | Returns None if the bundle isn't known. |
|
58 | Returns None if the bundle isn't known. | |
57 | Throws BundleReadException |
|
59 | Throws BundleReadException | |
58 | The returned object should be a file object supporting read() |
|
60 | The returned object should be a file object supporting read() | |
59 | and close(). |
|
61 | and close(). | |
60 | """ |
|
62 | """ | |
61 |
|
63 | |||
62 |
|
64 | |||
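The abstract interface above only requires write(data) to return a key and read(key) to return the stored payload, or None when the bundle is unknown. A minimal in-memory sketch, not part of the extension, keyed the same way filebundlestore below keys its files and, like it, returning raw bytes rather than a file object:

    from mercurial import node
    from mercurial.utils import hashutil

    class memorybundlestore(abstractbundlestore):
        """hypothetical dict-backed store, for illustration only"""

        def __init__(self):
            self._bundles = {}

        def write(self, data):
            # key by the hex sha1 of the payload, as filebundlestore does
            key = node.hex(hashutil.sha1(data).digest())
            self._bundles[key] = data
            return key

        def read(self, key):
            # contract from abstractbundlestore: None for unknown bundles
            return self._bundles.get(key)
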
63 | class filebundlestore(object): |
|
65 | class filebundlestore(object): | |
64 | """bundle store in filesystem |
|
66 | """bundle store in filesystem | |
65 |
|
67 | |||
66 | meant for storing bundles somewhere on disk and on network filesystems |
|
68 | meant for storing bundles somewhere on disk and on network filesystems | |
67 | """ |
|
69 | """ | |
68 |
|
70 | |||
69 | def __init__(self, ui, repo): |
|
71 | def __init__(self, ui, repo): | |
70 | self.ui = ui |
|
72 | self.ui = ui | |
71 | self.repo = repo |
|
73 | self.repo = repo | |
72 | self.storepath = ui.configpath(b'scratchbranch', b'storepath') |
|
74 | self.storepath = ui.configpath(b'scratchbranch', b'storepath') | |
73 | if not self.storepath: |
|
75 | if not self.storepath: | |
74 | self.storepath = self.repo.vfs.join( |
|
76 | self.storepath = self.repo.vfs.join( | |
75 | b"scratchbranches", b"filebundlestore" |
|
77 | b"scratchbranches", b"filebundlestore" | |
76 | ) |
|
78 | ) | |
77 | if not os.path.exists(self.storepath): |
|
79 | if not os.path.exists(self.storepath): | |
78 | os.makedirs(self.storepath) |
|
80 | os.makedirs(self.storepath) | |
79 |
|
81 | |||
80 | def _dirpath(self, hashvalue): |
|
82 | def _dirpath(self, hashvalue): | |
81 | """First two bytes of the hash are the name of the upper |
|
83 | """First two bytes of the hash are the name of the upper | |
82 | level directory, next two bytes are the name of the |
|
84 | level directory, next two bytes are the name of the | |
83 | next level directory""" |
|
85 | next level directory""" | |
84 | return os.path.join(self.storepath, hashvalue[0:2], hashvalue[2:4]) |
|
86 | return os.path.join(self.storepath, hashvalue[0:2], hashvalue[2:4]) | |
85 |
|
87 | |||
86 | def _filepath(self, filename): |
|
88 | def _filepath(self, filename): | |
87 | return os.path.join(self._dirpath(filename), filename) |
|
89 | return os.path.join(self._dirpath(filename), filename) | |
88 |
|
90 | |||
89 | def write(self, data): |
|
91 | def write(self, data): | |
90 | filename = node.hex(hashlib.sha1(data).digest()) |
|
92 | filename = node.hex(hashutil.sha1(data).digest()) | |
91 | dirpath = self._dirpath(filename) |
|
93 | dirpath = self._dirpath(filename) | |
92 |
|
94 | |||
93 | if not os.path.exists(dirpath): |
|
95 | if not os.path.exists(dirpath): | |
94 | os.makedirs(dirpath) |
|
96 | os.makedirs(dirpath) | |
95 |
|
97 | |||
96 | with open(self._filepath(filename), b'wb') as f: |
|
98 | with open(self._filepath(filename), b'wb') as f: | |
97 | f.write(data) |
|
99 | f.write(data) | |
98 |
|
100 | |||
99 | return filename |
|
101 | return filename | |
100 |
|
102 | |||
101 | def read(self, key): |
|
103 | def read(self, key): | |
102 | try: |
|
104 | try: | |
103 | with open(self._filepath(key), b'rb') as f: |
|
105 | with open(self._filepath(key), b'rb') as f: | |
104 | return f.read() |
|
106 | return f.read() | |
105 | except IOError: |
|
107 | except IOError: | |
106 | return None |
|
108 | return None | |
107 |
|
109 | |||
108 |
|
110 | |||
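_dirpath shards stored bundles by the leading hex digits of the key, so each bundle lands two directory levels below storepath. A worked example using the default store location and the sha1 of an empty payload as a stand-in key:

    # storepath defaults to <repo>/.hg/scratchbranches/filebundlestore
    # key = node.hex(hashutil.sha1(b'').digest())
    #     == b'da39a3ee5e6b4b0d3255bfef95601890afd80709'
    # _dirpath(key)  -> .../filebundlestore/da/39
    # _filepath(key) -> .../filebundlestore/da/39/da39a3ee...d80709
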
109 | class externalbundlestore(abstractbundlestore): |
|
111 | class externalbundlestore(abstractbundlestore): | |
110 | def __init__(self, put_binary, put_args, get_binary, get_args): |
|
112 | def __init__(self, put_binary, put_args, get_binary, get_args): | |
111 | """ |
|
113 | """ | |
112 | `put_binary` - path to binary file which uploads bundle to external |
|
114 | `put_binary` - path to binary file which uploads bundle to external | |
113 | storage and prints key to stdout |
|
115 | storage and prints key to stdout | |
114 | `put_args` - format string with additional args to `put_binary` |
|
116 | `put_args` - format string with additional args to `put_binary` | |
115 | {filename} replacement field can be used. |
|
117 | {filename} replacement field can be used. | |
116 | `get_binary` - path to binary file which accepts filename and key |
|
118 | `get_binary` - path to binary file which accepts filename and key | |
117 | (in that order), downloads bundle from store and saves it to file |
|
119 | (in that order), downloads bundle from store and saves it to file | |
118 | `get_args` - format string with additional args to `get_binary`. |
|
120 | `get_args` - format string with additional args to `get_binary`. | |
119 | {filename} and {handle} replacement field can be used. |
|
121 | {filename} and {handle} replacement field can be used. | |
120 | """ |
|
122 | """ | |
121 |
|
123 | |||
122 | self.put_args = put_args |
|
124 | self.put_args = put_args | |
123 | self.get_args = get_args |
|
125 | self.get_args = get_args | |
124 | self.put_binary = put_binary |
|
126 | self.put_binary = put_binary | |
125 | self.get_binary = get_binary |
|
127 | self.get_binary = get_binary | |
126 |
|
128 | |||
127 | def _call_binary(self, args): |
|
129 | def _call_binary(self, args): | |
128 | p = subprocess.Popen( |
|
130 | p = subprocess.Popen( | |
129 | pycompat.rapply(procutil.tonativestr, args), |
|
131 | pycompat.rapply(procutil.tonativestr, args), | |
130 | stdout=subprocess.PIPE, |
|
132 | stdout=subprocess.PIPE, | |
131 | stderr=subprocess.PIPE, |
|
133 | stderr=subprocess.PIPE, | |
132 | close_fds=True, |
|
134 | close_fds=True, | |
133 | ) |
|
135 | ) | |
134 | stdout, stderr = p.communicate() |
|
136 | stdout, stderr = p.communicate() | |
135 | returncode = p.returncode |
|
137 | returncode = p.returncode | |
136 | return returncode, stdout, stderr |
|
138 | return returncode, stdout, stderr | |
137 |
|
139 | |||
138 | def write(self, data): |
|
140 | def write(self, data): | |
139 | # Won't work on windows because you can't open file second time without |
|
141 | # Won't work on windows because you can't open file second time without | |
140 | # closing it |
|
142 | # closing it | |
141 | # TODO: rewrite without str.format() and replace NamedTemporaryFile() |
|
143 | # TODO: rewrite without str.format() and replace NamedTemporaryFile() | |
142 | # with pycompat.namedtempfile() |
|
144 | # with pycompat.namedtempfile() | |
143 | with NamedTemporaryFile() as temp: |
|
145 | with NamedTemporaryFile() as temp: | |
144 | temp.write(data) |
|
146 | temp.write(data) | |
145 | temp.flush() |
|
147 | temp.flush() | |
146 | temp.seek(0) |
|
148 | temp.seek(0) | |
147 | formatted_args = [ |
|
149 | formatted_args = [ | |
148 | arg.format(filename=temp.name) for arg in self.put_args |
|
150 | arg.format(filename=temp.name) for arg in self.put_args | |
149 | ] |
|
151 | ] | |
150 | returncode, stdout, stderr = self._call_binary( |
|
152 | returncode, stdout, stderr = self._call_binary( | |
151 | [self.put_binary] + formatted_args |
|
153 | [self.put_binary] + formatted_args | |
152 | ) |
|
154 | ) | |
153 |
|
155 | |||
154 | if returncode != 0: |
|
156 | if returncode != 0: | |
155 | raise BundleWriteException( |
|
157 | raise BundleWriteException( | |
156 | b'Failed to upload to external store: %s' % stderr |
|
158 | b'Failed to upload to external store: %s' % stderr | |
157 | ) |
|
159 | ) | |
158 | stdout_lines = stdout.splitlines() |
|
160 | stdout_lines = stdout.splitlines() | |
159 | if len(stdout_lines) == 1: |
|
161 | if len(stdout_lines) == 1: | |
160 | return stdout_lines[0] |
|
162 | return stdout_lines[0] | |
161 | else: |
|
163 | else: | |
162 | raise BundleWriteException( |
|
164 | raise BundleWriteException( | |
163 | b'Bad output from %s: %s' % (self.put_binary, stdout) |
|
165 | b'Bad output from %s: %s' % (self.put_binary, stdout) | |
164 | ) |
|
166 | ) | |
165 |
|
167 | |||
166 | def read(self, handle): |
|
168 | def read(self, handle): | |
167 | # Won't work on windows because you can't open file second time without |
|
169 | # Won't work on windows because you can't open file second time without | |
168 | # closing it |
|
170 | # closing it | |
169 | # TODO: rewrite without str.format() and replace NamedTemporaryFile() |
|
171 | # TODO: rewrite without str.format() and replace NamedTemporaryFile() | |
170 | # with pycompat.namedtempfile() |
|
172 | # with pycompat.namedtempfile() | |
171 | with NamedTemporaryFile() as temp: |
|
173 | with NamedTemporaryFile() as temp: | |
172 | formatted_args = [ |
|
174 | formatted_args = [ | |
173 | arg.format(filename=temp.name, handle=handle) |
|
175 | arg.format(filename=temp.name, handle=handle) | |
174 | for arg in self.get_args |
|
176 | for arg in self.get_args | |
175 | ] |
|
177 | ] | |
176 | returncode, stdout, stderr = self._call_binary( |
|
178 | returncode, stdout, stderr = self._call_binary( | |
177 | [self.get_binary] + formatted_args |
|
179 | [self.get_binary] + formatted_args | |
178 | ) |
|
180 | ) | |
179 |
|
181 | |||
180 | if returncode != 0: |
|
182 | if returncode != 0: | |
181 | raise BundleReadException( |
|
183 | raise BundleReadException( | |
182 | b'Failed to download from external store: %s' % stderr |
|
184 | b'Failed to download from external store: %s' % stderr | |
183 | ) |
|
185 | ) | |
184 | return temp.read() |
|
186 | return temp.read() |
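The constructor docstring above defines the whole contract for the helper programs: put_binary uploads a temporary file and prints the resulting key, get_binary downloads a key into a file, and the argument templates may use {filename} and {handle}. A hedged sketch with made-up helper names; the real binaries and argument lists come from configuration, and bundledata stands for the raw bundle bytes:

    # hypothetical helpers, for illustration only
    store = externalbundlestore(
        put_binary='upload-bundle',
        put_args=['--file', '{filename}'],
        get_binary='download-bundle',
        get_args=['{filename}', '{handle}'],
    )
    key = store.write(bundledata)  # writes a temp file, returns the key printed on stdout
    data = store.read(key)         # downloads into a temp file and returns its bytes
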
@@ -1,669 +1,669 b'' | |||||
1 | # Copyright 2009-2010 Gregory P. Ward |
|
1 | # Copyright 2009-2010 Gregory P. Ward | |
2 | # Copyright 2009-2010 Intelerad Medical Systems Incorporated |
|
2 | # Copyright 2009-2010 Intelerad Medical Systems Incorporated | |
3 | # Copyright 2010-2011 Fog Creek Software |
|
3 | # Copyright 2010-2011 Fog Creek Software | |
4 | # Copyright 2010-2011 Unity Technologies |
|
4 | # Copyright 2010-2011 Unity Technologies | |
5 | # |
|
5 | # | |
6 | # This software may be used and distributed according to the terms of the |
|
6 | # This software may be used and distributed according to the terms of the | |
7 | # GNU General Public License version 2 or any later version. |
|
7 | # GNU General Public License version 2 or any later version. | |
8 |
|
8 | |||
9 | '''High-level command function for lfconvert, plus the cmdtable.''' |
|
9 | '''High-level command function for lfconvert, plus the cmdtable.''' | |
10 | from __future__ import absolute_import |
|
10 | from __future__ import absolute_import | |
11 |
|
11 | |||
12 | import errno |
|
12 | import errno | |
13 | import hashlib |
|
|||
14 | import os |
|
13 | import os | |
15 | import shutil |
|
14 | import shutil | |
16 |
|
15 | |||
17 | from mercurial.i18n import _ |
|
16 | from mercurial.i18n import _ | |
18 |
|
17 | |||
19 | from mercurial import ( |
|
18 | from mercurial import ( | |
20 | cmdutil, |
|
19 | cmdutil, | |
21 | context, |
|
20 | context, | |
22 | error, |
|
21 | error, | |
23 | exthelper, |
|
22 | exthelper, | |
24 | hg, |
|
23 | hg, | |
25 | lock, |
|
24 | lock, | |
26 | match as matchmod, |
|
25 | match as matchmod, | |
27 | node, |
|
26 | node, | |
28 | pycompat, |
|
27 | pycompat, | |
29 | scmutil, |
|
28 | scmutil, | |
30 | util, |
|
29 | util, | |
31 | ) |
|
30 | ) | |
|
31 | from mercurial.utils import hashutil | |||
32 |
|
32 | |||
33 | from ..convert import ( |
|
33 | from ..convert import ( | |
34 | convcmd, |
|
34 | convcmd, | |
35 | filemap, |
|
35 | filemap, | |
36 | ) |
|
36 | ) | |
37 |
|
37 | |||
38 | from . import lfutil, storefactory |
|
38 | from . import lfutil, storefactory | |
39 |
|
39 | |||
40 | release = lock.release |
|
40 | release = lock.release | |
41 |
|
41 | |||
42 | # -- Commands ---------------------------------------------------------- |
|
42 | # -- Commands ---------------------------------------------------------- | |
43 |
|
43 | |||
44 | eh = exthelper.exthelper() |
|
44 | eh = exthelper.exthelper() | |
45 |
|
45 | |||
46 |
|
46 | |||
47 | @eh.command( |
|
47 | @eh.command( | |
48 | b'lfconvert', |
|
48 | b'lfconvert', | |
49 | [ |
|
49 | [ | |
50 | ( |
|
50 | ( | |
51 | b's', |
|
51 | b's', | |
52 | b'size', |
|
52 | b'size', | |
53 | b'', |
|
53 | b'', | |
54 | _(b'minimum size (MB) for files to be converted as largefiles'), |
|
54 | _(b'minimum size (MB) for files to be converted as largefiles'), | |
55 | b'SIZE', |
|
55 | b'SIZE', | |
56 | ), |
|
56 | ), | |
57 | ( |
|
57 | ( | |
58 | b'', |
|
58 | b'', | |
59 | b'to-normal', |
|
59 | b'to-normal', | |
60 | False, |
|
60 | False, | |
61 | _(b'convert from a largefiles repo to a normal repo'), |
|
61 | _(b'convert from a largefiles repo to a normal repo'), | |
62 | ), |
|
62 | ), | |
63 | ], |
|
63 | ], | |
64 | _(b'hg lfconvert SOURCE DEST [FILE ...]'), |
|
64 | _(b'hg lfconvert SOURCE DEST [FILE ...]'), | |
65 | norepo=True, |
|
65 | norepo=True, | |
66 | inferrepo=True, |
|
66 | inferrepo=True, | |
67 | ) |
|
67 | ) | |
68 | def lfconvert(ui, src, dest, *pats, **opts): |
|
68 | def lfconvert(ui, src, dest, *pats, **opts): | |
69 | '''convert a normal repository to a largefiles repository |
|
69 | '''convert a normal repository to a largefiles repository | |
70 |
|
70 | |||
71 | Convert repository SOURCE to a new repository DEST, identical to |
|
71 | Convert repository SOURCE to a new repository DEST, identical to | |
72 | SOURCE except that certain files will be converted as largefiles: |
|
72 | SOURCE except that certain files will be converted as largefiles: | |
73 | specifically, any file that matches any PATTERN *or* whose size is |
|
73 | specifically, any file that matches any PATTERN *or* whose size is | |
74 | above the minimum size threshold is converted as a largefile. The |
|
74 | above the minimum size threshold is converted as a largefile. The | |
75 | size used to determine whether or not to track a file as a |
|
75 | size used to determine whether or not to track a file as a | |
76 | largefile is the size of the first version of the file. The |
|
76 | largefile is the size of the first version of the file. The | |
77 | minimum size can be specified either with --size or in |
|
77 | minimum size can be specified either with --size or in | |
78 | configuration as ``largefiles.size``. |
|
78 | configuration as ``largefiles.size``. | |
79 |
|
79 | |||
80 | After running this command you will need to make sure that |
|
80 | After running this command you will need to make sure that | |
81 | largefiles is enabled anywhere you intend to push the new |
|
81 | largefiles is enabled anywhere you intend to push the new | |
82 | repository. |
|
82 | repository. | |
83 |
|
83 | |||
84 | Use --to-normal to convert largefiles back to normal files; after |
|
84 | Use --to-normal to convert largefiles back to normal files; after | |
85 | this, the DEST repository can be used without largefiles at all.''' |
|
85 | this, the DEST repository can be used without largefiles at all.''' | |
86 |
|
86 | |||
87 | opts = pycompat.byteskwargs(opts) |
|
87 | opts = pycompat.byteskwargs(opts) | |
88 | if opts[b'to_normal']: |
|
88 | if opts[b'to_normal']: | |
89 | tolfile = False |
|
89 | tolfile = False | |
90 | else: |
|
90 | else: | |
91 | tolfile = True |
|
91 | tolfile = True | |
92 | size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None) |
|
92 | size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None) | |
93 |
|
93 | |||
94 | if not hg.islocal(src): |
|
94 | if not hg.islocal(src): | |
95 | raise error.Abort(_(b'%s is not a local Mercurial repo') % src) |
|
95 | raise error.Abort(_(b'%s is not a local Mercurial repo') % src) | |
96 | if not hg.islocal(dest): |
|
96 | if not hg.islocal(dest): | |
97 | raise error.Abort(_(b'%s is not a local Mercurial repo') % dest) |
|
97 | raise error.Abort(_(b'%s is not a local Mercurial repo') % dest) | |
98 |
|
98 | |||
99 | rsrc = hg.repository(ui, src) |
|
99 | rsrc = hg.repository(ui, src) | |
100 | ui.status(_(b'initializing destination %s\n') % dest) |
|
100 | ui.status(_(b'initializing destination %s\n') % dest) | |
101 | rdst = hg.repository(ui, dest, create=True) |
|
101 | rdst = hg.repository(ui, dest, create=True) | |
102 |
|
102 | |||
103 | success = False |
|
103 | success = False | |
104 | dstwlock = dstlock = None |
|
104 | dstwlock = dstlock = None | |
105 | try: |
|
105 | try: | |
106 | # Get a list of all changesets in the source. The easy way to do this |
|
106 | # Get a list of all changesets in the source. The easy way to do this | |
107 | # is to simply walk the changelog, using changelog.nodesbetween(). |
|
107 | # is to simply walk the changelog, using changelog.nodesbetween(). | |
108 | # Take a look at mercurial/revlog.py:639 for more details. |
|
108 | # Take a look at mercurial/revlog.py:639 for more details. | |
109 | # Use a generator instead of a list to decrease memory usage |
|
109 | # Use a generator instead of a list to decrease memory usage | |
110 | ctxs = ( |
|
110 | ctxs = ( | |
111 | rsrc[ctx] |
|
111 | rsrc[ctx] | |
112 | for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0] |
|
112 | for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0] | |
113 | ) |
|
113 | ) | |
114 | revmap = {node.nullid: node.nullid} |
|
114 | revmap = {node.nullid: node.nullid} | |
115 | if tolfile: |
|
115 | if tolfile: | |
116 | # Lock destination to prevent modification while it is converted to. |
|
116 | # Lock destination to prevent modification while it is converted to. | |
117 | # Don't need to lock src because we are just reading from its |
|
117 | # Don't need to lock src because we are just reading from its | |
118 | # history which can't change. |
|
118 | # history which can't change. | |
119 | dstwlock = rdst.wlock() |
|
119 | dstwlock = rdst.wlock() | |
120 | dstlock = rdst.lock() |
|
120 | dstlock = rdst.lock() | |
121 |
|
121 | |||
122 | lfiles = set() |
|
122 | lfiles = set() | |
123 | normalfiles = set() |
|
123 | normalfiles = set() | |
124 | if not pats: |
|
124 | if not pats: | |
125 | pats = ui.configlist(lfutil.longname, b'patterns') |
|
125 | pats = ui.configlist(lfutil.longname, b'patterns') | |
126 | if pats: |
|
126 | if pats: | |
127 | matcher = matchmod.match(rsrc.root, b'', list(pats)) |
|
127 | matcher = matchmod.match(rsrc.root, b'', list(pats)) | |
128 | else: |
|
128 | else: | |
129 | matcher = None |
|
129 | matcher = None | |
130 |
|
130 | |||
131 | lfiletohash = {} |
|
131 | lfiletohash = {} | |
132 | with ui.makeprogress( |
|
132 | with ui.makeprogress( | |
133 | _(b'converting revisions'), |
|
133 | _(b'converting revisions'), | |
134 | unit=_(b'revisions'), |
|
134 | unit=_(b'revisions'), | |
135 | total=rsrc[b'tip'].rev(), |
|
135 | total=rsrc[b'tip'].rev(), | |
136 | ) as progress: |
|
136 | ) as progress: | |
137 | for ctx in ctxs: |
|
137 | for ctx in ctxs: | |
138 | progress.update(ctx.rev()) |
|
138 | progress.update(ctx.rev()) | |
139 | _lfconvert_addchangeset( |
|
139 | _lfconvert_addchangeset( | |
140 | rsrc, |
|
140 | rsrc, | |
141 | rdst, |
|
141 | rdst, | |
142 | ctx, |
|
142 | ctx, | |
143 | revmap, |
|
143 | revmap, | |
144 | lfiles, |
|
144 | lfiles, | |
145 | normalfiles, |
|
145 | normalfiles, | |
146 | matcher, |
|
146 | matcher, | |
147 | size, |
|
147 | size, | |
148 | lfiletohash, |
|
148 | lfiletohash, | |
149 | ) |
|
149 | ) | |
150 |
|
150 | |||
151 | if rdst.wvfs.exists(lfutil.shortname): |
|
151 | if rdst.wvfs.exists(lfutil.shortname): | |
152 | rdst.wvfs.rmtree(lfutil.shortname) |
|
152 | rdst.wvfs.rmtree(lfutil.shortname) | |
153 |
|
153 | |||
154 | for f in lfiletohash.keys(): |
|
154 | for f in lfiletohash.keys(): | |
155 | if rdst.wvfs.isfile(f): |
|
155 | if rdst.wvfs.isfile(f): | |
156 | rdst.wvfs.unlink(f) |
|
156 | rdst.wvfs.unlink(f) | |
157 | try: |
|
157 | try: | |
158 | rdst.wvfs.removedirs(rdst.wvfs.dirname(f)) |
|
158 | rdst.wvfs.removedirs(rdst.wvfs.dirname(f)) | |
159 | except OSError: |
|
159 | except OSError: | |
160 | pass |
|
160 | pass | |
161 |
|
161 | |||
162 | # If there were any files converted to largefiles, add largefiles |
|
162 | # If there were any files converted to largefiles, add largefiles | |
163 | # to the destination repository's requirements. |
|
163 | # to the destination repository's requirements. | |
164 | if lfiles: |
|
164 | if lfiles: | |
165 | rdst.requirements.add(b'largefiles') |
|
165 | rdst.requirements.add(b'largefiles') | |
166 | rdst._writerequirements() |
|
166 | rdst._writerequirements() | |
167 | else: |
|
167 | else: | |
168 |
|
168 | |||
169 | class lfsource(filemap.filemap_source): |
|
169 | class lfsource(filemap.filemap_source): | |
170 | def __init__(self, ui, source): |
|
170 | def __init__(self, ui, source): | |
171 | super(lfsource, self).__init__(ui, source, None) |
|
171 | super(lfsource, self).__init__(ui, source, None) | |
172 | self.filemapper.rename[lfutil.shortname] = b'.' |
|
172 | self.filemapper.rename[lfutil.shortname] = b'.' | |
173 |
|
173 | |||
174 | def getfile(self, name, rev): |
|
174 | def getfile(self, name, rev): | |
175 | realname, realrev = rev |
|
175 | realname, realrev = rev | |
176 | f = super(lfsource, self).getfile(name, rev) |
|
176 | f = super(lfsource, self).getfile(name, rev) | |
177 |
|
177 | |||
178 | if ( |
|
178 | if ( | |
179 | not realname.startswith(lfutil.shortnameslash) |
|
179 | not realname.startswith(lfutil.shortnameslash) | |
180 | or f[0] is None |
|
180 | or f[0] is None | |
181 | ): |
|
181 | ): | |
182 | return f |
|
182 | return f | |
183 |
|
183 | |||
184 | # Substitute in the largefile data for the hash |
|
184 | # Substitute in the largefile data for the hash | |
185 | hash = f[0].strip() |
|
185 | hash = f[0].strip() | |
186 | path = lfutil.findfile(rsrc, hash) |
|
186 | path = lfutil.findfile(rsrc, hash) | |
187 |
|
187 | |||
188 | if path is None: |
|
188 | if path is None: | |
189 | raise error.Abort( |
|
189 | raise error.Abort( | |
190 | _(b"missing largefile for '%s' in %s") |
|
190 | _(b"missing largefile for '%s' in %s") | |
191 | % (realname, realrev) |
|
191 | % (realname, realrev) | |
192 | ) |
|
192 | ) | |
193 | return util.readfile(path), f[1] |
|
193 | return util.readfile(path), f[1] | |
194 |
|
194 | |||
195 | class converter(convcmd.converter): |
|
195 | class converter(convcmd.converter): | |
196 | def __init__(self, ui, source, dest, revmapfile, opts): |
|
196 | def __init__(self, ui, source, dest, revmapfile, opts): | |
197 | src = lfsource(ui, source) |
|
197 | src = lfsource(ui, source) | |
198 |
|
198 | |||
199 | super(converter, self).__init__( |
|
199 | super(converter, self).__init__( | |
200 | ui, src, dest, revmapfile, opts |
|
200 | ui, src, dest, revmapfile, opts | |
201 | ) |
|
201 | ) | |
202 |
|
202 | |||
203 | found, missing = downloadlfiles(ui, rsrc) |
|
203 | found, missing = downloadlfiles(ui, rsrc) | |
204 | if missing != 0: |
|
204 | if missing != 0: | |
205 | raise error.Abort(_(b"all largefiles must be present locally")) |
|
205 | raise error.Abort(_(b"all largefiles must be present locally")) | |
206 |
|
206 | |||
207 | orig = convcmd.converter |
|
207 | orig = convcmd.converter | |
208 | convcmd.converter = converter |
|
208 | convcmd.converter = converter | |
209 |
|
209 | |||
210 | try: |
|
210 | try: | |
211 | convcmd.convert( |
|
211 | convcmd.convert( | |
212 | ui, src, dest, source_type=b'hg', dest_type=b'hg' |
|
212 | ui, src, dest, source_type=b'hg', dest_type=b'hg' | |
213 | ) |
|
213 | ) | |
214 | finally: |
|
214 | finally: | |
215 | convcmd.converter = orig |
|
215 | convcmd.converter = orig | |
216 | success = True |
|
216 | success = True | |
217 | finally: |
|
217 | finally: | |
218 | if tolfile: |
|
218 | if tolfile: | |
219 | rdst.dirstate.clear() |
|
219 | rdst.dirstate.clear() | |
220 | release(dstlock, dstwlock) |
|
220 | release(dstlock, dstwlock) | |
221 | if not success: |
|
221 | if not success: | |
222 | # we failed, remove the new directory |
|
222 | # we failed, remove the new directory | |
223 | shutil.rmtree(rdst.root) |
|
223 | shutil.rmtree(rdst.root) | |
224 |
|
224 | |||
225 |
|
225 | |||
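In practice the command described by the docstring above is driven from the command line; a short hedged example with placeholder repository names:

    $ hg lfconvert --size 10 bigrepo bigrepo-lf      # files around 10 MB and larger become largefiles
    $ hg lfconvert --to-normal bigrepo-lf plainrepo  # convert back to a normal repository
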
226 | def _lfconvert_addchangeset( |
|
226 | def _lfconvert_addchangeset( | |
227 | rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash |
|
227 | rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash | |
228 | ): |
|
228 | ): | |
229 | # Convert src parents to dst parents |
|
229 | # Convert src parents to dst parents | |
230 | parents = _convertparents(ctx, revmap) |
|
230 | parents = _convertparents(ctx, revmap) | |
231 |
|
231 | |||
232 | # Generate list of changed files |
|
232 | # Generate list of changed files | |
233 | files = _getchangedfiles(ctx, parents) |
|
233 | files = _getchangedfiles(ctx, parents) | |
234 |
|
234 | |||
235 | dstfiles = [] |
|
235 | dstfiles = [] | |
236 | for f in files: |
|
236 | for f in files: | |
237 | if f not in lfiles and f not in normalfiles: |
|
237 | if f not in lfiles and f not in normalfiles: | |
238 | islfile = _islfile(f, ctx, matcher, size) |
|
238 | islfile = _islfile(f, ctx, matcher, size) | |
239 | # If this file was renamed or copied then copy |
|
239 | # If this file was renamed or copied then copy | |
240 | # the largefile-ness of its predecessor |
|
240 | # the largefile-ness of its predecessor | |
241 | if f in ctx.manifest(): |
|
241 | if f in ctx.manifest(): | |
242 | fctx = ctx.filectx(f) |
|
242 | fctx = ctx.filectx(f) | |
243 | renamed = fctx.copysource() |
|
243 | renamed = fctx.copysource() | |
244 | if renamed is None: |
|
244 | if renamed is None: | |
245 | # the code below assumes renamed to be a boolean or a list |
|
245 | # the code below assumes renamed to be a boolean or a list | |
246 | # and won't quite work with the value None |
|
246 | # and won't quite work with the value None | |
247 | renamed = False |
|
247 | renamed = False | |
248 | renamedlfile = renamed and renamed in lfiles |
|
248 | renamedlfile = renamed and renamed in lfiles | |
249 | islfile |= renamedlfile |
|
249 | islfile |= renamedlfile | |
250 | if b'l' in fctx.flags(): |
|
250 | if b'l' in fctx.flags(): | |
251 | if renamedlfile: |
|
251 | if renamedlfile: | |
252 | raise error.Abort( |
|
252 | raise error.Abort( | |
253 | _(b'renamed/copied largefile %s becomes symlink') |
|
253 | _(b'renamed/copied largefile %s becomes symlink') | |
254 | % f |
|
254 | % f | |
255 | ) |
|
255 | ) | |
256 | islfile = False |
|
256 | islfile = False | |
257 | if islfile: |
|
257 | if islfile: | |
258 | lfiles.add(f) |
|
258 | lfiles.add(f) | |
259 | else: |
|
259 | else: | |
260 | normalfiles.add(f) |
|
260 | normalfiles.add(f) | |
261 |
|
261 | |||
262 | if f in lfiles: |
|
262 | if f in lfiles: | |
263 | fstandin = lfutil.standin(f) |
|
263 | fstandin = lfutil.standin(f) | |
264 | dstfiles.append(fstandin) |
|
264 | dstfiles.append(fstandin) | |
265 | # largefile in manifest if it has not been removed/renamed |
|
265 | # largefile in manifest if it has not been removed/renamed | |
266 | if f in ctx.manifest(): |
|
266 | if f in ctx.manifest(): | |
267 | fctx = ctx.filectx(f) |
|
267 | fctx = ctx.filectx(f) | |
268 | if b'l' in fctx.flags(): |
|
268 | if b'l' in fctx.flags(): | |
269 | renamed = fctx.copysource() |
|
269 | renamed = fctx.copysource() | |
270 | if renamed and renamed in lfiles: |
|
270 | if renamed and renamed in lfiles: | |
271 | raise error.Abort( |
|
271 | raise error.Abort( | |
272 | _(b'largefile %s becomes symlink') % f |
|
272 | _(b'largefile %s becomes symlink') % f | |
273 | ) |
|
273 | ) | |
274 |
|
274 | |||
275 | # largefile was modified, update standins |
|
275 | # largefile was modified, update standins | |
276 | m = hashlib.sha1(b'') |
|
276 | m = hashutil.sha1(b'') | |
277 | m.update(ctx[f].data()) |
|
277 | m.update(ctx[f].data()) | |
278 | hash = node.hex(m.digest()) |
|
278 | hash = node.hex(m.digest()) | |
279 | if f not in lfiletohash or lfiletohash[f] != hash: |
|
279 | if f not in lfiletohash or lfiletohash[f] != hash: | |
280 | rdst.wwrite(f, ctx[f].data(), ctx[f].flags()) |
|
280 | rdst.wwrite(f, ctx[f].data(), ctx[f].flags()) | |
281 | executable = b'x' in ctx[f].flags() |
|
281 | executable = b'x' in ctx[f].flags() | |
282 | lfutil.writestandin(rdst, fstandin, hash, executable) |
|
282 | lfutil.writestandin(rdst, fstandin, hash, executable) | |
283 | lfiletohash[f] = hash |
|
283 | lfiletohash[f] = hash | |
284 | else: |
|
284 | else: | |
285 | # normal file |
|
285 | # normal file | |
286 | dstfiles.append(f) |
|
286 | dstfiles.append(f) | |
287 |
|
287 | |||
288 | def getfilectx(repo, memctx, f): |
|
288 | def getfilectx(repo, memctx, f): | |
289 | srcfname = lfutil.splitstandin(f) |
|
289 | srcfname = lfutil.splitstandin(f) | |
290 | if srcfname is not None: |
|
290 | if srcfname is not None: | |
291 | # if the file isn't in the manifest then it was removed |
|
291 | # if the file isn't in the manifest then it was removed | |
292 | # or renamed, return None to indicate this |
|
292 | # or renamed, return None to indicate this | |
293 | try: |
|
293 | try: | |
294 | fctx = ctx.filectx(srcfname) |
|
294 | fctx = ctx.filectx(srcfname) | |
295 | except error.LookupError: |
|
295 | except error.LookupError: | |
296 | return None |
|
296 | return None | |
297 | renamed = fctx.copysource() |
|
297 | renamed = fctx.copysource() | |
298 | if renamed: |
|
298 | if renamed: | |
299 | # standin is always a largefile because largefile-ness |
|
299 | # standin is always a largefile because largefile-ness | |
300 | # doesn't change after rename or copy |
|
300 | # doesn't change after rename or copy | |
301 | renamed = lfutil.standin(renamed) |
|
301 | renamed = lfutil.standin(renamed) | |
302 |
|
302 | |||
303 | return context.memfilectx( |
|
303 | return context.memfilectx( | |
304 | repo, |
|
304 | repo, | |
305 | memctx, |
|
305 | memctx, | |
306 | f, |
|
306 | f, | |
307 | lfiletohash[srcfname] + b'\n', |
|
307 | lfiletohash[srcfname] + b'\n', | |
308 | b'l' in fctx.flags(), |
|
308 | b'l' in fctx.flags(), | |
309 | b'x' in fctx.flags(), |
|
309 | b'x' in fctx.flags(), | |
310 | renamed, |
|
310 | renamed, | |
311 | ) |
|
311 | ) | |
312 | else: |
|
312 | else: | |
313 | return _getnormalcontext(repo, ctx, f, revmap) |
|
313 | return _getnormalcontext(repo, ctx, f, revmap) | |
314 |
|
314 | |||
315 | # Commit |
|
315 | # Commit | |
316 | _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap) |
|
316 | _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap) | |
317 |
|
317 | |||
318 |
|
318 | |||
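The "update standins" branch above identifies each largefile by a plain SHA-1 of its content and writes the hex digest into the standin. A minimal sketch of that hashing step in isolation (the helper name is hypothetical):

    from mercurial import node
    from mercurial.utils import hashutil

    def largefilehash(data):
        # mirrors the m = hashutil.sha1(b''); m.update(...) pattern above
        m = hashutil.sha1(b'')
        m.update(data)
        return node.hex(m.digest())

    # largefilehash(b'') == b'da39a3ee5e6b4b0d3255bfef95601890afd80709'
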
319 | def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap): |
|
319 | def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap): | |
320 | mctx = context.memctx( |
|
320 | mctx = context.memctx( | |
321 | rdst, |
|
321 | rdst, | |
322 | parents, |
|
322 | parents, | |
323 | ctx.description(), |
|
323 | ctx.description(), | |
324 | dstfiles, |
|
324 | dstfiles, | |
325 | getfilectx, |
|
325 | getfilectx, | |
326 | ctx.user(), |
|
326 | ctx.user(), | |
327 | ctx.date(), |
|
327 | ctx.date(), | |
328 | ctx.extra(), |
|
328 | ctx.extra(), | |
329 | ) |
|
329 | ) | |
330 | ret = rdst.commitctx(mctx) |
|
330 | ret = rdst.commitctx(mctx) | |
331 | lfutil.copyalltostore(rdst, ret) |
|
331 | lfutil.copyalltostore(rdst, ret) | |
332 | rdst.setparents(ret) |
|
332 | rdst.setparents(ret) | |
333 | revmap[ctx.node()] = rdst.changelog.tip() |
|
333 | revmap[ctx.node()] = rdst.changelog.tip() | |
334 |
|
334 | |||
335 |
|
335 | |||
336 | # Generate list of changed files |
|
336 | # Generate list of changed files | |
337 | def _getchangedfiles(ctx, parents): |
|
337 | def _getchangedfiles(ctx, parents): | |
338 | files = set(ctx.files()) |
|
338 | files = set(ctx.files()) | |
339 | if node.nullid not in parents: |
|
339 | if node.nullid not in parents: | |
340 | mc = ctx.manifest() |
|
340 | mc = ctx.manifest() | |
341 | for pctx in ctx.parents(): |
|
341 | for pctx in ctx.parents(): | |
342 | for fn in pctx.manifest().diff(mc): |
|
342 | for fn in pctx.manifest().diff(mc): | |
343 | files.add(fn) |
|
343 | files.add(fn) | |
344 | return files |
|
344 | return files | |
345 |
|
345 | |||
346 |
|
346 | |||
347 | # Convert src parents to dst parents |
|
347 | # Convert src parents to dst parents | |
348 | def _convertparents(ctx, revmap): |
|
348 | def _convertparents(ctx, revmap): | |
349 | parents = [] |
|
349 | parents = [] | |
350 | for p in ctx.parents(): |
|
350 | for p in ctx.parents(): | |
351 | parents.append(revmap[p.node()]) |
|
351 | parents.append(revmap[p.node()]) | |
352 | while len(parents) < 2: |
|
352 | while len(parents) < 2: | |
353 | parents.append(node.nullid) |
|
353 | parents.append(node.nullid) | |
354 | return parents |
|
354 | return parents | |
355 |
|
355 | |||
356 |
|
356 | |||
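A minimal standalone sketch of the parent-conversion step above, assuming only a revision map of old node -> new node; NULLID is an illustrative stand-in for node.nullid, not an import from Mercurial:

# Map each source parent through the revision map, then pad the list to
# exactly two entries with a null node, matching Mercurial's two-parent
# changeset model.
NULLID = b'\0' * 20

def convert_parents(src_parent_nodes, revmap, nullid=NULLID):
    parents = [revmap[p] for p in src_parent_nodes]
    while len(parents) < 2:
        parents.append(nullid)
    return parents

# a single-parent changeset gets the null node as its second parent
assert convert_parents([b'\x01' * 20], {b'\x01' * 20: b'\x02' * 20}) == [
    b'\x02' * 20,
    NULLID,
]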
357 | # Get memfilectx for a normal file |
|
357 | # Get memfilectx for a normal file | |
358 | def _getnormalcontext(repo, ctx, f, revmap): |
|
358 | def _getnormalcontext(repo, ctx, f, revmap): | |
359 | try: |
|
359 | try: | |
360 | fctx = ctx.filectx(f) |
|
360 | fctx = ctx.filectx(f) | |
361 | except error.LookupError: |
|
361 | except error.LookupError: | |
362 | return None |
|
362 | return None | |
363 | renamed = fctx.copysource() |
|
363 | renamed = fctx.copysource() | |
364 |
|
364 | |||
365 | data = fctx.data() |
|
365 | data = fctx.data() | |
366 | if f == b'.hgtags': |
|
366 | if f == b'.hgtags': | |
367 | data = _converttags(repo.ui, revmap, data) |
|
367 | data = _converttags(repo.ui, revmap, data) | |
368 | return context.memfilectx( |
|
368 | return context.memfilectx( | |
369 | repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed |
|
369 | repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed | |
370 | ) |
|
370 | ) | |
371 |
|
371 | |||
372 |
|
372 | |||
373 | # Remap tag data using a revision map |
|
373 | # Remap tag data using a revision map | |
374 | def _converttags(ui, revmap, data): |
|
374 | def _converttags(ui, revmap, data): | |
375 | newdata = [] |
|
375 | newdata = [] | |
376 | for line in data.splitlines(): |
|
376 | for line in data.splitlines(): | |
377 | try: |
|
377 | try: | |
378 | id, name = line.split(b' ', 1) |
|
378 | id, name = line.split(b' ', 1) | |
379 | except ValueError: |
|
379 | except ValueError: | |
380 | ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line) |
|
380 | ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line) | |
381 | continue |
|
381 | continue | |
382 | try: |
|
382 | try: | |
383 | newid = node.bin(id) |
|
383 | newid = node.bin(id) | |
384 | except TypeError: |
|
384 | except TypeError: | |
385 | ui.warn(_(b'skipping incorrectly formatted id %s\n') % id) |
|
385 | ui.warn(_(b'skipping incorrectly formatted id %s\n') % id) | |
386 | continue |
|
386 | continue | |
387 | try: |
|
387 | try: | |
388 | newdata.append(b'%s %s\n' % (node.hex(revmap[newid]), name)) |
|
388 | newdata.append(b'%s %s\n' % (node.hex(revmap[newid]), name)) | |
389 | except KeyError: |
|
389 | except KeyError: | |
390 | ui.warn(_(b'no mapping for id %s\n') % id) |
|
390 | ui.warn(_(b'no mapping for id %s\n') % id) | |
391 | continue |
|
391 | continue | |
392 | return b''.join(newdata) |
|
392 | return b''.join(newdata) | |
393 |
|
393 | |||
394 |
|
394 | |||
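A standalone sketch of the .hgtags remapping above, using only binascii in place of Mercurial's node helpers; the revmap is assumed to map old binary node -> new binary node:

import binascii

def convert_tags(data, revmap):
    # Each .hgtags line is b'<40-hex-node> <tagname>'; rewrite the node
    # through revmap and drop malformed or unmapped lines, as _converttags
    # above does (minus the ui warnings).
    newdata = []
    for line in data.splitlines():
        try:
            id, name = line.split(b' ', 1)
            newid = binascii.unhexlify(id)
        except (ValueError, binascii.Error):
            continue  # incorrectly formatted tag line or id
        if newid not in revmap:
            continue  # no mapping for this changeset
        newdata.append(b'%s %s\n' % (binascii.hexlify(revmap[newid]), name))
    return b''.join(newdata)

old, new = b'\x01' * 20, b'\x02' * 20
tags = binascii.hexlify(old) + b' v1.0\nnot-a-tag-line\n'
assert convert_tags(tags, {old: new}) == binascii.hexlify(new) + b' v1.0\n'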
395 | def _islfile(file, ctx, matcher, size): |
|
395 | def _islfile(file, ctx, matcher, size): | |
396 | '''Return true if file should be considered a largefile, i.e. |
|
396 | '''Return true if file should be considered a largefile, i.e. | |
397 | matcher matches it or it is larger than size.''' |
|
397 | matcher matches it or it is larger than size.''' | |
398 | # never store special .hg* files as largefiles |
|
398 | # never store special .hg* files as largefiles | |
399 | if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs': |
|
399 | if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs': | |
400 | return False |
|
400 | return False | |
401 | if matcher and matcher(file): |
|
401 | if matcher and matcher(file): | |
402 | return True |
|
402 | return True | |
403 | try: |
|
403 | try: | |
404 | return ctx.filectx(file).size() >= size * 1024 * 1024 |
|
404 | return ctx.filectx(file).size() >= size * 1024 * 1024 | |
405 | except error.LookupError: |
|
405 | except error.LookupError: | |
406 | return False |
|
406 | return False | |
407 |
|
407 | |||
408 |
|
408 | |||
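The test above is the whole largefile heuristic: never treat the special .hg* files as largefiles, honour an optional pattern matcher, and otherwise compare the file size against a threshold given in mebibytes. A standalone sketch (names are illustrative, not Mercurial APIs):

def is_largefile(path, size_bytes, minsize_mb, matcher=None):
    # special .hg* files are never stored as largefiles
    if path in (b'.hgtags', b'.hgignore', b'.hgsigs'):
        return False
    # an explicit pattern match wins regardless of size
    if matcher is not None and matcher(path):
        return True
    # the configured minimum size is in MiB, the comparison is in bytes
    return size_bytes >= minsize_mb * 1024 * 1024

assert not is_largefile(b'.hgtags', 50 * 1024 * 1024, 10)
assert is_largefile(b'data.bin', 10 * 1024 * 1024, 10)  # exactly at the threshold
assert not is_largefile(b'notes.txt', 1024, 10)
assert is_largefile(b'notes.txt', 1024, 10, matcher=lambda p: p.endswith(b'.txt'))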
409 | def uploadlfiles(ui, rsrc, rdst, files): |
|
409 | def uploadlfiles(ui, rsrc, rdst, files): | |
410 | '''upload largefiles to the central store''' |
|
410 | '''upload largefiles to the central store''' | |
411 |
|
411 | |||
412 | if not files: |
|
412 | if not files: | |
413 | return |
|
413 | return | |
414 |
|
414 | |||
415 | store = storefactory.openstore(rsrc, rdst, put=True) |
|
415 | store = storefactory.openstore(rsrc, rdst, put=True) | |
416 |
|
416 | |||
417 | at = 0 |
|
417 | at = 0 | |
418 | ui.debug(b"sending statlfile command for %d largefiles\n" % len(files)) |
|
418 | ui.debug(b"sending statlfile command for %d largefiles\n" % len(files)) | |
419 | retval = store.exists(files) |
|
419 | retval = store.exists(files) | |
420 | files = [h for h in files if not retval[h]] |
|
420 | files = [h for h in files if not retval[h]] | |
421 | ui.debug(b"%d largefiles need to be uploaded\n" % len(files)) |
|
421 | ui.debug(b"%d largefiles need to be uploaded\n" % len(files)) | |
422 |
|
422 | |||
423 | with ui.makeprogress( |
|
423 | with ui.makeprogress( | |
424 | _(b'uploading largefiles'), unit=_(b'files'), total=len(files) |
|
424 | _(b'uploading largefiles'), unit=_(b'files'), total=len(files) | |
425 | ) as progress: |
|
425 | ) as progress: | |
426 | for hash in files: |
|
426 | for hash in files: | |
427 | progress.update(at) |
|
427 | progress.update(at) | |
428 | source = lfutil.findfile(rsrc, hash) |
|
428 | source = lfutil.findfile(rsrc, hash) | |
429 | if not source: |
|
429 | if not source: | |
430 | raise error.Abort( |
|
430 | raise error.Abort( | |
431 | _( |
|
431 | _( | |
432 | b'largefile %s missing from store' |
|
432 | b'largefile %s missing from store' | |
433 | b' (needs to be uploaded)' |
|
433 | b' (needs to be uploaded)' | |
434 | ) |
|
434 | ) | |
435 | % hash |
|
435 | % hash | |
436 | ) |
|
436 | ) | |
437 | # XXX check for errors here |
|
437 | # XXX check for errors here | |
438 | store.put(source, hash) |
|
438 | store.put(source, hash) | |
439 | at += 1 |
|
439 | at += 1 | |
440 |
|
440 | |||
441 |
|
441 | |||
442 | def verifylfiles(ui, repo, all=False, contents=False): |
|
442 | def verifylfiles(ui, repo, all=False, contents=False): | |
443 | '''Verify that every largefile revision in the current changeset |
|
443 | '''Verify that every largefile revision in the current changeset | |
444 | exists in the central store. With --contents, also verify that |
|
444 | exists in the central store. With --contents, also verify that | |
445 | the contents of each local largefile file revision are correct (SHA-1 hash |
|
445 | the contents of each local largefile file revision are correct (SHA-1 hash | |
446 | matches the revision ID). With --all, check every changeset in |
|
446 | matches the revision ID). With --all, check every changeset in | |
447 | this repository.''' |
|
447 | this repository.''' | |
448 | if all: |
|
448 | if all: | |
449 | revs = repo.revs(b'all()') |
|
449 | revs = repo.revs(b'all()') | |
450 | else: |
|
450 | else: | |
451 | revs = [b'.'] |
|
451 | revs = [b'.'] | |
452 |
|
452 | |||
453 | store = storefactory.openstore(repo) |
|
453 | store = storefactory.openstore(repo) | |
454 | return store.verify(revs, contents=contents) |
|
454 | return store.verify(revs, contents=contents) | |
455 |
|
455 | |||
456 |
|
456 | |||
457 | def cachelfiles(ui, repo, node, filelist=None): |
|
457 | def cachelfiles(ui, repo, node, filelist=None): | |
458 | '''cachelfiles ensures that all largefiles needed by the specified revision |
|
458 | '''cachelfiles ensures that all largefiles needed by the specified revision | |
459 | are present in the repository's largefile cache. |
|
459 | are present in the repository's largefile cache. | |
460 |
|
460 | |||
461 | returns a tuple (cached, missing). cached is the list of files downloaded |
|
461 | returns a tuple (cached, missing). cached is the list of files downloaded | |
462 | by this operation; missing is the list of files that were needed but could |
|
462 | by this operation; missing is the list of files that were needed but could | |
463 | not be found.''' |
|
463 | not be found.''' | |
464 | lfiles = lfutil.listlfiles(repo, node) |
|
464 | lfiles = lfutil.listlfiles(repo, node) | |
465 | if filelist: |
|
465 | if filelist: | |
466 | lfiles = set(lfiles) & set(filelist) |
|
466 | lfiles = set(lfiles) & set(filelist) | |
467 | toget = [] |
|
467 | toget = [] | |
468 |
|
468 | |||
469 | ctx = repo[node] |
|
469 | ctx = repo[node] | |
470 | for lfile in lfiles: |
|
470 | for lfile in lfiles: | |
471 | try: |
|
471 | try: | |
472 | expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)]) |
|
472 | expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)]) | |
473 | except IOError as err: |
|
473 | except IOError as err: | |
474 | if err.errno == errno.ENOENT: |
|
474 | if err.errno == errno.ENOENT: | |
475 | continue # node must be None and standin wasn't found in wctx |
|
475 | continue # node must be None and standin wasn't found in wctx | |
476 | raise |
|
476 | raise | |
477 | if not lfutil.findfile(repo, expectedhash): |
|
477 | if not lfutil.findfile(repo, expectedhash): | |
478 | toget.append((lfile, expectedhash)) |
|
478 | toget.append((lfile, expectedhash)) | |
479 |
|
479 | |||
480 | if toget: |
|
480 | if toget: | |
481 | store = storefactory.openstore(repo) |
|
481 | store = storefactory.openstore(repo) | |
482 | ret = store.get(toget) |
|
482 | ret = store.get(toget) | |
483 | return ret |
|
483 | return ret | |
484 |
|
484 | |||
485 | return ([], []) |
|
485 | return ([], []) | |
486 |
|
486 | |||
487 |
|
487 | |||
488 | def downloadlfiles(ui, repo, rev=None): |
|
488 | def downloadlfiles(ui, repo, rev=None): | |
489 | match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {}) |
|
489 | match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {}) | |
490 |
|
490 | |||
491 | def prepare(ctx, fns): |
|
491 | def prepare(ctx, fns): | |
492 | pass |
|
492 | pass | |
493 |
|
493 | |||
494 | totalsuccess = 0 |
|
494 | totalsuccess = 0 | |
495 | totalmissing = 0 |
|
495 | totalmissing = 0 | |
496 | if rev != []: # walkchangerevs on empty list would return all revs |
|
496 | if rev != []: # walkchangerevs on empty list would return all revs | |
497 | for ctx in cmdutil.walkchangerevs(repo, match, {b'rev': rev}, prepare): |
|
497 | for ctx in cmdutil.walkchangerevs(repo, match, {b'rev': rev}, prepare): | |
498 | success, missing = cachelfiles(ui, repo, ctx.node()) |
|
498 | success, missing = cachelfiles(ui, repo, ctx.node()) | |
499 | totalsuccess += len(success) |
|
499 | totalsuccess += len(success) | |
500 | totalmissing += len(missing) |
|
500 | totalmissing += len(missing) | |
501 | ui.status(_(b"%d additional largefiles cached\n") % totalsuccess) |
|
501 | ui.status(_(b"%d additional largefiles cached\n") % totalsuccess) | |
502 | if totalmissing > 0: |
|
502 | if totalmissing > 0: | |
503 | ui.status(_(b"%d largefiles failed to download\n") % totalmissing) |
|
503 | ui.status(_(b"%d largefiles failed to download\n") % totalmissing) | |
504 | return totalsuccess, totalmissing |
|
504 | return totalsuccess, totalmissing | |
505 |
|
505 | |||
506 |
|
506 | |||
507 | def updatelfiles( |
|
507 | def updatelfiles( | |
508 | ui, repo, filelist=None, printmessage=None, normallookup=False |
|
508 | ui, repo, filelist=None, printmessage=None, normallookup=False | |
509 | ): |
|
509 | ): | |
510 | '''Update largefiles according to standins in the working directory |
|
510 | '''Update largefiles according to standins in the working directory | |
511 |
|
511 | |||
512 | If ``printmessage`` is other than ``None``, it means "print (or |
|
512 | If ``printmessage`` is other than ``None``, it means "print (or | |
513 | ignore, for false) message forcibly". |
|
513 | ignore, for false) message forcibly". | |
514 | ''' |
|
514 | ''' | |
515 | statuswriter = lfutil.getstatuswriter(ui, repo, printmessage) |
|
515 | statuswriter = lfutil.getstatuswriter(ui, repo, printmessage) | |
516 | with repo.wlock(): |
|
516 | with repo.wlock(): | |
517 | lfdirstate = lfutil.openlfdirstate(ui, repo) |
|
517 | lfdirstate = lfutil.openlfdirstate(ui, repo) | |
518 | lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate) |
|
518 | lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate) | |
519 |
|
519 | |||
520 | if filelist is not None: |
|
520 | if filelist is not None: | |
521 | filelist = set(filelist) |
|
521 | filelist = set(filelist) | |
522 | lfiles = [f for f in lfiles if f in filelist] |
|
522 | lfiles = [f for f in lfiles if f in filelist] | |
523 |
|
523 | |||
524 | update = {} |
|
524 | update = {} | |
525 | dropped = set() |
|
525 | dropped = set() | |
526 | updated, removed = 0, 0 |
|
526 | updated, removed = 0, 0 | |
527 | wvfs = repo.wvfs |
|
527 | wvfs = repo.wvfs | |
528 | wctx = repo[None] |
|
528 | wctx = repo[None] | |
529 | for lfile in lfiles: |
|
529 | for lfile in lfiles: | |
530 | lfileorig = os.path.relpath( |
|
530 | lfileorig = os.path.relpath( | |
531 | scmutil.backuppath(ui, repo, lfile), start=repo.root |
|
531 | scmutil.backuppath(ui, repo, lfile), start=repo.root | |
532 | ) |
|
532 | ) | |
533 | standin = lfutil.standin(lfile) |
|
533 | standin = lfutil.standin(lfile) | |
534 | standinorig = os.path.relpath( |
|
534 | standinorig = os.path.relpath( | |
535 | scmutil.backuppath(ui, repo, standin), start=repo.root |
|
535 | scmutil.backuppath(ui, repo, standin), start=repo.root | |
536 | ) |
|
536 | ) | |
537 | if wvfs.exists(standin): |
|
537 | if wvfs.exists(standin): | |
538 | if wvfs.exists(standinorig) and wvfs.exists(lfile): |
|
538 | if wvfs.exists(standinorig) and wvfs.exists(lfile): | |
539 | shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig)) |
|
539 | shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig)) | |
540 | wvfs.unlinkpath(standinorig) |
|
540 | wvfs.unlinkpath(standinorig) | |
541 | expecthash = lfutil.readasstandin(wctx[standin]) |
|
541 | expecthash = lfutil.readasstandin(wctx[standin]) | |
542 | if expecthash != b'': |
|
542 | if expecthash != b'': | |
543 | if lfile not in wctx: # not switched to normal file |
|
543 | if lfile not in wctx: # not switched to normal file | |
544 | if repo.dirstate[standin] != b'?': |
|
544 | if repo.dirstate[standin] != b'?': | |
545 | wvfs.unlinkpath(lfile, ignoremissing=True) |
|
545 | wvfs.unlinkpath(lfile, ignoremissing=True) | |
546 | else: |
|
546 | else: | |
547 | dropped.add(lfile) |
|
547 | dropped.add(lfile) | |
548 |
|
548 | |||
549 | # use normallookup() to allocate an entry in largefiles |
|
549 | # use normallookup() to allocate an entry in largefiles | |
550 | # dirstate to prevent lfilesrepo.status() from reporting |
|
550 | # dirstate to prevent lfilesrepo.status() from reporting | |
551 | # missing files as removed. |
|
551 | # missing files as removed. | |
552 | lfdirstate.normallookup(lfile) |
|
552 | lfdirstate.normallookup(lfile) | |
553 | update[lfile] = expecthash |
|
553 | update[lfile] = expecthash | |
554 | else: |
|
554 | else: | |
555 | # Remove lfiles for which the standin is deleted, unless the |
|
555 | # Remove lfiles for which the standin is deleted, unless the | |
556 | # lfile is added to the repository again. This happens when a |
|
556 | # lfile is added to the repository again. This happens when a | |
557 | # largefile is converted back to a normal file: the standin |
|
557 | # largefile is converted back to a normal file: the standin | |
558 | # disappears, but a new (normal) file appears as the lfile. |
|
558 | # disappears, but a new (normal) file appears as the lfile. | |
559 | if ( |
|
559 | if ( | |
560 | wvfs.exists(lfile) |
|
560 | wvfs.exists(lfile) | |
561 | and repo.dirstate.normalize(lfile) not in wctx |
|
561 | and repo.dirstate.normalize(lfile) not in wctx | |
562 | ): |
|
562 | ): | |
563 | wvfs.unlinkpath(lfile) |
|
563 | wvfs.unlinkpath(lfile) | |
564 | removed += 1 |
|
564 | removed += 1 | |
565 |
|
565 | |||
566 | # largefile processing might be slow and be interrupted - be prepared |
|
566 | # largefile processing might be slow and be interrupted - be prepared | |
567 | lfdirstate.write() |
|
567 | lfdirstate.write() | |
568 |
|
568 | |||
569 | if lfiles: |
|
569 | if lfiles: | |
570 | lfiles = [f for f in lfiles if f not in dropped] |
|
570 | lfiles = [f for f in lfiles if f not in dropped] | |
571 |
|
571 | |||
572 | for f in dropped: |
|
572 | for f in dropped: | |
573 | repo.wvfs.unlinkpath(lfutil.standin(f)) |
|
573 | repo.wvfs.unlinkpath(lfutil.standin(f)) | |
574 |
|
574 | |||
575 | # This needs to happen for dropped files, otherwise they stay in |
|
575 | # This needs to happen for dropped files, otherwise they stay in | |
576 | # the M state. |
|
576 | # the M state. | |
577 | lfutil.synclfdirstate(repo, lfdirstate, f, normallookup) |
|
577 | lfutil.synclfdirstate(repo, lfdirstate, f, normallookup) | |
578 |
|
578 | |||
579 | statuswriter(_(b'getting changed largefiles\n')) |
|
579 | statuswriter(_(b'getting changed largefiles\n')) | |
580 | cachelfiles(ui, repo, None, lfiles) |
|
580 | cachelfiles(ui, repo, None, lfiles) | |
581 |
|
581 | |||
582 | for lfile in lfiles: |
|
582 | for lfile in lfiles: | |
583 | update1 = 0 |
|
583 | update1 = 0 | |
584 |
|
584 | |||
585 | expecthash = update.get(lfile) |
|
585 | expecthash = update.get(lfile) | |
586 | if expecthash: |
|
586 | if expecthash: | |
587 | if not lfutil.copyfromcache(repo, expecthash, lfile): |
|
587 | if not lfutil.copyfromcache(repo, expecthash, lfile): | |
588 | # failed ... but already removed and set to normallookup |
|
588 | # failed ... but already removed and set to normallookup | |
589 | continue |
|
589 | continue | |
590 | # Synchronize largefile dirstate to the last modified |
|
590 | # Synchronize largefile dirstate to the last modified | |
591 | # time of the file |
|
591 | # time of the file | |
592 | lfdirstate.normal(lfile) |
|
592 | lfdirstate.normal(lfile) | |
593 | update1 = 1 |
|
593 | update1 = 1 | |
594 |
|
594 | |||
595 | # copy the exec mode of largefile standin from the repository's |
|
595 | # copy the exec mode of largefile standin from the repository's | |
596 | # dirstate to its state in the lfdirstate. |
|
596 | # dirstate to its state in the lfdirstate. | |
597 | standin = lfutil.standin(lfile) |
|
597 | standin = lfutil.standin(lfile) | |
598 | if wvfs.exists(standin): |
|
598 | if wvfs.exists(standin): | |
599 | # exec is decided by the user's permissions using mask 0o100 |
|
599 | # exec is decided by the user's permissions using mask 0o100 | |
600 | standinexec = wvfs.stat(standin).st_mode & 0o100 |
|
600 | standinexec = wvfs.stat(standin).st_mode & 0o100 | |
601 | st = wvfs.stat(lfile) |
|
601 | st = wvfs.stat(lfile) | |
602 | mode = st.st_mode |
|
602 | mode = st.st_mode | |
603 | if standinexec != mode & 0o100: |
|
603 | if standinexec != mode & 0o100: | |
604 | # first remove all X bits, then shift all R bits to X |
|
604 | # first remove all X bits, then shift all R bits to X | |
605 | mode &= ~0o111 |
|
605 | mode &= ~0o111 | |
606 | if standinexec: |
|
606 | if standinexec: | |
607 | mode |= (mode >> 2) & 0o111 & ~util.umask |
|
607 | mode |= (mode >> 2) & 0o111 & ~util.umask | |
608 | wvfs.chmod(lfile, mode) |
|
608 | wvfs.chmod(lfile, mode) | |
609 | update1 = 1 |
|
609 | update1 = 1 | |
610 |
|
610 | |||
611 | updated += update1 |
|
611 | updated += update1 | |
612 |
|
612 | |||
613 | lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup) |
|
613 | lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup) | |
614 |
|
614 | |||
615 | lfdirstate.write() |
|
615 | lfdirstate.write() | |
616 | if lfiles: |
|
616 | if lfiles: | |
617 | statuswriter( |
|
617 | statuswriter( | |
618 | _(b'%d largefiles updated, %d removed\n') % (updated, removed) |
|
618 | _(b'%d largefiles updated, %d removed\n') % (updated, removed) | |
619 | ) |
|
619 | ) | |
620 |
|
620 | |||
621 |
|
621 | |||
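The mode arithmetic in updatelfiles above (clear all exec bits, then derive them from the read bits) is easy to get wrong; here is a standalone sketch of just that step, with the umask passed in explicitly instead of read from mercurial.util:

def propagate_exec(standin_mode, file_mode, umask):
    # exec is decided by the user's permission bit, mask 0o100
    standinexec = standin_mode & 0o100
    mode = file_mode
    if standinexec != mode & 0o100:
        # first remove all X bits, then shift the R bits into the X
        # positions, honouring the umask
        mode &= ~0o111
        if standinexec:
            mode |= (mode >> 2) & 0o111 & ~umask
    return mode

# rw-r--r-- largefile with an executable standin and umask 022 -> rwxr-xr-x
assert propagate_exec(0o755, 0o644, 0o022) == 0o755
# executable largefile whose standin is not executable -> exec bits dropped
assert propagate_exec(0o644, 0o755, 0o022) == 0o644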
622 | @eh.command( |
|
622 | @eh.command( | |
623 | b'lfpull', |
|
623 | b'lfpull', | |
624 | [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))] |
|
624 | [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))] | |
625 | + cmdutil.remoteopts, |
|
625 | + cmdutil.remoteopts, | |
626 | _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'), |
|
626 | _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'), | |
627 | ) |
|
627 | ) | |
628 | def lfpull(ui, repo, source=b"default", **opts): |
|
628 | def lfpull(ui, repo, source=b"default", **opts): | |
629 | """pull largefiles for the specified revisions from the specified source |
|
629 | """pull largefiles for the specified revisions from the specified source | |
630 |
|
630 | |||
631 | Pull largefiles that are referenced from local changesets but missing |
|
631 | Pull largefiles that are referenced from local changesets but missing | |
632 | locally, pulling from a remote repository to the local cache. |
|
632 | locally, pulling from a remote repository to the local cache. | |
633 |
|
633 | |||
634 | If SOURCE is omitted, the 'default' path will be used. |
|
634 | If SOURCE is omitted, the 'default' path will be used. | |
635 | See :hg:`help urls` for more information. |
|
635 | See :hg:`help urls` for more information. | |
636 |
|
636 | |||
637 | .. container:: verbose |
|
637 | .. container:: verbose | |
638 |
|
638 | |||
639 | Some examples: |
|
639 | Some examples: | |
640 |
|
640 | |||
641 | - pull largefiles for all branch heads:: |
|
641 | - pull largefiles for all branch heads:: | |
642 |
|
642 | |||
643 | hg lfpull -r "head() and not closed()" |
|
643 | hg lfpull -r "head() and not closed()" | |
644 |
|
644 | |||
645 | - pull largefiles on the default branch:: |
|
645 | - pull largefiles on the default branch:: | |
646 |
|
646 | |||
647 | hg lfpull -r "branch(default)" |
|
647 | hg lfpull -r "branch(default)" | |
648 | """ |
|
648 | """ | |
649 | repo.lfpullsource = source |
|
649 | repo.lfpullsource = source | |
650 |
|
650 | |||
651 | revs = opts.get('rev', []) |
|
651 | revs = opts.get('rev', []) | |
652 | if not revs: |
|
652 | if not revs: | |
653 | raise error.Abort(_(b'no revisions specified')) |
|
653 | raise error.Abort(_(b'no revisions specified')) | |
654 | revs = scmutil.revrange(repo, revs) |
|
654 | revs = scmutil.revrange(repo, revs) | |
655 |
|
655 | |||
656 | numcached = 0 |
|
656 | numcached = 0 | |
657 | for rev in revs: |
|
657 | for rev in revs: | |
658 | ui.note(_(b'pulling largefiles for revision %d\n') % rev) |
|
658 | ui.note(_(b'pulling largefiles for revision %d\n') % rev) | |
659 | (cached, missing) = cachelfiles(ui, repo, rev) |
|
659 | (cached, missing) = cachelfiles(ui, repo, rev) | |
660 | numcached += len(cached) |
|
660 | numcached += len(cached) | |
661 | ui.status(_(b"%d largefiles cached\n") % numcached) |
|
661 | ui.status(_(b"%d largefiles cached\n") % numcached) | |
662 |
|
662 | |||
663 |
|
663 | |||
664 | @eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE')) |
|
664 | @eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE')) | |
665 | def debuglfput(ui, repo, filepath, **kwargs): |
|
665 | def debuglfput(ui, repo, filepath, **kwargs): | |
666 | hash = lfutil.hashfile(filepath) |
|
666 | hash = lfutil.hashfile(filepath) | |
667 | storefactory.openstore(repo).put(filepath, hash) |
|
667 | storefactory.openstore(repo).put(filepath, hash) | |
668 | ui.write(b'%s\n' % hash) |
|
668 | ui.write(b'%s\n' % hash) | |
669 | return 0 |
|
669 | return 0 |
@@ -1,760 +1,760 b'' | |||||
1 | # Copyright 2009-2010 Gregory P. Ward |
|
1 | # Copyright 2009-2010 Gregory P. Ward | |
2 | # Copyright 2009-2010 Intelerad Medical Systems Incorporated |
|
2 | # Copyright 2009-2010 Intelerad Medical Systems Incorporated | |
3 | # Copyright 2010-2011 Fog Creek Software |
|
3 | # Copyright 2010-2011 Fog Creek Software | |
4 | # Copyright 2010-2011 Unity Technologies |
|
4 | # Copyright 2010-2011 Unity Technologies | |
5 | # |
|
5 | # | |
6 | # This software may be used and distributed according to the terms of the |
|
6 | # This software may be used and distributed according to the terms of the | |
7 | # GNU General Public License version 2 or any later version. |
|
7 | # GNU General Public License version 2 or any later version. | |
8 |
|
8 | |||
9 | '''largefiles utility code: must not import other modules in this package.''' |
|
9 | '''largefiles utility code: must not import other modules in this package.''' | |
10 | from __future__ import absolute_import |
|
10 | from __future__ import absolute_import | |
11 |
|
11 | |||
12 | import contextlib |
|
12 | import contextlib | |
13 | import copy |
|
13 | import copy | |
14 | import hashlib |
|
|||
15 | import os |
|
14 | import os | |
16 | import stat |
|
15 | import stat | |
17 |
|
16 | |||
18 | from mercurial.i18n import _ |
|
17 | from mercurial.i18n import _ | |
19 | from mercurial.node import hex |
|
18 | from mercurial.node import hex | |
20 | from mercurial.pycompat import open |
|
19 | from mercurial.pycompat import open | |
21 |
|
20 | |||
22 | from mercurial import ( |
|
21 | from mercurial import ( | |
23 | dirstate, |
|
22 | dirstate, | |
24 | encoding, |
|
23 | encoding, | |
25 | error, |
|
24 | error, | |
26 | httpconnection, |
|
25 | httpconnection, | |
27 | match as matchmod, |
|
26 | match as matchmod, | |
28 | node, |
|
27 | node, | |
29 | pycompat, |
|
28 | pycompat, | |
30 | scmutil, |
|
29 | scmutil, | |
31 | sparse, |
|
30 | sparse, | |
32 | util, |
|
31 | util, | |
33 | vfs as vfsmod, |
|
32 | vfs as vfsmod, | |
34 | ) |
|
33 | ) | |
|
34 | from mercurial.utils import hashutil | |||
35 |
|
35 | |||
36 | shortname = b'.hglf' |
|
36 | shortname = b'.hglf' | |
37 | shortnameslash = shortname + b'/' |
|
37 | shortnameslash = shortname + b'/' | |
38 | longname = b'largefiles' |
|
38 | longname = b'largefiles' | |
39 |
|
39 | |||
40 | # -- Private worker functions ------------------------------------------ |
|
40 | # -- Private worker functions ------------------------------------------ | |
41 |
|
41 | |||
42 |
|
42 | |||
43 | @contextlib.contextmanager |
|
43 | @contextlib.contextmanager | |
44 | def lfstatus(repo, value=True): |
|
44 | def lfstatus(repo, value=True): | |
45 | oldvalue = getattr(repo, 'lfstatus', False) |
|
45 | oldvalue = getattr(repo, 'lfstatus', False) | |
46 | repo.lfstatus = value |
|
46 | repo.lfstatus = value | |
47 | try: |
|
47 | try: | |
48 | yield |
|
48 | yield | |
49 | finally: |
|
49 | finally: | |
50 | repo.lfstatus = oldvalue |
|
50 | repo.lfstatus = oldvalue | |
51 |
|
51 | |||
52 |
|
52 | |||
53 | def getminsize(ui, assumelfiles, opt, default=10): |
|
53 | def getminsize(ui, assumelfiles, opt, default=10): | |
54 | lfsize = opt |
|
54 | lfsize = opt | |
55 | if not lfsize and assumelfiles: |
|
55 | if not lfsize and assumelfiles: | |
56 | lfsize = ui.config(longname, b'minsize', default=default) |
|
56 | lfsize = ui.config(longname, b'minsize', default=default) | |
57 | if lfsize: |
|
57 | if lfsize: | |
58 | try: |
|
58 | try: | |
59 | lfsize = float(lfsize) |
|
59 | lfsize = float(lfsize) | |
60 | except ValueError: |
|
60 | except ValueError: | |
61 | raise error.Abort( |
|
61 | raise error.Abort( | |
62 | _(b'largefiles: size must be number (not %s)\n') % lfsize |
|
62 | _(b'largefiles: size must be number (not %s)\n') % lfsize | |
63 | ) |
|
63 | ) | |
64 | if lfsize is None: |
|
64 | if lfsize is None: | |
65 | raise error.Abort(_(b'minimum size for largefiles must be specified')) |
|
65 | raise error.Abort(_(b'minimum size for largefiles must be specified')) | |
66 | return lfsize |
|
66 | return lfsize | |
67 |
|
67 | |||
68 |
|
68 | |||
69 | def link(src, dest): |
|
69 | def link(src, dest): | |
70 | """Try to create hardlink - if that fails, efficiently make a copy.""" |
|
70 | """Try to create hardlink - if that fails, efficiently make a copy.""" | |
71 | util.makedirs(os.path.dirname(dest)) |
|
71 | util.makedirs(os.path.dirname(dest)) | |
72 | try: |
|
72 | try: | |
73 | util.oslink(src, dest) |
|
73 | util.oslink(src, dest) | |
74 | except OSError: |
|
74 | except OSError: | |
75 | # if hardlinks fail, fallback on atomic copy |
|
75 | # if hardlinks fail, fallback on atomic copy | |
76 | with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf: |
|
76 | with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf: | |
77 | for chunk in util.filechunkiter(srcf): |
|
77 | for chunk in util.filechunkiter(srcf): | |
78 | dstf.write(chunk) |
|
78 | dstf.write(chunk) | |
79 | os.chmod(dest, os.stat(src).st_mode) |
|
79 | os.chmod(dest, os.stat(src).st_mode) | |
80 |
|
80 | |||
81 |
|
81 | |||
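A standalone sketch of the hardlink-or-copy pattern above, using only the standard library (os.link plus a chunked copy into a temporary file that is renamed over the destination); the chunk size is illustrative:

import os
import tempfile

def link_or_copy(src, dest, chunksize=128 * 1024):
    os.makedirs(os.path.dirname(dest) or '.', exist_ok=True)
    try:
        os.link(src, dest)          # hardlink when the filesystem allows it
        return
    except OSError:
        pass                        # cross-device link, FAT, etc. -> copy
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(dest) or '.')
    try:
        with os.fdopen(fd, 'wb') as dstf, open(src, 'rb') as srcf:
            for chunk in iter(lambda: srcf.read(chunksize), b''):
                dstf.write(chunk)
        os.chmod(tmp, os.stat(src).st_mode)
        os.replace(tmp, dest)       # atomic rename over the destination
    except BaseException:
        os.unlink(tmp)
        raise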
82 | def usercachepath(ui, hash): |
|
82 | def usercachepath(ui, hash): | |
83 | '''Return the correct location in the "global" largefiles cache for a file |
|
83 | '''Return the correct location in the "global" largefiles cache for a file | |
84 | with the given hash. |
|
84 | with the given hash. | |
85 | This cache is used for sharing of largefiles across repositories - both |
|
85 | This cache is used for sharing of largefiles across repositories - both | |
86 | to preserve download bandwidth and storage space.''' |
|
86 | to preserve download bandwidth and storage space.''' | |
87 | return os.path.join(_usercachedir(ui), hash) |
|
87 | return os.path.join(_usercachedir(ui), hash) | |
88 |
|
88 | |||
89 |
|
89 | |||
90 | def _usercachedir(ui, name=longname): |
|
90 | def _usercachedir(ui, name=longname): | |
91 | '''Return the location of the "global" largefiles cache.''' |
|
91 | '''Return the location of the "global" largefiles cache.''' | |
92 | path = ui.configpath(name, b'usercache') |
|
92 | path = ui.configpath(name, b'usercache') | |
93 | if path: |
|
93 | if path: | |
94 | return path |
|
94 | return path | |
95 | if pycompat.iswindows: |
|
95 | if pycompat.iswindows: | |
96 | appdata = encoding.environ.get( |
|
96 | appdata = encoding.environ.get( | |
97 | b'LOCALAPPDATA', encoding.environ.get(b'APPDATA') |
|
97 | b'LOCALAPPDATA', encoding.environ.get(b'APPDATA') | |
98 | ) |
|
98 | ) | |
99 | if appdata: |
|
99 | if appdata: | |
100 | return os.path.join(appdata, name) |
|
100 | return os.path.join(appdata, name) | |
101 | elif pycompat.isdarwin: |
|
101 | elif pycompat.isdarwin: | |
102 | home = encoding.environ.get(b'HOME') |
|
102 | home = encoding.environ.get(b'HOME') | |
103 | if home: |
|
103 | if home: | |
104 | return os.path.join(home, b'Library', b'Caches', name) |
|
104 | return os.path.join(home, b'Library', b'Caches', name) | |
105 | elif pycompat.isposix: |
|
105 | elif pycompat.isposix: | |
106 | path = encoding.environ.get(b'XDG_CACHE_HOME') |
|
106 | path = encoding.environ.get(b'XDG_CACHE_HOME') | |
107 | if path: |
|
107 | if path: | |
108 | return os.path.join(path, name) |
|
108 | return os.path.join(path, name) | |
109 | home = encoding.environ.get(b'HOME') |
|
109 | home = encoding.environ.get(b'HOME') | |
110 | if home: |
|
110 | if home: | |
111 | return os.path.join(home, b'.cache', name) |
|
111 | return os.path.join(home, b'.cache', name) | |
112 | else: |
|
112 | else: | |
113 | raise error.Abort( |
|
113 | raise error.Abort( | |
114 | _(b'unknown operating system: %s\n') % pycompat.osname |
|
114 | _(b'unknown operating system: %s\n') % pycompat.osname | |
115 | ) |
|
115 | ) | |
116 | raise error.Abort(_(b'unknown %s usercache location') % name) |
|
116 | raise error.Abort(_(b'unknown %s usercache location') % name) | |
117 |
|
117 | |||
118 |
|
118 | |||
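A standalone sketch of the cache-location fallbacks above, using os/sys instead of Mercurial's encoding and pycompat layers; an explicitly configured path always wins:

import os
import sys

def user_cache_dir(name='largefiles', configured=None):
    if configured:
        return configured
    env = os.environ
    if sys.platform.startswith('win'):
        appdata = env.get('LOCALAPPDATA') or env.get('APPDATA')
        if appdata:
            return os.path.join(appdata, name)
    elif sys.platform == 'darwin':
        home = env.get('HOME')
        if home:
            return os.path.join(home, 'Library', 'Caches', name)
    else:
        # POSIX: prefer XDG_CACHE_HOME, then fall back to ~/.cache
        xdg = env.get('XDG_CACHE_HOME')
        if xdg:
            return os.path.join(xdg, name)
        home = env.get('HOME')
        if home:
            return os.path.join(home, '.cache', name)
    raise LookupError('unknown %s usercache location' % name)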
119 | def inusercache(ui, hash): |
|
119 | def inusercache(ui, hash): | |
120 | path = usercachepath(ui, hash) |
|
120 | path = usercachepath(ui, hash) | |
121 | return os.path.exists(path) |
|
121 | return os.path.exists(path) | |
122 |
|
122 | |||
123 |
|
123 | |||
124 | def findfile(repo, hash): |
|
124 | def findfile(repo, hash): | |
125 | '''Return store path of the largefile with the specified hash. |
|
125 | '''Return store path of the largefile with the specified hash. | |
126 | As a side effect, the file might be linked from user cache. |
|
126 | As a side effect, the file might be linked from user cache. | |
127 | Return None if the file can't be found locally.''' |
|
127 | Return None if the file can't be found locally.''' | |
128 | path, exists = findstorepath(repo, hash) |
|
128 | path, exists = findstorepath(repo, hash) | |
129 | if exists: |
|
129 | if exists: | |
130 | repo.ui.note(_(b'found %s in store\n') % hash) |
|
130 | repo.ui.note(_(b'found %s in store\n') % hash) | |
131 | return path |
|
131 | return path | |
132 | elif inusercache(repo.ui, hash): |
|
132 | elif inusercache(repo.ui, hash): | |
133 | repo.ui.note(_(b'found %s in system cache\n') % hash) |
|
133 | repo.ui.note(_(b'found %s in system cache\n') % hash) | |
134 | path = storepath(repo, hash) |
|
134 | path = storepath(repo, hash) | |
135 | link(usercachepath(repo.ui, hash), path) |
|
135 | link(usercachepath(repo.ui, hash), path) | |
136 | return path |
|
136 | return path | |
137 | return None |
|
137 | return None | |
138 |
|
138 | |||
139 |
|
139 | |||
140 | class largefilesdirstate(dirstate.dirstate): |
|
140 | class largefilesdirstate(dirstate.dirstate): | |
141 | def __getitem__(self, key): |
|
141 | def __getitem__(self, key): | |
142 | return super(largefilesdirstate, self).__getitem__(unixpath(key)) |
|
142 | return super(largefilesdirstate, self).__getitem__(unixpath(key)) | |
143 |
|
143 | |||
144 | def normal(self, f): |
|
144 | def normal(self, f): | |
145 | return super(largefilesdirstate, self).normal(unixpath(f)) |
|
145 | return super(largefilesdirstate, self).normal(unixpath(f)) | |
146 |
|
146 | |||
147 | def remove(self, f): |
|
147 | def remove(self, f): | |
148 | return super(largefilesdirstate, self).remove(unixpath(f)) |
|
148 | return super(largefilesdirstate, self).remove(unixpath(f)) | |
149 |
|
149 | |||
150 | def add(self, f): |
|
150 | def add(self, f): | |
151 | return super(largefilesdirstate, self).add(unixpath(f)) |
|
151 | return super(largefilesdirstate, self).add(unixpath(f)) | |
152 |
|
152 | |||
153 | def drop(self, f): |
|
153 | def drop(self, f): | |
154 | return super(largefilesdirstate, self).drop(unixpath(f)) |
|
154 | return super(largefilesdirstate, self).drop(unixpath(f)) | |
155 |
|
155 | |||
156 | def forget(self, f): |
|
156 | def forget(self, f): | |
157 | return super(largefilesdirstate, self).forget(unixpath(f)) |
|
157 | return super(largefilesdirstate, self).forget(unixpath(f)) | |
158 |
|
158 | |||
159 | def normallookup(self, f): |
|
159 | def normallookup(self, f): | |
160 | return super(largefilesdirstate, self).normallookup(unixpath(f)) |
|
160 | return super(largefilesdirstate, self).normallookup(unixpath(f)) | |
161 |
|
161 | |||
162 | def _ignore(self, f): |
|
162 | def _ignore(self, f): | |
163 | return False |
|
163 | return False | |
164 |
|
164 | |||
165 | def write(self, tr=False): |
|
165 | def write(self, tr=False): | |
166 | # (1) disable PENDING mode always |
|
166 | # (1) disable PENDING mode always | |
167 | # (lfdirstate isn't yet managed as a part of the transaction) |
|
167 | # (lfdirstate isn't yet managed as a part of the transaction) | |
168 | # (2) avoid develwarn 'use dirstate.write with ....' |
|
168 | # (2) avoid develwarn 'use dirstate.write with ....' | |
169 | super(largefilesdirstate, self).write(None) |
|
169 | super(largefilesdirstate, self).write(None) | |
170 |
|
170 | |||
171 |
|
171 | |||
172 | def openlfdirstate(ui, repo, create=True): |
|
172 | def openlfdirstate(ui, repo, create=True): | |
173 | ''' |
|
173 | ''' | |
174 | Return a dirstate object that tracks largefiles: i.e. its root is |
|
174 | Return a dirstate object that tracks largefiles: i.e. its root is | |
175 | the repo root, but it is saved in .hg/largefiles/dirstate. |
|
175 | the repo root, but it is saved in .hg/largefiles/dirstate. | |
176 | ''' |
|
176 | ''' | |
177 | vfs = repo.vfs |
|
177 | vfs = repo.vfs | |
178 | lfstoredir = longname |
|
178 | lfstoredir = longname | |
179 | opener = vfsmod.vfs(vfs.join(lfstoredir)) |
|
179 | opener = vfsmod.vfs(vfs.join(lfstoredir)) | |
180 | lfdirstate = largefilesdirstate( |
|
180 | lfdirstate = largefilesdirstate( | |
181 | opener, |
|
181 | opener, | |
182 | ui, |
|
182 | ui, | |
183 | repo.root, |
|
183 | repo.root, | |
184 | repo.dirstate._validate, |
|
184 | repo.dirstate._validate, | |
185 | lambda: sparse.matcher(repo), |
|
185 | lambda: sparse.matcher(repo), | |
186 | ) |
|
186 | ) | |
187 |
|
187 | |||
188 | # If the largefiles dirstate does not exist, populate and create |
|
188 | # If the largefiles dirstate does not exist, populate and create | |
189 | # it. This ensures that we create it on the first meaningful |
|
189 | # it. This ensures that we create it on the first meaningful | |
190 | # largefiles operation in a new clone. |
|
190 | # largefiles operation in a new clone. | |
191 | if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')): |
|
191 | if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')): | |
192 | matcher = getstandinmatcher(repo) |
|
192 | matcher = getstandinmatcher(repo) | |
193 | standins = repo.dirstate.walk( |
|
193 | standins = repo.dirstate.walk( | |
194 | matcher, subrepos=[], unknown=False, ignored=False |
|
194 | matcher, subrepos=[], unknown=False, ignored=False | |
195 | ) |
|
195 | ) | |
196 |
|
196 | |||
197 | if len(standins) > 0: |
|
197 | if len(standins) > 0: | |
198 | vfs.makedirs(lfstoredir) |
|
198 | vfs.makedirs(lfstoredir) | |
199 |
|
199 | |||
200 | for standin in standins: |
|
200 | for standin in standins: | |
201 | lfile = splitstandin(standin) |
|
201 | lfile = splitstandin(standin) | |
202 | lfdirstate.normallookup(lfile) |
|
202 | lfdirstate.normallookup(lfile) | |
203 | return lfdirstate |
|
203 | return lfdirstate | |
204 |
|
204 | |||
205 |
|
205 | |||
206 | def lfdirstatestatus(lfdirstate, repo): |
|
206 | def lfdirstatestatus(lfdirstate, repo): | |
207 | pctx = repo[b'.'] |
|
207 | pctx = repo[b'.'] | |
208 | match = matchmod.always() |
|
208 | match = matchmod.always() | |
209 | unsure, s = lfdirstate.status( |
|
209 | unsure, s = lfdirstate.status( | |
210 | match, subrepos=[], ignored=False, clean=False, unknown=False |
|
210 | match, subrepos=[], ignored=False, clean=False, unknown=False | |
211 | ) |
|
211 | ) | |
212 | modified, clean = s.modified, s.clean |
|
212 | modified, clean = s.modified, s.clean | |
213 | for lfile in unsure: |
|
213 | for lfile in unsure: | |
214 | try: |
|
214 | try: | |
215 | fctx = pctx[standin(lfile)] |
|
215 | fctx = pctx[standin(lfile)] | |
216 | except LookupError: |
|
216 | except LookupError: | |
217 | fctx = None |
|
217 | fctx = None | |
218 | if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)): |
|
218 | if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)): | |
219 | modified.append(lfile) |
|
219 | modified.append(lfile) | |
220 | else: |
|
220 | else: | |
221 | clean.append(lfile) |
|
221 | clean.append(lfile) | |
222 | lfdirstate.normal(lfile) |
|
222 | lfdirstate.normal(lfile) | |
223 | return s |
|
223 | return s | |
224 |
|
224 | |||
225 |
|
225 | |||
226 | def listlfiles(repo, rev=None, matcher=None): |
|
226 | def listlfiles(repo, rev=None, matcher=None): | |
227 | '''return a list of largefiles in the working copy or the |
|
227 | '''return a list of largefiles in the working copy or the | |
228 | specified changeset''' |
|
228 | specified changeset''' | |
229 |
|
229 | |||
230 | if matcher is None: |
|
230 | if matcher is None: | |
231 | matcher = getstandinmatcher(repo) |
|
231 | matcher = getstandinmatcher(repo) | |
232 |
|
232 | |||
233 | # ignore unknown files in working directory |
|
233 | # ignore unknown files in working directory | |
234 | return [ |
|
234 | return [ | |
235 | splitstandin(f) |
|
235 | splitstandin(f) | |
236 | for f in repo[rev].walk(matcher) |
|
236 | for f in repo[rev].walk(matcher) | |
237 | if rev is not None or repo.dirstate[f] != b'?' |
|
237 | if rev is not None or repo.dirstate[f] != b'?' | |
238 | ] |
|
238 | ] | |
239 |
|
239 | |||
240 |
|
240 | |||
241 | def instore(repo, hash, forcelocal=False): |
|
241 | def instore(repo, hash, forcelocal=False): | |
242 | '''Return true if a largefile with the given hash exists in the store''' |
|
242 | '''Return true if a largefile with the given hash exists in the store''' | |
243 | return os.path.exists(storepath(repo, hash, forcelocal)) |
|
243 | return os.path.exists(storepath(repo, hash, forcelocal)) | |
244 |
|
244 | |||
245 |
|
245 | |||
246 | def storepath(repo, hash, forcelocal=False): |
|
246 | def storepath(repo, hash, forcelocal=False): | |
247 | '''Return the correct location in the repository largefiles store for a |
|
247 | '''Return the correct location in the repository largefiles store for a | |
248 | file with the given hash.''' |
|
248 | file with the given hash.''' | |
249 | if not forcelocal and repo.shared(): |
|
249 | if not forcelocal and repo.shared(): | |
250 | return repo.vfs.reljoin(repo.sharedpath, longname, hash) |
|
250 | return repo.vfs.reljoin(repo.sharedpath, longname, hash) | |
251 | return repo.vfs.join(longname, hash) |
|
251 | return repo.vfs.join(longname, hash) | |
252 |
|
252 | |||
253 |
|
253 | |||
254 | def findstorepath(repo, hash): |
|
254 | def findstorepath(repo, hash): | |
255 | '''Search through the local store path(s) to find the file for the given |
|
255 | '''Search through the local store path(s) to find the file for the given | |
256 | hash. If the file is not found, its path in the primary store is returned. |
|
256 | hash. If the file is not found, its path in the primary store is returned. | |
257 | The return value is a tuple of (path, exists(path)). |
|
257 | The return value is a tuple of (path, exists(path)). | |
258 | ''' |
|
258 | ''' | |
259 | # For shared repos, the primary store is in the share source. But for |
|
259 | # For shared repos, the primary store is in the share source. But for | |
260 | # backward compatibility, force a lookup in the local store if it wasn't |
|
260 | # backward compatibility, force a lookup in the local store if it wasn't | |
261 | # found in the share source. |
|
261 | # found in the share source. | |
262 | path = storepath(repo, hash, False) |
|
262 | path = storepath(repo, hash, False) | |
263 |
|
263 | |||
264 | if instore(repo, hash): |
|
264 | if instore(repo, hash): | |
265 | return (path, True) |
|
265 | return (path, True) | |
266 | elif repo.shared() and instore(repo, hash, True): |
|
266 | elif repo.shared() and instore(repo, hash, True): | |
267 | return storepath(repo, hash, True), True |
|
267 | return storepath(repo, hash, True), True | |
268 |
|
268 | |||
269 | return (path, False) |
|
269 | return (path, False) | |
270 |
|
270 | |||
271 |
|
271 | |||
272 | def copyfromcache(repo, hash, filename): |
|
272 | def copyfromcache(repo, hash, filename): | |
273 | '''Copy the specified largefile from the repo or system cache to |
|
273 | '''Copy the specified largefile from the repo or system cache to | |
274 | filename in the repository. Return true on success or false if the |
|
274 | filename in the repository. Return true on success or false if the | |
275 | file was not found in either cache (which should not happen: |
|
275 | file was not found in either cache (which should not happen: | |
276 | this is meant to be called only after ensuring that the needed |
|
276 | this is meant to be called only after ensuring that the needed | |
277 | largefile exists in the cache).''' |
|
277 | largefile exists in the cache).''' | |
278 | wvfs = repo.wvfs |
|
278 | wvfs = repo.wvfs | |
279 | path = findfile(repo, hash) |
|
279 | path = findfile(repo, hash) | |
280 | if path is None: |
|
280 | if path is None: | |
281 | return False |
|
281 | return False | |
282 | wvfs.makedirs(wvfs.dirname(wvfs.join(filename))) |
|
282 | wvfs.makedirs(wvfs.dirname(wvfs.join(filename))) | |
283 | # The write may fail before the file is fully written, but we |
|
283 | # The write may fail before the file is fully written, but we | |
284 | # don't use atomic writes in the working copy. |
|
284 | # don't use atomic writes in the working copy. | |
285 | with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd: |
|
285 | with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd: | |
286 | gothash = copyandhash(util.filechunkiter(srcfd), destfd) |
|
286 | gothash = copyandhash(util.filechunkiter(srcfd), destfd) | |
287 | if gothash != hash: |
|
287 | if gothash != hash: | |
288 | repo.ui.warn( |
|
288 | repo.ui.warn( | |
289 | _(b'%s: data corruption in %s with hash %s\n') |
|
289 | _(b'%s: data corruption in %s with hash %s\n') | |
290 | % (filename, path, gothash) |
|
290 | % (filename, path, gothash) | |
291 | ) |
|
291 | ) | |
292 | wvfs.unlink(filename) |
|
292 | wvfs.unlink(filename) | |
293 | return False |
|
293 | return False | |
294 | return True |
|
294 | return True | |
295 |
|
295 | |||
296 |
|
296 | |||
297 | def copytostore(repo, ctx, file, fstandin): |
|
297 | def copytostore(repo, ctx, file, fstandin): | |
298 | wvfs = repo.wvfs |
|
298 | wvfs = repo.wvfs | |
299 | hash = readasstandin(ctx[fstandin]) |
|
299 | hash = readasstandin(ctx[fstandin]) | |
300 | if instore(repo, hash): |
|
300 | if instore(repo, hash): | |
301 | return |
|
301 | return | |
302 | if wvfs.exists(file): |
|
302 | if wvfs.exists(file): | |
303 | copytostoreabsolute(repo, wvfs.join(file), hash) |
|
303 | copytostoreabsolute(repo, wvfs.join(file), hash) | |
304 | else: |
|
304 | else: | |
305 | repo.ui.warn( |
|
305 | repo.ui.warn( | |
306 | _(b"%s: largefile %s not available from local store\n") |
|
306 | _(b"%s: largefile %s not available from local store\n") | |
307 | % (file, hash) |
|
307 | % (file, hash) | |
308 | ) |
|
308 | ) | |
309 |
|
309 | |||
310 |
|
310 | |||
311 | def copyalltostore(repo, node): |
|
311 | def copyalltostore(repo, node): | |
312 | '''Copy all largefiles in a given revision to the store''' |
|
312 | '''Copy all largefiles in a given revision to the store''' | |
313 |
|
313 | |||
314 | ctx = repo[node] |
|
314 | ctx = repo[node] | |
315 | for filename in ctx.files(): |
|
315 | for filename in ctx.files(): | |
316 | realfile = splitstandin(filename) |
|
316 | realfile = splitstandin(filename) | |
317 | if realfile is not None and filename in ctx.manifest(): |
|
317 | if realfile is not None and filename in ctx.manifest(): | |
318 | copytostore(repo, ctx, realfile, filename) |
|
318 | copytostore(repo, ctx, realfile, filename) | |
319 |
|
319 | |||
320 |
|
320 | |||
321 | def copytostoreabsolute(repo, file, hash): |
|
321 | def copytostoreabsolute(repo, file, hash): | |
322 | if inusercache(repo.ui, hash): |
|
322 | if inusercache(repo.ui, hash): | |
323 | link(usercachepath(repo.ui, hash), storepath(repo, hash)) |
|
323 | link(usercachepath(repo.ui, hash), storepath(repo, hash)) | |
324 | else: |
|
324 | else: | |
325 | util.makedirs(os.path.dirname(storepath(repo, hash))) |
|
325 | util.makedirs(os.path.dirname(storepath(repo, hash))) | |
326 | with open(file, b'rb') as srcf: |
|
326 | with open(file, b'rb') as srcf: | |
327 | with util.atomictempfile( |
|
327 | with util.atomictempfile( | |
328 | storepath(repo, hash), createmode=repo.store.createmode |
|
328 | storepath(repo, hash), createmode=repo.store.createmode | |
329 | ) as dstf: |
|
329 | ) as dstf: | |
330 | for chunk in util.filechunkiter(srcf): |
|
330 | for chunk in util.filechunkiter(srcf): | |
331 | dstf.write(chunk) |
|
331 | dstf.write(chunk) | |
332 | linktousercache(repo, hash) |
|
332 | linktousercache(repo, hash) | |
333 |
|
333 | |||
334 |
|
334 | |||
335 | def linktousercache(repo, hash): |
|
335 | def linktousercache(repo, hash): | |
336 | '''Link / copy the largefile with the specified hash from the store |
|
336 | '''Link / copy the largefile with the specified hash from the store | |
337 | to the cache.''' |
|
337 | to the cache.''' | |
338 | path = usercachepath(repo.ui, hash) |
|
338 | path = usercachepath(repo.ui, hash) | |
339 | link(storepath(repo, hash), path) |
|
339 | link(storepath(repo, hash), path) | |
340 |
|
340 | |||
341 |
|
341 | |||
342 | def getstandinmatcher(repo, rmatcher=None): |
|
342 | def getstandinmatcher(repo, rmatcher=None): | |
343 | '''Return a match object that applies rmatcher to the standin directory''' |
|
343 | '''Return a match object that applies rmatcher to the standin directory''' | |
344 | wvfs = repo.wvfs |
|
344 | wvfs = repo.wvfs | |
345 | standindir = shortname |
|
345 | standindir = shortname | |
346 |
|
346 | |||
347 | # no warnings about missing files or directories |
|
347 | # no warnings about missing files or directories | |
348 | badfn = lambda f, msg: None |
|
348 | badfn = lambda f, msg: None | |
349 |
|
349 | |||
350 | if rmatcher and not rmatcher.always(): |
|
350 | if rmatcher and not rmatcher.always(): | |
351 | pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()] |
|
351 | pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()] | |
352 | if not pats: |
|
352 | if not pats: | |
353 | pats = [wvfs.join(standindir)] |
|
353 | pats = [wvfs.join(standindir)] | |
354 | match = scmutil.match(repo[None], pats, badfn=badfn) |
|
354 | match = scmutil.match(repo[None], pats, badfn=badfn) | |
355 | else: |
|
355 | else: | |
356 | # no patterns: relative to repo root |
|
356 | # no patterns: relative to repo root | |
357 | match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn) |
|
357 | match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn) | |
358 | return match |
|
358 | return match | |
359 |
|
359 | |||
360 |
|
360 | |||
361 | def composestandinmatcher(repo, rmatcher): |
|
361 | def composestandinmatcher(repo, rmatcher): | |
362 | '''Return a matcher that accepts standins corresponding to the |
|
362 | '''Return a matcher that accepts standins corresponding to the | |
363 | files accepted by rmatcher. Pass the list of files in the matcher |
|
363 | files accepted by rmatcher. Pass the list of files in the matcher | |
364 | as the paths specified by the user.''' |
|
364 | as the paths specified by the user.''' | |
365 | smatcher = getstandinmatcher(repo, rmatcher) |
|
365 | smatcher = getstandinmatcher(repo, rmatcher) | |
366 | isstandin = smatcher.matchfn |
|
366 | isstandin = smatcher.matchfn | |
367 |
|
367 | |||
368 | def composedmatchfn(f): |
|
368 | def composedmatchfn(f): | |
369 | return isstandin(f) and rmatcher.matchfn(splitstandin(f)) |
|
369 | return isstandin(f) and rmatcher.matchfn(splitstandin(f)) | |
370 |
|
370 | |||
371 | smatcher.matchfn = composedmatchfn |
|
371 | smatcher.matchfn = composedmatchfn | |
372 |
|
372 | |||
373 | return smatcher |
|
373 | return smatcher | |
374 |
|
374 | |||
375 |
|
375 | |||
376 | def standin(filename): |
|
376 | def standin(filename): | |
377 | '''Return the repo-relative path to the standin for the specified big |
|
377 | '''Return the repo-relative path to the standin for the specified big | |
378 | file.''' |
|
378 | file.''' | |
379 | # Notes: |
|
379 | # Notes: | |
380 | # 1) Some callers want an absolute path, but for instance addlargefiles |
|
380 | # 1) Some callers want an absolute path, but for instance addlargefiles | |
381 | # needs it repo-relative so it can be passed to repo[None].add(). So |
|
381 | # needs it repo-relative so it can be passed to repo[None].add(). So | |
382 | # leave it up to the caller to use repo.wjoin() to get an absolute path. |
|
382 | # leave it up to the caller to use repo.wjoin() to get an absolute path. | |
383 | # 2) Join with '/' because that's what dirstate always uses, even on |
|
383 | # 2) Join with '/' because that's what dirstate always uses, even on | |
384 | # Windows. Change existing separator to '/' first in case we are |
|
384 | # Windows. Change existing separator to '/' first in case we are | |
385 | # passed filenames from an external source (like the command line). |
|
385 | # passed filenames from an external source (like the command line). | |
386 | return shortnameslash + util.pconvert(filename) |
|
386 | return shortnameslash + util.pconvert(filename) | |
387 |
|
387 | |||
388 |
|
388 | |||
389 | def isstandin(filename): |
|
389 | def isstandin(filename): | |
390 | '''Return true if filename is a big file standin. filename must be |
|
390 | '''Return true if filename is a big file standin. filename must be | |
391 | in Mercurial's internal form (slash-separated).''' |
|
391 | in Mercurial's internal form (slash-separated).''' | |
392 | return filename.startswith(shortnameslash) |
|
392 | return filename.startswith(shortnameslash) | |
393 |
|
393 | |||
394 |
|
394 | |||
395 | def splitstandin(filename): |
|
395 | def splitstandin(filename): | |
396 | # Split on / because that's what dirstate always uses, even on Windows. |
|
396 | # Split on / because that's what dirstate always uses, even on Windows. | |
397 | # Change local separator to / first just in case we are passed filenames |
|
397 | # Change local separator to / first just in case we are passed filenames | |
398 | # from an external source (like the command line). |
|
398 | # from an external source (like the command line). | |
399 | bits = util.pconvert(filename).split(b'/', 1) |
|
399 | bits = util.pconvert(filename).split(b'/', 1) | |
400 | if len(bits) == 2 and bits[0] == shortname: |
|
400 | if len(bits) == 2 and bits[0] == shortname: | |
401 | return bits[1] |
|
401 | return bits[1] | |
402 | else: |
|
402 | else: | |
403 | return None |
|
403 | return None | |
404 |
|
404 | |||
405 |
|
405 | |||
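A standalone sketch of the standin path convention used throughout this file: a largefile foo/bar.bin is represented by a small standin under .hglf/, and both directions of the mapping always use '/' as the separator:

import posixpath

SHORTNAME = '.hglf'

def standin_path(filename):
    # join with '/', as the dirstate does, even on Windows
    return posixpath.join(SHORTNAME, filename.replace('\\', '/'))

def split_standin(filename):
    bits = filename.replace('\\', '/').split('/', 1)
    if len(bits) == 2 and bits[0] == SHORTNAME:
        return bits[1]
    return None

assert standin_path('foo/bar.bin') == '.hglf/foo/bar.bin'
assert split_standin('.hglf/foo/bar.bin') == 'foo/bar.bin'
assert split_standin('foo/bar.bin') is None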
406 | def updatestandin(repo, lfile, standin): |
|
406 | def updatestandin(repo, lfile, standin): | |
407 | """Re-calculate hash value of lfile and write it into standin |
|
407 | """Re-calculate hash value of lfile and write it into standin | |
408 |
|
408 | |||
409 | This assumes that "lfutil.standin(lfile) == standin", for efficiency. |
|
409 | This assumes that "lfutil.standin(lfile) == standin", for efficiency. | |
410 | """ |
|
410 | """ | |
411 | file = repo.wjoin(lfile) |
|
411 | file = repo.wjoin(lfile) | |
412 | if repo.wvfs.exists(lfile): |
|
412 | if repo.wvfs.exists(lfile): | |
413 | hash = hashfile(file) |
|
413 | hash = hashfile(file) | |
414 | executable = getexecutable(file) |
|
414 | executable = getexecutable(file) | |
415 | writestandin(repo, standin, hash, executable) |
|
415 | writestandin(repo, standin, hash, executable) | |
416 | else: |
|
416 | else: | |
417 | raise error.Abort(_(b'%s: file not found!') % lfile) |
|
417 | raise error.Abort(_(b'%s: file not found!') % lfile) | |
418 |
|
418 | |||
419 |
|
419 | |||
420 | def readasstandin(fctx): |
|
420 | def readasstandin(fctx): | |
421 | '''read hex hash from given filectx of standin file |
|
421 | '''read hex hash from given filectx of standin file | |
422 |
|
422 | |||
423 | This encapsulates how "standin" data is stored into storage layer.''' |
|
423 | This encapsulates how "standin" data is stored into storage layer.''' | |
424 | return fctx.data().strip() |
|
424 | return fctx.data().strip() | |
425 |
|
425 | |||
426 |
|
426 | |||
427 | def writestandin(repo, standin, hash, executable): |
|
427 | def writestandin(repo, standin, hash, executable): | |
428 | '''write hash to <repo.root>/<standin>''' |
|
428 | '''write hash to <repo.root>/<standin>''' | |
429 | repo.wwrite(standin, hash + b'\n', executable and b'x' or b'') |
|
429 | repo.wwrite(standin, hash + b'\n', executable and b'x' or b'') | |
430 |
|
430 | |||
431 |
|
431 | |||
432 | def copyandhash(instream, outfile): |
|
432 | def copyandhash(instream, outfile): | |
433 | '''Read bytes from instream (iterable) and write them to outfile, |
|
433 | '''Read bytes from instream (iterable) and write them to outfile, | |
434 | computing the SHA-1 hash of the data along the way. Return the hash.''' |
|
434 | computing the SHA-1 hash of the data along the way. Return the hash.''' | |
435 | hasher = hashlib.sha1(b'') |
|
435 | hasher = hashutil.sha1(b'') | |
436 | for data in instream: |
|
436 | for data in instream: | |
437 | hasher.update(data) |
|
437 | hasher.update(data) | |
438 | outfile.write(data) |
|
438 | outfile.write(data) | |
439 | return hex(hasher.digest()) |
|
439 | return hex(hasher.digest()) | |
440 |
|
440 | |||
441 |
|
441 | |||
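A standalone sketch of the streaming copy-and-hash pattern above; the patch routes hashing through mercurial.utils.hashutil, but plain hashlib.sha1 keeps the example self-contained. The returned digest is what callers such as copyfromcache compare against the expected standin hash:

import hashlib
import io

def copy_and_hash(instream, outfile):
    # hash the chunks as they are written so the data is only read once
    hasher = hashlib.sha1()
    for chunk in instream:
        hasher.update(chunk)
        outfile.write(chunk)
    return hasher.hexdigest()

chunks = [b'first chunk, ', b'second chunk']
dst = io.BytesIO()
gothash = copy_and_hash(chunks, dst)
assert dst.getvalue() == b'first chunk, second chunk'
assert gothash == hashlib.sha1(b'first chunk, second chunk').hexdigest()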
442 | def hashfile(file): |
|
442 | def hashfile(file): | |
443 | if not os.path.exists(file): |
|
443 | if not os.path.exists(file): | |
444 | return b'' |
|
444 | return b'' | |
445 | with open(file, b'rb') as fd: |
|
445 | with open(file, b'rb') as fd: | |
446 | return hexsha1(fd) |
|
446 | return hexsha1(fd) | |
447 |
|
447 | |||
448 |
|
448 | |||
449 | def getexecutable(filename): |
|
449 | def getexecutable(filename): | |
450 | mode = os.stat(filename).st_mode |
|
450 | mode = os.stat(filename).st_mode | |
451 | return ( |
|
451 | return ( | |
452 | (mode & stat.S_IXUSR) |
|
452 | (mode & stat.S_IXUSR) | |
453 | and (mode & stat.S_IXGRP) |
|
453 | and (mode & stat.S_IXGRP) | |
454 | and (mode & stat.S_IXOTH) |
|
454 | and (mode & stat.S_IXOTH) | |
455 | ) |
|
455 | ) | |
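As a worked example of the permission test in getexecutable(), the snippet below evaluates the same three-bit check against plain mode values (POSIX semantics assumed; the sample modes are illustrative).

# Worked example of the execute-bit test: all of the user, group and other
# execute bits must be set for the file to count as executable.
import stat

for mode in (0o755, 0o744):
    executable = bool(
        (mode & stat.S_IXUSR) and (mode & stat.S_IXGRP) and (mode & stat.S_IXOTH)
    )
    print(oct(mode), executable)   # 0o755 -> True, 0o744 -> False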
456 |
|
456 | |||
457 |
|
457 | |||
458 | def urljoin(first, second, *arg): |
|
458 | def urljoin(first, second, *arg): | |
459 | def join(left, right): |
|
459 | def join(left, right): | |
460 | if not left.endswith(b'/'): |
|
460 | if not left.endswith(b'/'): | |
461 | left += b'/' |
|
461 | left += b'/' | |
462 | if right.startswith(b'/'): |
|
462 | if right.startswith(b'/'): | |
463 | right = right[1:] |
|
463 | right = right[1:] | |
464 | return left + right |
|
464 | return left + right | |
465 |
|
465 | |||
466 | url = join(first, second) |
|
466 | url = join(first, second) | |
467 | for a in arg: |
|
467 | for a in arg: | |
468 | url = join(url, a) |
|
468 | url = join(url, a) | |
469 | return url |
|
469 | return url | |
470 |
|
470 | |||
471 |
|
471 | |||
472 | def hexsha1(fileobj): |
|
472 | def hexsha1(fileobj): | |
473 | """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like |
|
473 | """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like | |
474 | object data""" |
|
474 | object data""" | |
475 | h = hashlib.sha1()
|
475 | h = hashutil.sha1() | |
476 | for chunk in util.filechunkiter(fileobj): |
|
476 | for chunk in util.filechunkiter(fileobj): | |
477 | h.update(chunk) |
|
477 | h.update(chunk) | |
478 | return hex(h.digest()) |
|
478 | return hex(h.digest()) | |
479 |
|
479 | |||
480 |
|
480 | |||
481 | def httpsendfile(ui, filename): |
|
481 | def httpsendfile(ui, filename): | |
482 | return httpconnection.httpsendfile(ui, filename, b'rb') |
|
482 | return httpconnection.httpsendfile(ui, filename, b'rb') | |
483 |
|
483 | |||
484 |
|
484 | |||
485 | def unixpath(path): |
|
485 | def unixpath(path): | |
486 | '''Return a version of path normalized for use with the lfdirstate.''' |
|
486 | '''Return a version of path normalized for use with the lfdirstate.''' | |
487 | return util.pconvert(os.path.normpath(path)) |
|
487 | return util.pconvert(os.path.normpath(path)) | |
488 |
|
488 | |||
489 |
|
489 | |||
490 | def islfilesrepo(repo): |
|
490 | def islfilesrepo(repo): | |
491 | '''Return true if the repo is a largefile repo.''' |
|
491 | '''Return true if the repo is a largefile repo.''' | |
492 | if b'largefiles' in repo.requirements and any( |
|
492 | if b'largefiles' in repo.requirements and any( | |
493 | shortnameslash in f[0] for f in repo.store.datafiles() |
|
493 | shortnameslash in f[0] for f in repo.store.datafiles() | |
494 | ): |
|
494 | ): | |
495 | return True |
|
495 | return True | |
496 |
|
496 | |||
497 | return any(openlfdirstate(repo.ui, repo, False)) |
|
497 | return any(openlfdirstate(repo.ui, repo, False)) | |
498 |
|
498 | |||
499 |
|
499 | |||
500 | class storeprotonotcapable(Exception): |
|
500 | class storeprotonotcapable(Exception): | |
501 | def __init__(self, storetypes): |
|
501 | def __init__(self, storetypes): | |
502 | self.storetypes = storetypes |
|
502 | self.storetypes = storetypes | |
503 |
|
503 | |||
504 |
|
504 | |||
505 | def getstandinsstate(repo): |
|
505 | def getstandinsstate(repo): | |
506 | standins = [] |
|
506 | standins = [] | |
507 | matcher = getstandinmatcher(repo) |
|
507 | matcher = getstandinmatcher(repo) | |
508 | wctx = repo[None] |
|
508 | wctx = repo[None] | |
509 | for standin in repo.dirstate.walk( |
|
509 | for standin in repo.dirstate.walk( | |
510 | matcher, subrepos=[], unknown=False, ignored=False |
|
510 | matcher, subrepos=[], unknown=False, ignored=False | |
511 | ): |
|
511 | ): | |
512 | lfile = splitstandin(standin) |
|
512 | lfile = splitstandin(standin) | |
513 | try: |
|
513 | try: | |
514 | hash = readasstandin(wctx[standin]) |
|
514 | hash = readasstandin(wctx[standin]) | |
515 | except IOError: |
|
515 | except IOError: | |
516 | hash = None |
|
516 | hash = None | |
517 | standins.append((lfile, hash)) |
|
517 | standins.append((lfile, hash)) | |
518 | return standins |
|
518 | return standins | |
519 |
|
519 | |||
520 |
|
520 | |||
521 | def synclfdirstate(repo, lfdirstate, lfile, normallookup): |
|
521 | def synclfdirstate(repo, lfdirstate, lfile, normallookup): | |
522 | lfstandin = standin(lfile) |
|
522 | lfstandin = standin(lfile) | |
523 | if lfstandin in repo.dirstate: |
|
523 | if lfstandin in repo.dirstate: | |
524 | stat = repo.dirstate._map[lfstandin] |
|
524 | stat = repo.dirstate._map[lfstandin] | |
525 | state, mtime = stat[0], stat[3] |
|
525 | state, mtime = stat[0], stat[3] | |
526 | else: |
|
526 | else: | |
527 | state, mtime = b'?', -1 |
|
527 | state, mtime = b'?', -1 | |
528 | if state == b'n': |
|
528 | if state == b'n': | |
529 | if normallookup or mtime < 0 or not repo.wvfs.exists(lfile): |
|
529 | if normallookup or mtime < 0 or not repo.wvfs.exists(lfile): | |
530 | # state 'n' doesn't ensure 'clean' in this case |
|
530 | # state 'n' doesn't ensure 'clean' in this case | |
531 | lfdirstate.normallookup(lfile) |
|
531 | lfdirstate.normallookup(lfile) | |
532 | else: |
|
532 | else: | |
533 | lfdirstate.normal(lfile) |
|
533 | lfdirstate.normal(lfile) | |
534 | elif state == b'm': |
|
534 | elif state == b'm': | |
535 | lfdirstate.normallookup(lfile) |
|
535 | lfdirstate.normallookup(lfile) | |
536 | elif state == b'r': |
|
536 | elif state == b'r': | |
537 | lfdirstate.remove(lfile) |
|
537 | lfdirstate.remove(lfile) | |
538 | elif state == b'a': |
|
538 | elif state == b'a': | |
539 | lfdirstate.add(lfile) |
|
539 | lfdirstate.add(lfile) | |
540 | elif state == b'?': |
|
540 | elif state == b'?': | |
541 | lfdirstate.drop(lfile) |
|
541 | lfdirstate.drop(lfile) | |
542 |
|
542 | |||
543 |
|
543 | |||
544 | def markcommitted(orig, ctx, node): |
|
544 | def markcommitted(orig, ctx, node): | |
545 | repo = ctx.repo() |
|
545 | repo = ctx.repo() | |
546 |
|
546 | |||
547 | orig(node) |
|
547 | orig(node) | |
548 |
|
548 | |||
549 | # ATTENTION: "ctx.files()" may differ from "repo[node].files()" |
|
549 | # ATTENTION: "ctx.files()" may differ from "repo[node].files()" | |
550 | # because files coming from the 2nd parent are omitted in the latter. |
|
550 | # because files coming from the 2nd parent are omitted in the latter. | |
551 | # |
|
551 | # | |
552 | # The former should be used to get targets of "synclfdirstate", |
|
552 | # The former should be used to get targets of "synclfdirstate", | |
553 | # because such files: |
|
553 | # because such files: | |
554 | # - are marked as "a" by "patch.patch()" (e.g. via transplant), and |
|
554 | # - are marked as "a" by "patch.patch()" (e.g. via transplant), and | |
555 | # - have to be marked as "n" after commit, but |
|
555 | # - have to be marked as "n" after commit, but | |
556 | # - aren't listed in "repo[node].files()" |
|
556 | # - aren't listed in "repo[node].files()" | |
557 |
|
557 | |||
558 | lfdirstate = openlfdirstate(repo.ui, repo) |
|
558 | lfdirstate = openlfdirstate(repo.ui, repo) | |
559 | for f in ctx.files(): |
|
559 | for f in ctx.files(): | |
560 | lfile = splitstandin(f) |
|
560 | lfile = splitstandin(f) | |
561 | if lfile is not None: |
|
561 | if lfile is not None: | |
562 | synclfdirstate(repo, lfdirstate, lfile, False) |
|
562 | synclfdirstate(repo, lfdirstate, lfile, False) | |
563 | lfdirstate.write() |
|
563 | lfdirstate.write() | |
564 |
|
564 | |||
565 | # As part of committing, copy all of the largefiles into the cache. |
|
565 | # As part of committing, copy all of the largefiles into the cache. | |
566 | # |
|
566 | # | |
567 | # Using "node" instead of "ctx" implies an additional "repo[node]"
|
567 | # Using "node" instead of "ctx" implies an additional "repo[node]" |
568 | # lookup during copyalltostore(), but we can omit a redundant check for
|
568 | # lookup during copyalltostore(), but we can omit a redundant check for |
569 | # files coming from the 2nd parent, which should exist in the store
|
569 | # files coming from the 2nd parent, which should exist in the store |
570 | # at merging. |
|
570 | # at merging. | |
571 | copyalltostore(repo, node) |
|
571 | copyalltostore(repo, node) | |
572 |
|
572 | |||
573 |
|
573 | |||
574 | def getlfilestoupdate(oldstandins, newstandins): |
|
574 | def getlfilestoupdate(oldstandins, newstandins): | |
575 | changedstandins = set(oldstandins).symmetric_difference(set(newstandins)) |
|
575 | changedstandins = set(oldstandins).symmetric_difference(set(newstandins)) | |
576 | filelist = [] |
|
576 | filelist = [] | |
577 | for f in changedstandins: |
|
577 | for f in changedstandins: | |
578 | if f[0] not in filelist: |
|
578 | if f[0] not in filelist: | |
579 | filelist.append(f[0]) |
|
579 | filelist.append(f[0]) | |
580 | return filelist |
|
580 | return filelist | |
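A tiny worked example of the symmetric-difference step in getlfilestoupdate(): any (lfile, hash) pair that differs between the old and new standin snapshots puts that largefile on the update list (the sample data below is invented for illustration).

# Worked example: only largefiles whose standin hash changed are listed.
oldstandins = [(b'big.bin', b'aaa'), (b'huge.iso', b'bbb')]
newstandins = [(b'big.bin', b'ccc'), (b'huge.iso', b'bbb')]

changed = set(oldstandins).symmetric_difference(set(newstandins))
filelist = []
for f in changed:
    if f[0] not in filelist:
        filelist.append(f[0])
print(filelist)   # [b'big.bin']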
581 |
|
581 | |||
582 |
|
582 | |||
583 | def getlfilestoupload(repo, missing, addfunc): |
|
583 | def getlfilestoupload(repo, missing, addfunc): | |
584 | makeprogress = repo.ui.makeprogress |
|
584 | makeprogress = repo.ui.makeprogress | |
585 | with makeprogress( |
|
585 | with makeprogress( | |
586 | _(b'finding outgoing largefiles'), |
|
586 | _(b'finding outgoing largefiles'), | |
587 | unit=_(b'revisions'), |
|
587 | unit=_(b'revisions'), | |
588 | total=len(missing), |
|
588 | total=len(missing), | |
589 | ) as progress: |
|
589 | ) as progress: | |
590 | for i, n in enumerate(missing): |
|
590 | for i, n in enumerate(missing): | |
591 | progress.update(i) |
|
591 | progress.update(i) | |
592 | parents = [p for p in repo[n].parents() if p != node.nullid] |
|
592 | parents = [p for p in repo[n].parents() if p != node.nullid] | |
593 |
|
593 | |||
594 | with lfstatus(repo, value=False): |
|
594 | with lfstatus(repo, value=False): | |
595 | ctx = repo[n] |
|
595 | ctx = repo[n] | |
596 |
|
596 | |||
597 | files = set(ctx.files()) |
|
597 | files = set(ctx.files()) | |
598 | if len(parents) == 2: |
|
598 | if len(parents) == 2: | |
599 | mc = ctx.manifest() |
|
599 | mc = ctx.manifest() | |
600 | mp1 = ctx.p1().manifest() |
|
600 | mp1 = ctx.p1().manifest() | |
601 | mp2 = ctx.p2().manifest() |
|
601 | mp2 = ctx.p2().manifest() | |
602 | for f in mp1: |
|
602 | for f in mp1: | |
603 | if f not in mc: |
|
603 | if f not in mc: | |
604 | files.add(f) |
|
604 | files.add(f) | |
605 | for f in mp2: |
|
605 | for f in mp2: | |
606 | if f not in mc: |
|
606 | if f not in mc: | |
607 | files.add(f) |
|
607 | files.add(f) | |
608 | for f in mc: |
|
608 | for f in mc: | |
609 | if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None): |
|
609 | if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None): | |
610 | files.add(f) |
|
610 | files.add(f) | |
611 | for fn in files: |
|
611 | for fn in files: | |
612 | if isstandin(fn) and fn in ctx: |
|
612 | if isstandin(fn) and fn in ctx: | |
613 | addfunc(fn, readasstandin(ctx[fn])) |
|
613 | addfunc(fn, readasstandin(ctx[fn])) | |
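For the two-parent branch of getlfilestoupload() above, here is a plain-dict sketch of the same file-set computation; mc, mp1 and mp2 stand in for manifests (filename -> file node) and are invented sample data, not real repository objects.

# Plain-dict sketch of the merge case: collect files missing from the merge
# result plus files whose entry differs from either parent's entry.
mp1 = {b'a': b'n1', b'b': b'n2', b'gone': b'n3'}   # first parent manifest
mp2 = {b'a': b'n1', b'c': b'n4'}                   # second parent manifest
mc = {b'a': b'n1', b'b': b'n5', b'c': b'n4'}       # merge commit manifest

files = set()
for f in mp1:
    if f not in mc:
        files.add(f)
for f in mp2:
    if f not in mc:
        files.add(f)
for f in mc:
    if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
        files.add(f)
print(sorted(files))   # [b'b', b'c', b'gone']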
614 |
|
614 | |||
615 |
|
615 | |||
616 | def updatestandinsbymatch(repo, match): |
|
616 | def updatestandinsbymatch(repo, match): | |
617 | '''Update standins in the working directory according to specified match |
|
617 | '''Update standins in the working directory according to specified match | |
618 |
|
618 | |||
619 | This returns a (possibly modified) ``match`` object to be used for
|
619 | This returns a (possibly modified) ``match`` object to be used for |
620 | the subsequent commit process.
|
620 | the subsequent commit process. |
621 | ''' |
|
621 | ''' | |
622 |
|
622 | |||
623 | ui = repo.ui |
|
623 | ui = repo.ui | |
624 |
|
624 | |||
625 | # Case 1: user calls commit with no specific files or |
|
625 | # Case 1: user calls commit with no specific files or | |
626 | # include/exclude patterns: refresh and commit all files that |
|
626 | # include/exclude patterns: refresh and commit all files that | |
627 | # are "dirty". |
|
627 | # are "dirty". | |
628 | if match is None or match.always(): |
|
628 | if match is None or match.always(): | |
629 | # Spend a bit of time here to get a list of files we know |
|
629 | # Spend a bit of time here to get a list of files we know | |
630 | # are modified so we can compare only against those. |
|
630 | # are modified so we can compare only against those. | |
631 | # It can cost a lot of time (several seconds) |
|
631 | # It can cost a lot of time (several seconds) | |
632 | # otherwise to update all standins if the largefiles are |
|
632 | # otherwise to update all standins if the largefiles are | |
633 | # large. |
|
633 | # large. | |
634 | lfdirstate = openlfdirstate(ui, repo) |
|
634 | lfdirstate = openlfdirstate(ui, repo) | |
635 | dirtymatch = matchmod.always() |
|
635 | dirtymatch = matchmod.always() | |
636 | unsure, s = lfdirstate.status( |
|
636 | unsure, s = lfdirstate.status( | |
637 | dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False |
|
637 | dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False | |
638 | ) |
|
638 | ) | |
639 | modifiedfiles = unsure + s.modified + s.added + s.removed |
|
639 | modifiedfiles = unsure + s.modified + s.added + s.removed | |
640 | lfiles = listlfiles(repo) |
|
640 | lfiles = listlfiles(repo) | |
641 | # this only loops through largefiles that exist (not |
|
641 | # this only loops through largefiles that exist (not | |
642 | # removed/renamed) |
|
642 | # removed/renamed) | |
643 | for lfile in lfiles: |
|
643 | for lfile in lfiles: | |
644 | if lfile in modifiedfiles: |
|
644 | if lfile in modifiedfiles: | |
645 | fstandin = standin(lfile) |
|
645 | fstandin = standin(lfile) | |
646 | if repo.wvfs.exists(fstandin): |
|
646 | if repo.wvfs.exists(fstandin): | |
647 | # this handles the case where a rebase is being |
|
647 | # this handles the case where a rebase is being | |
648 | # performed and the working copy is not updated |
|
648 | # performed and the working copy is not updated | |
649 | # yet. |
|
649 | # yet. | |
650 | if repo.wvfs.exists(lfile): |
|
650 | if repo.wvfs.exists(lfile): | |
651 | updatestandin(repo, lfile, fstandin) |
|
651 | updatestandin(repo, lfile, fstandin) | |
652 |
|
652 | |||
653 | return match |
|
653 | return match | |
654 |
|
654 | |||
655 | lfiles = listlfiles(repo) |
|
655 | lfiles = listlfiles(repo) | |
656 | match._files = repo._subdirlfs(match.files(), lfiles) |
|
656 | match._files = repo._subdirlfs(match.files(), lfiles) | |
657 |
|
657 | |||
658 | # Case 2: user calls commit with specified patterns: refresh |
|
658 | # Case 2: user calls commit with specified patterns: refresh | |
659 | # any matching big files. |
|
659 | # any matching big files. | |
660 | smatcher = composestandinmatcher(repo, match) |
|
660 | smatcher = composestandinmatcher(repo, match) | |
661 | standins = repo.dirstate.walk( |
|
661 | standins = repo.dirstate.walk( | |
662 | smatcher, subrepos=[], unknown=False, ignored=False |
|
662 | smatcher, subrepos=[], unknown=False, ignored=False | |
663 | ) |
|
663 | ) | |
664 |
|
664 | |||
665 | # No matching big files: get out of the way and pass control to |
|
665 | # No matching big files: get out of the way and pass control to | |
666 | # the usual commit() method. |
|
666 | # the usual commit() method. | |
667 | if not standins: |
|
667 | if not standins: | |
668 | return match |
|
668 | return match | |
669 |
|
669 | |||
670 | # Refresh all matching big files. It's possible that the |
|
670 | # Refresh all matching big files. It's possible that the | |
671 | # commit will end up failing, in which case the big files will |
|
671 | # commit will end up failing, in which case the big files will | |
672 | # stay refreshed. No harm done: the user modified them and |
|
672 | # stay refreshed. No harm done: the user modified them and | |
673 | # asked to commit them, so sooner or later we're going to |
|
673 | # asked to commit them, so sooner or later we're going to | |
674 | # refresh the standins. Might as well leave them refreshed. |
|
674 | # refresh the standins. Might as well leave them refreshed. | |
675 | lfdirstate = openlfdirstate(ui, repo) |
|
675 | lfdirstate = openlfdirstate(ui, repo) | |
676 | for fstandin in standins: |
|
676 | for fstandin in standins: | |
677 | lfile = splitstandin(fstandin) |
|
677 | lfile = splitstandin(fstandin) | |
678 | if lfdirstate[lfile] != b'r': |
|
678 | if lfdirstate[lfile] != b'r': | |
679 | updatestandin(repo, lfile, fstandin) |
|
679 | updatestandin(repo, lfile, fstandin) | |
680 |
|
680 | |||
681 | # Cook up a new matcher that only matches regular files or |
|
681 | # Cook up a new matcher that only matches regular files or | |
682 | # standins corresponding to the big files requested by the |
|
682 | # standins corresponding to the big files requested by the | |
683 | # user. Have to modify _files to prevent commit() from |
|
683 | # user. Have to modify _files to prevent commit() from | |
684 | # complaining "not tracked" for big files. |
|
684 | # complaining "not tracked" for big files. | |
685 | match = copy.copy(match) |
|
685 | match = copy.copy(match) | |
686 | origmatchfn = match.matchfn |
|
686 | origmatchfn = match.matchfn | |
687 |
|
687 | |||
688 | # Check both the list of largefiles and the list of |
|
688 | # Check both the list of largefiles and the list of | |
689 | # standins because if a largefile was removed, it |
|
689 | # standins because if a largefile was removed, it | |
690 | # won't be in the list of largefiles at this point |
|
690 | # won't be in the list of largefiles at this point | |
691 | match._files += sorted(standins) |
|
691 | match._files += sorted(standins) | |
692 |
|
692 | |||
693 | actualfiles = [] |
|
693 | actualfiles = [] | |
694 | for f in match._files: |
|
694 | for f in match._files: | |
695 | fstandin = standin(f) |
|
695 | fstandin = standin(f) | |
696 |
|
696 | |||
697 | # For largefiles, only one of the normal and standin should be |
|
697 | # For largefiles, only one of the normal and standin should be | |
698 | # committed (except if one of them is a remove). In the case of a |
|
698 | # committed (except if one of them is a remove). In the case of a | |
699 | # standin removal, drop the normal file if it is unknown to dirstate. |
|
699 | # standin removal, drop the normal file if it is unknown to dirstate. | |
700 | # Thus, skip plain largefile names but keep the standin. |
|
700 | # Thus, skip plain largefile names but keep the standin. | |
701 | if f in lfiles or fstandin in standins: |
|
701 | if f in lfiles or fstandin in standins: | |
702 | if repo.dirstate[fstandin] != b'r': |
|
702 | if repo.dirstate[fstandin] != b'r': | |
703 | if repo.dirstate[f] != b'r': |
|
703 | if repo.dirstate[f] != b'r': | |
704 | continue |
|
704 | continue | |
705 | elif repo.dirstate[f] == b'?': |
|
705 | elif repo.dirstate[f] == b'?': | |
706 | continue |
|
706 | continue | |
707 |
|
707 | |||
708 | actualfiles.append(f) |
|
708 | actualfiles.append(f) | |
709 | match._files = actualfiles |
|
709 | match._files = actualfiles | |
710 |
|
710 | |||
711 | def matchfn(f): |
|
711 | def matchfn(f): | |
712 | if origmatchfn(f): |
|
712 | if origmatchfn(f): | |
713 | return f not in lfiles |
|
713 | return f not in lfiles | |
714 | else: |
|
714 | else: | |
715 | return f in standins |
|
715 | return f in standins | |
716 |
|
716 | |||
717 | match.matchfn = matchfn |
|
717 | match.matchfn = matchfn | |
718 |
|
718 | |||
719 | return match |
|
719 | return match | |
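The tail of updatestandinsbymatch() swaps the match function so that largefile names stop matching and their standins match instead. A stripped-down sketch of that override pattern, using a hypothetical Matcher class rather than Mercurial's matcher, looks like this:

# Hedged sketch of the matchfn override: plain files keep matching, largefile
# names are filtered out, and their standins match in their place.
class Matcher(object):          # hypothetical stand-in for a Mercurial matcher
    def __init__(self, files):
        self._files = set(files)
    def matchfn(self, f):
        return f in self._files

lfiles = {b'big.bin'}
standins = {b'.hglf/big.bin'}
m = Matcher({b'big.bin', b'README'})
origmatchfn = m.matchfn

def matchfn(f):
    if origmatchfn(f):
        return f not in lfiles
    return f in standins

m.matchfn = matchfn
print(m.matchfn(b'README'))          # True: ordinary file still matches
print(m.matchfn(b'big.bin'))         # False: replaced by its standin
print(m.matchfn(b'.hglf/big.bin'))   # True: the standin matches instead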
720 |
|
720 | |||
721 |
|
721 | |||
722 | class automatedcommithook(object): |
|
722 | class automatedcommithook(object): | |
723 | '''Stateful hook to update standins at the first commit after resuming
|
723 | '''Stateful hook to update standins at the first commit after resuming |
724 |
|
724 | |||
725 | For efficiency, updating standins in the working directory should |
|
725 | For efficiency, updating standins in the working directory should | |
726 | be avoided during automated committing (like rebase, transplant and
|
726 | be avoided during automated committing (like rebase, transplant and |
727 | so on), because they should be updated before committing. |
|
727 | so on), because they should be updated before committing. | |
728 |
|
728 | |||
729 | But the first commit after resuming automated committing (e.g. ``rebase
|
729 | But the first commit after resuming automated committing (e.g. ``rebase |
730 | --continue``) should update them, because largefiles may be |
|
730 | --continue``) should update them, because largefiles may be | |
731 | modified manually. |
|
731 | modified manually. | |
732 | ''' |
|
732 | ''' | |
733 |
|
733 | |||
734 | def __init__(self, resuming): |
|
734 | def __init__(self, resuming): | |
735 | self.resuming = resuming |
|
735 | self.resuming = resuming | |
736 |
|
736 | |||
737 | def __call__(self, repo, match): |
|
737 | def __call__(self, repo, match): | |
738 | if self.resuming: |
|
738 | if self.resuming: | |
739 | self.resuming = False # avoids updating at subsequent commits |
|
739 | self.resuming = False # avoids updating at subsequent commits | |
740 | return updatestandinsbymatch(repo, match) |
|
740 | return updatestandinsbymatch(repo, match) | |
741 | else: |
|
741 | else: | |
742 | return match |
|
742 | return match | |
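The class above boils down to a "do the expensive refresh only on the first call after resuming" switch. A generic, self-contained sketch of that pattern follows; refresh is a hypothetical callback standing in for updatestandinsbymatch().

# Minimal sketch of the resume-once hook pattern.
class resumeoncehook(object):
    def __init__(self, resuming, refresh):
        self.resuming = resuming
        self.refresh = refresh

    def __call__(self, value):
        if self.resuming:
            self.resuming = False     # later calls skip the refresh
            return self.refresh(value)
        return value

hook = resumeoncehook(True, lambda v: v + 1)
print(hook(1))   # 2: the first call after resuming runs the refresh
print(hook(1))   # 1: subsequent calls pass the value through untouched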
743 |
|
743 | |||
744 |
|
744 | |||
745 | def getstatuswriter(ui, repo, forcibly=None): |
|
745 | def getstatuswriter(ui, repo, forcibly=None): | |
746 | '''Return the function to write largefiles-specific status out
|
746 | '''Return the function to write largefiles-specific status out |
747 |
|
747 | |||
748 | If ``forcibly`` is ``None``, this returns the last element of |
|
748 | If ``forcibly`` is ``None``, this returns the last element of | |
749 | ``repo._lfstatuswriters`` as "default" writer function. |
|
749 | ``repo._lfstatuswriters`` as "default" writer function. | |
750 |
|
750 | |||
751 | Otherwise, this returns the function to always write out (or |
|
751 | Otherwise, this returns the function to always write out (or | |
752 | ignore if ``not forcibly``) status. |
|
752 | ignore if ``not forcibly``) status. | |
753 | ''' |
|
753 | ''' | |
754 | if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'): |
|
754 | if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'): | |
755 | return repo._lfstatuswriters[-1] |
|
755 | return repo._lfstatuswriters[-1] | |
756 | else: |
|
756 | else: | |
757 | if forcibly: |
|
757 | if forcibly: | |
758 | return ui.status # forcibly WRITE OUT |
|
758 | return ui.status # forcibly WRITE OUT | |
759 | else: |
|
759 | else: | |
760 | return lambda *msg, **opts: None # forcibly IGNORE |
|
760 | return lambda *msg, **opts: None # forcibly IGNORE |
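To make the tri-state ``forcibly`` behaviour concrete, here is a standalone sketch with a dummy ui object; the names are illustrative and not Mercurial's API.

# Sketch of the tri-state writer selection: None -> configured default,
# True -> always write, False -> swallow the message.
class dummyui(object):
    def status(self, *msg, **opts):
        print(b''.join(msg).decode())

def getwriter(ui, default, forcibly=None):
    if forcibly is None:
        return default
    if forcibly:
        return ui.status                     # forcibly WRITE OUT
    return lambda *msg, **opts: None         # forcibly IGNORE

ui = dummyui()
getwriter(ui, ui.status, forcibly=True)(b'getting changed largefiles')
getwriter(ui, ui.status, forcibly=False)(b'suppressed')   # prints nothing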
@@ -1,561 +1,561 b'' | |||||
1 | from __future__ import absolute_import |
|
1 | from __future__ import absolute_import | |
2 |
|
2 | |||
3 | import collections |
|
3 | import collections | |
4 | import errno |
|
4 | import errno | |
5 | import hashlib |
|
|||
6 | import mmap |
|
5 | import mmap | |
7 | import os |
|
6 | import os | |
8 | import struct |
|
7 | import struct | |
9 | import time |
|
8 | import time | |
10 |
|
9 | |||
11 | from mercurial.i18n import _ |
|
10 | from mercurial.i18n import _ | |
12 | from mercurial.pycompat import ( |
|
11 | from mercurial.pycompat import ( | |
13 | getattr, |
|
12 | getattr, | |
14 | open, |
|
13 | open, | |
15 | ) |
|
14 | ) | |
16 | from mercurial import ( |
|
15 | from mercurial import ( | |
17 | node as nodemod, |
|
16 | node as nodemod, | |
18 | policy, |
|
17 | policy, | |
19 | pycompat, |
|
18 | pycompat, | |
20 | util, |
|
19 | util, | |
21 | vfs as vfsmod, |
|
20 | vfs as vfsmod, | |
22 | ) |
|
21 | ) | |
|
22 | from mercurial.utils import hashutil | |||
23 | from . import shallowutil |
|
23 | from . import shallowutil | |
24 |
|
24 | |||
25 | osutil = policy.importmod('osutil') |
|
25 | osutil = policy.importmod('osutil') | |
26 |
|
26 | |||
27 | # The pack version supported by this implementation. This will need to be |
|
27 | # The pack version supported by this implementation. This will need to be | |
28 | # rev'd whenever the byte format changes. Ex: changing the fanout prefix, |
|
28 | # rev'd whenever the byte format changes. Ex: changing the fanout prefix, | |
29 | # changing any of the int sizes, changing the delta algorithm, etc. |
|
29 | # changing any of the int sizes, changing the delta algorithm, etc. | |
30 | PACKVERSIONSIZE = 1 |
|
30 | PACKVERSIONSIZE = 1 | |
31 | INDEXVERSIONSIZE = 2 |
|
31 | INDEXVERSIONSIZE = 2 | |
32 |
|
32 | |||
33 | FANOUTSTART = INDEXVERSIONSIZE |
|
33 | FANOUTSTART = INDEXVERSIONSIZE | |
34 |
|
34 | |||
35 | # Constant that indicates a fanout table entry hasn't been filled in. (This does |
|
35 | # Constant that indicates a fanout table entry hasn't been filled in. (This does | |
36 | # not get serialized) |
|
36 | # not get serialized) | |
37 | EMPTYFANOUT = -1 |
|
37 | EMPTYFANOUT = -1 | |
38 |
|
38 | |||
39 | # The fanout prefix is the number of bytes that can be addressed by the fanout |
|
39 | # The fanout prefix is the number of bytes that can be addressed by the fanout | |
40 | # table. Example: a fanout prefix of 1 means we use the first byte of a hash to |
|
40 | # table. Example: a fanout prefix of 1 means we use the first byte of a hash to | |
41 | # look in the fanout table (which will be 2^8 entries long). |
|
41 | # look in the fanout table (which will be 2^8 entries long). | |
42 | SMALLFANOUTPREFIX = 1 |
|
42 | SMALLFANOUTPREFIX = 1 | |
43 | LARGEFANOUTPREFIX = 2 |
|
43 | LARGEFANOUTPREFIX = 2 | |
44 |
|
44 | |||
45 | # The number of entries in the index at which point we switch to a large fanout. |
|
45 | # The number of entries in the index at which point we switch to a large fanout. | |
46 | # It is chosen to balance the linear scan through a sparse fanout, with the |
|
46 | # It is chosen to balance the linear scan through a sparse fanout, with the | |
47 | # size of the bisect in actual index. |
|
47 | # size of the bisect in actual index. | |
48 | # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step |
|
48 | # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step | |
49 | # bisect) with (8 step fanout scan + 1 step bisect) |
|
49 | # bisect) with (8 step fanout scan + 1 step bisect) | |
50 | # 5 step bisect = log(2^16 / 8 / 255) # fanout |
|
50 | # 5 step bisect = log(2^16 / 8 / 255) # fanout | |
51 | # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
|
51 | # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries |
52 | SMALLFANOUTCUTOFF = 2 ** 16 // 8 |
|
52 | SMALLFANOUTCUTOFF = 2 ** 16 // 8 | |
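The fanout comments above can be made concrete with a few lines of struct arithmetic. The sketch below shows how the first byte(s) of a node hash select a fanout slot; plain hashlib is used here only to produce a 20-byte node value.

# Worked example of the fanout prefix: the first byte(s) of a node pick a slot
# in the fanout table, which tells the reader where to start bisecting.
import hashlib
import struct

SMALLFANOUTPREFIX = 1                        # index by the first hash byte
fanoutcount = 2 ** (8 * SMALLFANOUTPREFIX)   # 256 fanout slots

node = hashlib.sha1(b'some file content').digest()
fanoutkey = struct.unpack(b'!B', node[:SMALLFANOUTPREFIX])[0]
print(fanoutkey, fanoutcount)                # slot index in [0, 256)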
53 |
|
53 | |||
54 | # The amount of time to wait between checking for new packs. This prevents an |
|
54 | # The amount of time to wait between checking for new packs. This prevents an | |
55 | # exception when data is moved to a new pack after the process has already |
|
55 | # exception when data is moved to a new pack after the process has already | |
56 | # loaded the pack list. |
|
56 | # loaded the pack list. | |
57 | REFRESHRATE = 0.1 |
|
57 | REFRESHRATE = 0.1 | |
58 |
|
58 | |||
59 | if pycompat.isposix and not pycompat.ispy3: |
|
59 | if pycompat.isposix and not pycompat.ispy3: | |
60 | # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening. |
|
60 | # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening. | |
61 | # The 'e' flag will be ignored on older versions of glibc. |
|
61 | # The 'e' flag will be ignored on older versions of glibc. | |
62 | # Python 3 can't handle the 'e' flag. |
|
62 | # Python 3 can't handle the 'e' flag. | |
63 | PACKOPENMODE = b'rbe' |
|
63 | PACKOPENMODE = b'rbe' | |
64 | else: |
|
64 | else: | |
65 | PACKOPENMODE = b'rb' |
|
65 | PACKOPENMODE = b'rb' | |
66 |
|
66 | |||
67 |
|
67 | |||
68 | class _cachebackedpacks(object): |
|
68 | class _cachebackedpacks(object): | |
69 | def __init__(self, packs, cachesize): |
|
69 | def __init__(self, packs, cachesize): | |
70 | self._packs = set(packs) |
|
70 | self._packs = set(packs) | |
71 | self._lrucache = util.lrucachedict(cachesize) |
|
71 | self._lrucache = util.lrucachedict(cachesize) | |
72 | self._lastpack = None |
|
72 | self._lastpack = None | |
73 |
|
73 | |||
74 | # Avoid cold start of the cache by populating the most recent packs |
|
74 | # Avoid cold start of the cache by populating the most recent packs | |
75 | # in the cache. |
|
75 | # in the cache. | |
76 | for i in reversed(range(min(cachesize, len(packs)))): |
|
76 | for i in reversed(range(min(cachesize, len(packs)))): | |
77 | self._movetofront(packs[i]) |
|
77 | self._movetofront(packs[i]) | |
78 |
|
78 | |||
79 | def _movetofront(self, pack): |
|
79 | def _movetofront(self, pack): | |
80 | # This effectively makes pack the first entry in the cache. |
|
80 | # This effectively makes pack the first entry in the cache. | |
81 | self._lrucache[pack] = True |
|
81 | self._lrucache[pack] = True | |
82 |
|
82 | |||
83 | def _registerlastpackusage(self): |
|
83 | def _registerlastpackusage(self): | |
84 | if self._lastpack is not None: |
|
84 | if self._lastpack is not None: | |
85 | self._movetofront(self._lastpack) |
|
85 | self._movetofront(self._lastpack) | |
86 | self._lastpack = None |
|
86 | self._lastpack = None | |
87 |
|
87 | |||
88 | def add(self, pack): |
|
88 | def add(self, pack): | |
89 | self._registerlastpackusage() |
|
89 | self._registerlastpackusage() | |
90 |
|
90 | |||
91 | # This method will mostly be called when packs are not in cache. |
|
91 | # This method will mostly be called when packs are not in cache. | |
92 | # Therefore, add the pack to the cache.
|
92 | # Therefore, add the pack to the cache. |
93 | self._movetofront(pack) |
|
93 | self._movetofront(pack) | |
94 | self._packs.add(pack) |
|
94 | self._packs.add(pack) | |
95 |
|
95 | |||
96 | def __iter__(self): |
|
96 | def __iter__(self): | |
97 | self._registerlastpackusage() |
|
97 | self._registerlastpackusage() | |
98 |
|
98 | |||
99 | # Cache iteration is based on LRU. |
|
99 | # Cache iteration is based on LRU. | |
100 | for pack in self._lrucache: |
|
100 | for pack in self._lrucache: | |
101 | self._lastpack = pack |
|
101 | self._lastpack = pack | |
102 | yield pack |
|
102 | yield pack | |
103 |
|
103 | |||
104 | cachedpacks = set(pack for pack in self._lrucache) |
|
104 | cachedpacks = set(pack for pack in self._lrucache) | |
105 | # Yield for paths not in the cache. |
|
105 | # Yield for paths not in the cache. | |
106 | for pack in self._packs - cachedpacks: |
|
106 | for pack in self._packs - cachedpacks: | |
107 | self._lastpack = pack |
|
107 | self._lastpack = pack | |
108 | yield pack |
|
108 | yield pack | |
109 |
|
109 | |||
110 | # Data not found in any pack. |
|
110 | # Data not found in any pack. | |
111 | self._lastpack = None |
|
111 | self._lastpack = None | |
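The class above leans on util.lrucachedict iterating most-recently-used first. As a hedged stand-in, the same "hot packs first" ordering can be sketched with collections.OrderedDict; the class and pack names below are invented for illustration.

# OrderedDict-based stand-in for the LRU behaviour _cachebackedpacks relies on:
# touching a pack moves it to the front, and old packs fall off the end.
from collections import OrderedDict

class lrupacks(OrderedDict):
    def __init__(self, maxsize):
        super(lrupacks, self).__init__()
        self.maxsize = maxsize

    def touch(self, pack):
        self[pack] = True
        self.move_to_end(pack, last=False)   # most recently used goes first
        while len(self) > self.maxsize:
            self.popitem(last=True)          # evict the least recently used

cache = lrupacks(2)
for pack in ('old.pack', 'mid.pack', 'new.pack'):
    cache.touch(pack)
print(list(cache))   # ['new.pack', 'mid.pack']: newest first, oldest evicted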
112 |
|
112 | |||
113 |
|
113 | |||
114 | class basepackstore(object): |
|
114 | class basepackstore(object): | |
115 | # Default cache size limit for the pack files. |
|
115 | # Default cache size limit for the pack files. | |
116 | DEFAULTCACHESIZE = 100 |
|
116 | DEFAULTCACHESIZE = 100 | |
117 |
|
117 | |||
118 | def __init__(self, ui, path): |
|
118 | def __init__(self, ui, path): | |
119 | self.ui = ui |
|
119 | self.ui = ui | |
120 | self.path = path |
|
120 | self.path = path | |
121 |
|
121 | |||
122 | # lastrefresh is 0 so we'll immediately check for new packs on the first
|
122 | # lastrefresh is 0 so we'll immediately check for new packs on the first |
123 | # failure. |
|
123 | # failure. | |
124 | self.lastrefresh = 0 |
|
124 | self.lastrefresh = 0 | |
125 |
|
125 | |||
126 | packs = [] |
|
126 | packs = [] | |
127 | for filepath, __, __ in self._getavailablepackfilessorted(): |
|
127 | for filepath, __, __ in self._getavailablepackfilessorted(): | |
128 | try: |
|
128 | try: | |
129 | pack = self.getpack(filepath) |
|
129 | pack = self.getpack(filepath) | |
130 | except Exception as ex: |
|
130 | except Exception as ex: | |
131 | # An exception may be thrown if the pack file is corrupted |
|
131 | # An exception may be thrown if the pack file is corrupted | |
132 | # somehow. Log a warning but keep going in this case, just |
|
132 | # somehow. Log a warning but keep going in this case, just | |
133 | # skipping this pack file. |
|
133 | # skipping this pack file. | |
134 | # |
|
134 | # | |
135 | # If this is an ENOENT error then don't even bother logging. |
|
135 | # If this is an ENOENT error then don't even bother logging. | |
136 | # Someone could have removed the file since we retrieved the |
|
136 | # Someone could have removed the file since we retrieved the | |
137 | # list of paths. |
|
137 | # list of paths. | |
138 | if getattr(ex, 'errno', None) != errno.ENOENT: |
|
138 | if getattr(ex, 'errno', None) != errno.ENOENT: | |
139 | ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex)) |
|
139 | ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex)) | |
140 | continue |
|
140 | continue | |
141 | packs.append(pack) |
|
141 | packs.append(pack) | |
142 |
|
142 | |||
143 | self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE) |
|
143 | self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE) | |
144 |
|
144 | |||
145 | def _getavailablepackfiles(self): |
|
145 | def _getavailablepackfiles(self): | |
146 | """For each pack file (a index/data file combo), yields: |
|
146 | """For each pack file (an index/data file combo), yields:
|
146 | """For each pack file (an index/data file combo), yields: |
147 | (full path without extension, mtime, size) | |
148 |
|
148 | |||
149 | mtime will be the mtime of the index/data file (whichever is newer) |
|
149 | mtime will be the mtime of the index/data file (whichever is newer) | |
150 | size is the combined size of index/data file |
|
150 | size is the combined size of index/data file | |
151 | """ |
|
151 | """ | |
152 | indexsuffixlen = len(self.INDEXSUFFIX) |
|
152 | indexsuffixlen = len(self.INDEXSUFFIX) | |
153 | packsuffixlen = len(self.PACKSUFFIX) |
|
153 | packsuffixlen = len(self.PACKSUFFIX) | |
154 |
|
154 | |||
155 | ids = set() |
|
155 | ids = set() | |
156 | sizes = collections.defaultdict(lambda: 0) |
|
156 | sizes = collections.defaultdict(lambda: 0) | |
157 | mtimes = collections.defaultdict(lambda: []) |
|
157 | mtimes = collections.defaultdict(lambda: []) | |
158 | try: |
|
158 | try: | |
159 | for filename, type, stat in osutil.listdir(self.path, stat=True): |
|
159 | for filename, type, stat in osutil.listdir(self.path, stat=True): | |
160 | id = None |
|
160 | id = None | |
161 | if filename[-indexsuffixlen:] == self.INDEXSUFFIX: |
|
161 | if filename[-indexsuffixlen:] == self.INDEXSUFFIX: | |
162 | id = filename[:-indexsuffixlen] |
|
162 | id = filename[:-indexsuffixlen] | |
163 | elif filename[-packsuffixlen:] == self.PACKSUFFIX: |
|
163 | elif filename[-packsuffixlen:] == self.PACKSUFFIX: | |
164 | id = filename[:-packsuffixlen] |
|
164 | id = filename[:-packsuffixlen] | |
165 |
|
165 | |||
166 | # Since we expect to have two files corresponding to each ID |
|
166 | # Since we expect to have two files corresponding to each ID | |
167 | # (the index file and the pack file), we can yield once we see |
|
167 | # (the index file and the pack file), we can yield once we see | |
168 | # it twice. |
|
168 | # it twice. | |
169 | if id: |
|
169 | if id: | |
170 | sizes[id] += stat.st_size # Sum both files' sizes together |
|
170 | sizes[id] += stat.st_size # Sum both files' sizes together | |
171 | mtimes[id].append(stat.st_mtime) |
|
171 | mtimes[id].append(stat.st_mtime) | |
172 | if id in ids: |
|
172 | if id in ids: | |
173 | yield ( |
|
173 | yield ( | |
174 | os.path.join(self.path, id), |
|
174 | os.path.join(self.path, id), | |
175 | max(mtimes[id]), |
|
175 | max(mtimes[id]), | |
176 | sizes[id], |
|
176 | sizes[id], | |
177 | ) |
|
177 | ) | |
178 | else: |
|
178 | else: | |
179 | ids.add(id) |
|
179 | ids.add(id) | |
180 | except OSError as ex: |
|
180 | except OSError as ex: | |
181 | if ex.errno != errno.ENOENT: |
|
181 | if ex.errno != errno.ENOENT: | |
182 | raise |
|
182 | raise | |
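The docstring above describes pairing an index file with its data file and reporting a combined size plus the newer mtime. A standalone sketch of that pairing over a plain directory follows; the suffix values are illustrative, since real stores define INDEXSUFFIX/PACKSUFFIX themselves.

# Sketch: pair "<id><indexsuffix>" with "<id><packsuffix>" and yield
# (path without extension, newest mtime, combined size) once both are seen.
import collections
import os

def availablepackfiles(path, indexsuffix='.dataidx', packsuffix='.datapack'):
    sizes = collections.defaultdict(int)
    mtimes = collections.defaultdict(list)
    seen = set()
    for name in os.listdir(path):
        if name.endswith(indexsuffix):
            packid = name[: -len(indexsuffix)]
        elif name.endswith(packsuffix):
            packid = name[: -len(packsuffix)]
        else:
            continue
        st = os.stat(os.path.join(path, name))
        sizes[packid] += st.st_size
        mtimes[packid].append(st.st_mtime)
        if packid in seen:   # both halves seen: the pack is complete
            yield os.path.join(path, packid), max(mtimes[packid]), sizes[packid]
        else:
            seen.add(packid)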
183 |
|
183 | |||
184 | def _getavailablepackfilessorted(self): |
|
184 | def _getavailablepackfilessorted(self): | |
185 | """Like `_getavailablepackfiles`, but also sorts the files by mtime, |
|
185 | """Like `_getavailablepackfiles`, but also sorts the files by mtime, | |
186 | yielding newest files first. |
|
186 | yielding newest files first. | |
187 |
|
187 | |||
188 | This is desirable, since it is more likely newer packfiles have more |
|
188 | This is desirable, since it is more likely newer packfiles have more | |
189 | desirable data. |
|
189 | desirable data. | |
190 | """ |
|
190 | """ | |
191 | files = [] |
|
191 | files = [] | |
192 | for path, mtime, size in self._getavailablepackfiles(): |
|
192 | for path, mtime, size in self._getavailablepackfiles(): | |
193 | files.append((mtime, size, path)) |
|
193 | files.append((mtime, size, path)) | |
194 | files = sorted(files, reverse=True) |
|
194 | files = sorted(files, reverse=True) | |
195 | for mtime, size, path in files: |
|
195 | for mtime, size, path in files: | |
196 | yield path, mtime, size |
|
196 | yield path, mtime, size | |
197 |
|
197 | |||
198 | def gettotalsizeandcount(self): |
|
198 | def gettotalsizeandcount(self): | |
199 | """Returns the total disk size (in bytes) of all the pack files in |
|
199 | """Returns the total disk size (in bytes) of all the pack files in | |
200 | this store, and the count of pack files. |
|
200 | this store, and the count of pack files. | |
201 |
|
201 | |||
202 | (This might be smaller than the total size of the ``self.path`` |
|
202 | (This might be smaller than the total size of the ``self.path`` | |
203 | directory, since this only considers fully-written pack files, and not
|
203 | directory, since this only considers fully-written pack files, and not |
204 | temporary files or other detritus on the directory.) |
|
204 | temporary files or other detritus on the directory.) | |
205 | """ |
|
205 | """ | |
206 | totalsize = 0 |
|
206 | totalsize = 0 | |
207 | count = 0 |
|
207 | count = 0 | |
208 | for __, __, size in self._getavailablepackfiles(): |
|
208 | for __, __, size in self._getavailablepackfiles(): | |
209 | totalsize += size |
|
209 | totalsize += size | |
210 | count += 1 |
|
210 | count += 1 | |
211 | return totalsize, count |
|
211 | return totalsize, count | |
212 |
|
212 | |||
213 | def getmetrics(self): |
|
213 | def getmetrics(self): | |
214 | """Returns metrics on the state of this store.""" |
|
214 | """Returns metrics on the state of this store.""" | |
215 | size, count = self.gettotalsizeandcount() |
|
215 | size, count = self.gettotalsizeandcount() | |
216 | return { |
|
216 | return { | |
217 | b'numpacks': count, |
|
217 | b'numpacks': count, | |
218 | b'totalpacksize': size, |
|
218 | b'totalpacksize': size, | |
219 | } |
|
219 | } | |
220 |
|
220 | |||
221 | def getpack(self, path): |
|
221 | def getpack(self, path): | |
222 | raise NotImplementedError() |
|
222 | raise NotImplementedError() | |
223 |
|
223 | |||
224 | def getmissing(self, keys): |
|
224 | def getmissing(self, keys): | |
225 | missing = keys |
|
225 | missing = keys | |
226 | for pack in self.packs: |
|
226 | for pack in self.packs: | |
227 | missing = pack.getmissing(missing) |
|
227 | missing = pack.getmissing(missing) | |
228 |
|
228 | |||
229 | # Ensures better performance of the cache by keeping the most |
|
229 | # Ensures better performance of the cache by keeping the most | |
230 | # recently accessed pack at the beginning in subsequent iterations. |
|
230 | # recently accessed pack at the beginning in subsequent iterations. | |
231 | if not missing: |
|
231 | if not missing: | |
232 | return missing |
|
232 | return missing | |
233 |
|
233 | |||
234 | if missing: |
|
234 | if missing: | |
235 | for pack in self.refresh(): |
|
235 | for pack in self.refresh(): | |
236 | missing = pack.getmissing(missing) |
|
236 | missing = pack.getmissing(missing) | |
237 |
|
237 | |||
238 | return missing |
|
238 | return missing | |
239 |
|
239 | |||
240 | def markledger(self, ledger, options=None): |
|
240 | def markledger(self, ledger, options=None): | |
241 | for pack in self.packs: |
|
241 | for pack in self.packs: | |
242 | pack.markledger(ledger) |
|
242 | pack.markledger(ledger) | |
243 |
|
243 | |||
244 | def markforrefresh(self): |
|
244 | def markforrefresh(self): | |
245 | """Tells the store that there may be new pack files, so the next time it |
|
245 | """Tells the store that there may be new pack files, so the next time it | |
246 | has a lookup miss it should check for new files.""" |
|
246 | has a lookup miss it should check for new files.""" | |
247 | self.lastrefresh = 0 |
|
247 | self.lastrefresh = 0 | |
248 |
|
248 | |||
249 | def refresh(self): |
|
249 | def refresh(self): | |
250 | """Checks for any new packs on disk, adds them to the main pack list, |
|
250 | """Checks for any new packs on disk, adds them to the main pack list, | |
251 | and returns a list of just the new packs.""" |
|
251 | and returns a list of just the new packs.""" | |
252 | now = time.time() |
|
252 | now = time.time() | |
253 |
|
253 | |||
254 | # If we experience a lot of misses (like in the case of getmissing() on |
|
254 | # If we experience a lot of misses (like in the case of getmissing() on | |
255 | # new objects), let's only actually check disk for new stuff every once |
|
255 | # new objects), let's only actually check disk for new stuff every once | |
256 | # in a while. Generally this code path should only ever matter when a |
|
256 | # in a while. Generally this code path should only ever matter when a | |
257 | # repack is going on in the background, and it should be pretty rare
|
257 | # repack is going on in the background, and it should be pretty rare |
258 | # for that to happen twice in quick succession.
|
258 | # for that to happen twice in quick succession. |
259 | newpacks = [] |
|
259 | newpacks = [] | |
260 | if now > self.lastrefresh + REFRESHRATE: |
|
260 | if now > self.lastrefresh + REFRESHRATE: | |
261 | self.lastrefresh = now |
|
261 | self.lastrefresh = now | |
262 | previous = set(p.path for p in self.packs) |
|
262 | previous = set(p.path for p in self.packs) | |
263 | for filepath, __, __ in self._getavailablepackfilessorted(): |
|
263 | for filepath, __, __ in self._getavailablepackfilessorted(): | |
264 | if filepath not in previous: |
|
264 | if filepath not in previous: | |
265 | newpack = self.getpack(filepath) |
|
265 | newpack = self.getpack(filepath) | |
266 | newpacks.append(newpack) |
|
266 | newpacks.append(newpack) | |
267 | self.packs.add(newpack) |
|
267 | self.packs.add(newpack) | |
268 |
|
268 | |||
269 | return newpacks |
|
269 | return newpacks | |
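The rate limiting in refresh() is a small, reusable pattern: remember when the last scan happened and skip rescanning until REFRESHRATE seconds have passed. A self-contained sketch of that pattern is below; the scan callback and item names are invented.

# Minimal sketch of the throttled-refresh pattern used above.
import time

REFRESHRATE = 0.1   # seconds between directory scans

class throttledscanner(object):
    def __init__(self, scan):
        self.scan = scan        # callable returning the currently visible items
        self.lastrefresh = 0    # 0 forces a scan on the first lookup miss
        self.known = set()

    def refresh(self):
        new = []
        now = time.time()
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            for item in self.scan():
                if item not in self.known:
                    self.known.add(item)
                    new.append(item)
        return new

scanner = throttledscanner(lambda: {'a.datapack', 'b.datapack'})
print(len(scanner.refresh()))   # 2: the first call scans the "directory"
print(len(scanner.refresh()))   # 0: called again immediately, rate limited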
270 |
|
270 | |||
271 |
|
271 | |||
272 | class versionmixin(object): |
|
272 | class versionmixin(object): | |
273 | # Mix-in for classes with multiple supported versions |
|
273 | # Mix-in for classes with multiple supported versions | |
274 | VERSION = None |
|
274 | VERSION = None | |
275 | SUPPORTED_VERSIONS = [2] |
|
275 | SUPPORTED_VERSIONS = [2] | |
276 |
|
276 | |||
277 | def _checkversion(self, version): |
|
277 | def _checkversion(self, version): | |
278 | if version in self.SUPPORTED_VERSIONS: |
|
278 | if version in self.SUPPORTED_VERSIONS: | |
279 | if self.VERSION is None: |
|
279 | if self.VERSION is None: | |
280 | # only affect this instance |
|
280 | # only affect this instance | |
281 | self.VERSION = version |
|
281 | self.VERSION = version | |
282 | elif self.VERSION != version: |
|
282 | elif self.VERSION != version: | |
283 | raise RuntimeError(b'inconsistent version: %d' % version) |
|
283 | raise RuntimeError(b'inconsistent version: %d' % version) | |
284 | else: |
|
284 | else: | |
285 | raise RuntimeError(b'unsupported version: %d' % version) |
|
285 | raise RuntimeError(b'unsupported version: %d' % version) | |
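A quick usage sketch for versionmixin as defined above: the first accepted version gets pinned on the instance, and later mismatches or unknown versions raise.

# Usage sketch (assumes the versionmixin class defined above).
class demopack(versionmixin):
    SUPPORTED_VERSIONS = [1, 2]

d = demopack()
d._checkversion(2)        # pins VERSION = 2 on this instance
try:
    d._checkversion(1)    # supported, but inconsistent with the pinned version
except RuntimeError as exc:
    print(exc)            # reports the inconsistent version
try:
    d._checkversion(3)    # not in SUPPORTED_VERSIONS at all
except RuntimeError as exc:
    print(exc)            # reports the unsupported version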
286 |
|
286 | |||
287 |
|
287 | |||
288 | class basepack(versionmixin): |
|
288 | class basepack(versionmixin): | |
289 | # The maximum amount we should read via mmap before remapping so the old
|
289 | # The maximum amount we should read via mmap before remapping so the old |
290 | # pages can be released (100MB) |
|
290 | # pages can be released (100MB) | |
291 | MAXPAGEDIN = 100 * 1024 ** 2 |
|
291 | MAXPAGEDIN = 100 * 1024 ** 2 | |
292 |
|
292 | |||
293 | SUPPORTED_VERSIONS = [2] |
|
293 | SUPPORTED_VERSIONS = [2] | |
294 |
|
294 | |||
295 | def __init__(self, path): |
|
295 | def __init__(self, path): | |
296 | self.path = path |
|
296 | self.path = path | |
297 | self.packpath = path + self.PACKSUFFIX |
|
297 | self.packpath = path + self.PACKSUFFIX | |
298 | self.indexpath = path + self.INDEXSUFFIX |
|
298 | self.indexpath = path + self.INDEXSUFFIX | |
299 |
|
299 | |||
300 | self.indexsize = os.stat(self.indexpath).st_size |
|
300 | self.indexsize = os.stat(self.indexpath).st_size | |
301 | self.datasize = os.stat(self.packpath).st_size |
|
301 | self.datasize = os.stat(self.packpath).st_size | |
302 |
|
302 | |||
303 | self._index = None |
|
303 | self._index = None | |
304 | self._data = None |
|
304 | self._data = None | |
305 | self.freememory() # initialize the mmap |
|
305 | self.freememory() # initialize the mmap | |
306 |
|
306 | |||
307 | version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0] |
|
307 | version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0] | |
308 | self._checkversion(version) |
|
308 | self._checkversion(version) | |
309 |
|
309 | |||
310 | version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE]) |
|
310 | version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE]) | |
311 | self._checkversion(version) |
|
311 | self._checkversion(version) | |
312 |
|
312 | |||
313 | if 0b10000000 & config: |
|
313 | if 0b10000000 & config: | |
314 | self.params = indexparams(LARGEFANOUTPREFIX, version) |
|
314 | self.params = indexparams(LARGEFANOUTPREFIX, version) | |
315 | else: |
|
315 | else: | |
316 | self.params = indexparams(SMALLFANOUTPREFIX, version) |
|
316 | self.params = indexparams(SMALLFANOUTPREFIX, version) | |
317 |
|
317 | |||
318 | @util.propertycache |
|
318 | @util.propertycache | |
319 | def _fanouttable(self): |
|
319 | def _fanouttable(self): | |
320 | params = self.params |
|
320 | params = self.params | |
321 | rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize] |
|
321 | rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize] | |
322 | fanouttable = [] |
|
322 | fanouttable = [] | |
323 | for i in pycompat.xrange(0, params.fanoutcount): |
|
323 | for i in pycompat.xrange(0, params.fanoutcount): | |
324 | loc = i * 4 |
|
324 | loc = i * 4 | |
325 | fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0] |
|
325 | fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0] | |
326 | fanouttable.append(fanoutentry) |
|
326 | fanouttable.append(fanoutentry) | |
327 | return fanouttable |
|
327 | return fanouttable | |
328 |
|
328 | |||
329 | @util.propertycache |
|
329 | @util.propertycache | |
330 | def _indexend(self): |
|
330 | def _indexend(self): | |
331 | nodecount = struct.unpack_from( |
|
331 | nodecount = struct.unpack_from( | |
332 | b'!Q', self._index, self.params.indexstart - 8 |
|
332 | b'!Q', self._index, self.params.indexstart - 8 | |
333 | )[0] |
|
333 | )[0] | |
334 | return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH |
|
334 | return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH | |
335 |
|
335 | |||
336 | def freememory(self): |
|
336 | def freememory(self): | |
337 | """Unmap and remap the memory to free it up after known expensive |
|
337 | """Unmap and remap the memory to free it up after known expensive | |
338 | operations. Return True if self._data and self._index were reloaded. |
|
338 | operations. Return True if self._data and self._index were reloaded. | |
339 | """ |
|
339 | """ | |
340 | if self._index: |
|
340 | if self._index: | |
341 | if self._pagedin < self.MAXPAGEDIN: |
|
341 | if self._pagedin < self.MAXPAGEDIN: | |
342 | return False |
|
342 | return False | |
343 |
|
343 | |||
344 | self._index.close() |
|
344 | self._index.close() | |
345 | self._data.close() |
|
345 | self._data.close() | |
346 |
|
346 | |||
347 | # TODO: use an opener/vfs to access these paths |
|
347 | # TODO: use an opener/vfs to access these paths | |
348 | with open(self.indexpath, PACKOPENMODE) as indexfp: |
|
348 | with open(self.indexpath, PACKOPENMODE) as indexfp: | |
349 | # memory-map the file, size 0 means whole file |
|
349 | # memory-map the file, size 0 means whole file | |
350 | self._index = mmap.mmap( |
|
350 | self._index = mmap.mmap( | |
351 | indexfp.fileno(), 0, access=mmap.ACCESS_READ |
|
351 | indexfp.fileno(), 0, access=mmap.ACCESS_READ | |
352 | ) |
|
352 | ) | |
353 | with open(self.packpath, PACKOPENMODE) as datafp: |
|
353 | with open(self.packpath, PACKOPENMODE) as datafp: | |
354 | self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ) |
|
354 | self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ) | |
355 |
|
355 | |||
356 | self._pagedin = 0 |
|
356 | self._pagedin = 0 | |
357 | return True |
|
357 | return True | |
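freememory() above releases pages by closing and re-creating read-only mmaps of the index and data files. The following self-contained sketch shows the same map / read / close / re-map cycle against a throwaway temp file.

# Map a file read-only, read through the mapping, then close and re-map to
# release the paged-in memory (the same cycle freememory() performs).
import mmap
import os
import tempfile

fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'wb') as f:
    f.write(b'x' * 4096)

with open(path, 'rb') as fp:
    data = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)  # 0 = whole file
print(data[:4])     # reads go through the mapping and page data in
data.close()        # drop the mapping (and its pages)...

with open(path, 'rb') as fp:
    data = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)  # ...then re-map
data.close()
os.unlink(path)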
358 |
|
358 | |||
359 | def getmissing(self, keys): |
|
359 | def getmissing(self, keys): | |
360 | raise NotImplementedError() |
|
360 | raise NotImplementedError() | |
361 |
|
361 | |||
362 | def markledger(self, ledger, options=None): |
|
362 | def markledger(self, ledger, options=None): | |
363 | raise NotImplementedError() |
|
363 | raise NotImplementedError() | |
364 |
|
364 | |||
365 | def cleanup(self, ledger): |
|
365 | def cleanup(self, ledger): | |
366 | raise NotImplementedError() |
|
366 | raise NotImplementedError() | |
367 |
|
367 | |||
368 | def __iter__(self): |
|
368 | def __iter__(self): | |
369 | raise NotImplementedError() |
|
369 | raise NotImplementedError() | |
370 |
|
370 | |||
371 | def iterentries(self): |
|
371 | def iterentries(self): | |
372 | raise NotImplementedError() |
|
372 | raise NotImplementedError() | |
373 |
|
373 | |||
374 |
|
374 | |||
375 | class mutablebasepack(versionmixin): |
|
375 | class mutablebasepack(versionmixin): | |
376 | def __init__(self, ui, packdir, version=2): |
|
376 | def __init__(self, ui, packdir, version=2): | |
377 | self._checkversion(version) |
|
377 | self._checkversion(version) | |
378 | # TODO(augie): make this configurable |
|
378 | # TODO(augie): make this configurable | |
379 | self._compressor = b'GZ' |
|
379 | self._compressor = b'GZ' | |
380 | opener = vfsmod.vfs(packdir) |
|
380 | opener = vfsmod.vfs(packdir) | |
381 | opener.createmode = 0o444 |
|
381 | opener.createmode = 0o444 | |
382 | self.opener = opener |
|
382 | self.opener = opener | |
383 |
|
383 | |||
384 | self.entries = {} |
|
384 | self.entries = {} | |
385 |
|
385 | |||
386 | shallowutil.mkstickygroupdir(ui, packdir) |
|
386 | shallowutil.mkstickygroupdir(ui, packdir) | |
387 | self.packfp, self.packpath = opener.mkstemp( |
|
387 | self.packfp, self.packpath = opener.mkstemp( | |
388 | suffix=self.PACKSUFFIX + b'-tmp' |
|
388 | suffix=self.PACKSUFFIX + b'-tmp' | |
389 | ) |
|
389 | ) | |
390 | self.idxfp, self.idxpath = opener.mkstemp( |
|
390 | self.idxfp, self.idxpath = opener.mkstemp( | |
391 | suffix=self.INDEXSUFFIX + b'-tmp' |
|
391 | suffix=self.INDEXSUFFIX + b'-tmp' | |
392 | ) |
|
392 | ) | |
393 | self.packfp = os.fdopen(self.packfp, 'wb+') |
|
393 | self.packfp = os.fdopen(self.packfp, 'wb+') | |
394 | self.idxfp = os.fdopen(self.idxfp, 'wb+') |
|
394 | self.idxfp = os.fdopen(self.idxfp, 'wb+') | |
395 | self.sha = hashlib.sha1()
|
395 | self.sha = hashutil.sha1() | |
396 | self._closed = False |
|
396 | self._closed = False | |
397 |
|
397 | |||
398 | # The opener provides no way of doing permission fixup on files created |
|
398 | # The opener provides no way of doing permission fixup on files created | |
399 | # via mkstemp, so we must fix it ourselves. We can probably fix this |
|
399 | # via mkstemp, so we must fix it ourselves. We can probably fix this | |
400 | # upstream in vfs.mkstemp so we don't need to use the private method. |
|
400 | # upstream in vfs.mkstemp so we don't need to use the private method. | |
401 | opener._fixfilemode(opener.join(self.packpath)) |
|
401 | opener._fixfilemode(opener.join(self.packpath)) | |
402 | opener._fixfilemode(opener.join(self.idxpath)) |
|
402 | opener._fixfilemode(opener.join(self.idxpath)) | |
403 |
|
403 | |||
404 | # Write header |
|
404 | # Write header | |
405 | # TODO: make it extensible (ex: allow specifying compression algorithm, |
|
405 | # TODO: make it extensible (ex: allow specifying compression algorithm, | |
406 | # a flexible key/value header, delta algorithm, fanout size, etc) |
|
406 | # a flexible key/value header, delta algorithm, fanout size, etc) | |
407 | versionbuf = struct.pack(b'!B', self.VERSION) # unsigned 1 byte int |
|
407 | versionbuf = struct.pack(b'!B', self.VERSION) # unsigned 1 byte int | |
408 | self.writeraw(versionbuf) |
|
408 | self.writeraw(versionbuf) | |
409 |
|
409 | |||
410 | def __enter__(self): |
|
410 | def __enter__(self): | |
411 | return self |
|
411 | return self | |
412 |
|
412 | |||
413 | def __exit__(self, exc_type, exc_value, traceback): |
|
413 | def __exit__(self, exc_type, exc_value, traceback): | |
414 | if exc_type is None: |
|
414 | if exc_type is None: | |
415 | self.close() |
|
415 | self.close() | |
416 | else: |
|
416 | else: | |
417 | self.abort() |
|
417 | self.abort() | |
418 |
|
418 | |||
419 | def abort(self): |
|
419 | def abort(self): | |
420 | # Unclean exit |
|
420 | # Unclean exit | |
421 | self._cleantemppacks() |
|
421 | self._cleantemppacks() | |
422 |
|
422 | |||
423 | def writeraw(self, data): |
|
423 | def writeraw(self, data): | |
424 | self.packfp.write(data) |
|
424 | self.packfp.write(data) | |
425 | self.sha.update(data) |
|
425 | self.sha.update(data) | |
426 |
|
426 | |||
427 | def close(self, ledger=None): |
|
427 | def close(self, ledger=None): | |
428 | if self._closed: |
|
428 | if self._closed: | |
429 | return |
|
429 | return | |
430 |
|
430 | |||
431 | try: |
|
431 | try: | |
432 | sha = nodemod.hex(self.sha.digest()) |
|
432 | sha = nodemod.hex(self.sha.digest()) | |
433 | self.packfp.close() |
|
433 | self.packfp.close() | |
434 | self.writeindex() |
|
434 | self.writeindex() | |
435 |
|
435 | |||
436 | if len(self.entries) == 0: |
|
436 | if len(self.entries) == 0: | |
437 | # Empty pack |
|
437 | # Empty pack | |
438 | self._cleantemppacks() |
|
438 | self._cleantemppacks() | |
439 | self._closed = True |
|
439 | self._closed = True | |
440 | return None |
|
440 | return None | |
441 |
|
441 | |||
442 | self.opener.rename(self.packpath, sha + self.PACKSUFFIX) |
|
442 | self.opener.rename(self.packpath, sha + self.PACKSUFFIX) | |
443 | try: |
|
443 | try: | |
444 | self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX) |
|
444 | self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX) | |
445 | except Exception as ex: |
|
445 | except Exception as ex: | |
446 | try: |
|
446 | try: | |
447 | self.opener.unlink(sha + self.PACKSUFFIX) |
|
447 | self.opener.unlink(sha + self.PACKSUFFIX) | |
448 | except Exception: |
|
448 | except Exception: | |
449 | pass |
|
449 | pass | |
450 | # Throw exception 'ex' explicitly since a normal 'raise' would |
|
450 | # Throw exception 'ex' explicitly since a normal 'raise' would | |
451 | # potentially throw an exception from the unlink cleanup. |
|
451 | # potentially throw an exception from the unlink cleanup. | |
452 | raise ex |
|
452 | raise ex | |
453 | except Exception: |
|
453 | except Exception: | |
454 | # Clean up temp packs in all exception cases |
|
454 | # Clean up temp packs in all exception cases | |
455 | self._cleantemppacks() |
|
455 | self._cleantemppacks() | |
456 | raise |
|
456 | raise | |
457 |
|
457 | |||
458 | self._closed = True |
|
458 | self._closed = True | |
459 | result = self.opener.join(sha) |
|
459 | result = self.opener.join(sha) | |
460 | if ledger: |
|
460 | if ledger: | |
461 | ledger.addcreated(result) |
|
461 | ledger.addcreated(result) | |
462 | return result |
|
462 | return result | |
463 |
|
463 | |||
464 | def _cleantemppacks(self): |
|
464 | def _cleantemppacks(self): | |
465 | try: |
|
465 | try: | |
466 | self.opener.unlink(self.packpath) |
|
466 | self.opener.unlink(self.packpath) | |
467 | except Exception: |
|
467 | except Exception: | |
468 | pass |
|
468 | pass | |
469 | try: |
|
469 | try: | |
470 | self.opener.unlink(self.idxpath) |
|
470 | self.opener.unlink(self.idxpath) | |
471 | except Exception: |
|
471 | except Exception: | |
472 | pass |
|
472 | pass | |
473 |
|
473 | |||
474 | def writeindex(self): |
|
474 | def writeindex(self): | |
475 | largefanout = len(self.entries) > SMALLFANOUTCUTOFF |
|
475 | largefanout = len(self.entries) > SMALLFANOUTCUTOFF | |
476 | if largefanout: |
|
476 | if largefanout: | |
477 | params = indexparams(LARGEFANOUTPREFIX, self.VERSION) |
|
477 | params = indexparams(LARGEFANOUTPREFIX, self.VERSION) | |
478 | else: |
|
478 | else: | |
479 | params = indexparams(SMALLFANOUTPREFIX, self.VERSION) |
|
479 | params = indexparams(SMALLFANOUTPREFIX, self.VERSION) | |
480 |
|
480 | |||
481 | fanouttable = [EMPTYFANOUT] * params.fanoutcount |
|
481 | fanouttable = [EMPTYFANOUT] * params.fanoutcount | |
482 |
|
482 | |||
483 | # Precompute the location of each entry |
|
483 | # Precompute the location of each entry | |
484 | locations = {} |
|
484 | locations = {} | |
485 | count = 0 |
|
485 | count = 0 | |
486 | for node in sorted(self.entries): |
|
486 | for node in sorted(self.entries): | |
487 | location = count * self.INDEXENTRYLENGTH |
|
487 | location = count * self.INDEXENTRYLENGTH | |
488 | locations[node] = location |
|
488 | locations[node] = location | |
489 | count += 1 |
|
489 | count += 1 | |
490 |
|
490 | |||
491 | # Must use [0] on the unpack result since it's always a tuple. |
|
491 | # Must use [0] on the unpack result since it's always a tuple. | |
492 | fanoutkey = struct.unpack( |
|
492 | fanoutkey = struct.unpack( | |
493 | params.fanoutstruct, node[: params.fanoutprefix] |
|
493 | params.fanoutstruct, node[: params.fanoutprefix] | |
494 | )[0] |
|
494 | )[0] | |
495 | if fanouttable[fanoutkey] == EMPTYFANOUT: |
|
495 | if fanouttable[fanoutkey] == EMPTYFANOUT: | |
496 | fanouttable[fanoutkey] = location |
|
496 | fanouttable[fanoutkey] = location | |
497 |
|
497 | |||
498 | rawfanouttable = b'' |
|
498 | rawfanouttable = b'' | |
499 | last = 0 |
|
499 | last = 0 | |
500 | for offset in fanouttable: |
|
500 | for offset in fanouttable: | |
501 | offset = offset if offset != EMPTYFANOUT else last |
|
501 | offset = offset if offset != EMPTYFANOUT else last | |
502 | last = offset |
|
502 | last = offset | |
503 | rawfanouttable += struct.pack(b'!I', offset) |
|
503 | rawfanouttable += struct.pack(b'!I', offset) | |
504 |
|
504 | |||
505 | rawentrieslength = struct.pack(b'!Q', len(self.entries)) |
|
505 | rawentrieslength = struct.pack(b'!Q', len(self.entries)) | |
506 |
|
506 | |||
507 | # The index offset is its location in the file. So after the 2 byte |

507 | # The index offset is its location in the file. So after the 2 byte | |
508 | # header and the fanouttable. |
|
508 | # header and the fanouttable. | |
509 | rawindex = self.createindex(locations, 2 + len(rawfanouttable)) |
|
509 | rawindex = self.createindex(locations, 2 + len(rawfanouttable)) | |
510 |
|
510 | |||
511 | self._writeheader(params) |
|
511 | self._writeheader(params) | |
512 | self.idxfp.write(rawfanouttable) |
|
512 | self.idxfp.write(rawfanouttable) | |
513 | self.idxfp.write(rawentrieslength) |
|
513 | self.idxfp.write(rawentrieslength) | |
514 | self.idxfp.write(rawindex) |
|
514 | self.idxfp.write(rawindex) | |
515 | self.idxfp.close() |
|
515 | self.idxfp.close() | |
516 |
|
516 | |||
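
writeindex() above sorts the entry nodes, assigns each a fixed-size slot in the index, and records in a fanout table the offset of the first entry for each possible node prefix, filling untouched buckets with the previous offset so a reader can always start scanning at fanouttable[prefix]. A rough standalone sketch of that construction, assuming a one-byte prefix and an illustrative fixed entry length:

    import struct

    INDEXENTRYLENGTH = 40   # assumed entry size for the sketch
    EMPTYFANOUT = -1

    def buildfanout(nodes, fanoutcount=256):
        """Return (locations, raw fanout table) for the given binary nodes."""
        locations = {}
        fanouttable = [EMPTYFANOUT] * fanoutcount
        for count, node in enumerate(sorted(nodes)):
            location = count * INDEXENTRYLENGTH
            locations[node] = location
            fanoutkey = node[0]              # first byte selects the bucket
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location
        raw = b''
        last = 0
        for offset in fanouttable:
            # Empty buckets inherit the previous offset so lookups can
            # always start scanning at fanouttable[prefix].
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            raw += struct.pack(b'!I', offset)
        return locations, raw
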
517 | def createindex(self, nodelocations): |
|
517 | def createindex(self, nodelocations): | |
518 | raise NotImplementedError() |
|
518 | raise NotImplementedError() | |
519 |
|
519 | |||
520 | def _writeheader(self, indexparams): |
|
520 | def _writeheader(self, indexparams): | |
521 | # Index header |
|
521 | # Index header | |
522 | # <version: 1 byte> |
|
522 | # <version: 1 byte> | |
523 | # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8 |
|
523 | # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8 | |
524 | # <unused: 7 bit> # future use (compression, delta format, etc) |
|
524 | # <unused: 7 bit> # future use (compression, delta format, etc) | |
525 | config = 0 |
|
525 | config = 0 | |
526 | if indexparams.fanoutprefix == LARGEFANOUTPREFIX: |
|
526 | if indexparams.fanoutprefix == LARGEFANOUTPREFIX: | |
527 | config = 0b10000000 |
|
527 | config = 0b10000000 | |
528 | self.idxfp.write(struct.pack(b'!BB', self.VERSION, config)) |
|
528 | self.idxfp.write(struct.pack(b'!BB', self.VERSION, config)) | |

529 |
|
529 | |||
530 |
|
530 | |||
531 | class indexparams(object): |
|
531 | class indexparams(object): | |
532 | __slots__ = ( |
|
532 | __slots__ = ( | |
533 | 'fanoutprefix', |
|
533 | 'fanoutprefix', | |
534 | 'fanoutstruct', |
|
534 | 'fanoutstruct', | |
535 | 'fanoutcount', |
|
535 | 'fanoutcount', | |
536 | 'fanoutsize', |
|
536 | 'fanoutsize', | |
537 | 'indexstart', |
|
537 | 'indexstart', | |
538 | ) |
|
538 | ) | |
539 |
|
539 | |||
540 | def __init__(self, prefixsize, version): |
|
540 | def __init__(self, prefixsize, version): | |
541 | self.fanoutprefix = prefixsize |
|
541 | self.fanoutprefix = prefixsize | |
542 |
|
542 | |||
543 | # The struct pack format for fanout table location (i.e. the format that |
|
543 | # The struct pack format for fanout table location (i.e. the format that | |
544 | # converts the node prefix into an integer location in the fanout |
|
544 | # converts the node prefix into an integer location in the fanout | |
545 | # table). |
|
545 | # table). | |
546 | if prefixsize == SMALLFANOUTPREFIX: |
|
546 | if prefixsize == SMALLFANOUTPREFIX: | |
547 | self.fanoutstruct = b'!B' |
|
547 | self.fanoutstruct = b'!B' | |
548 | elif prefixsize == LARGEFANOUTPREFIX: |
|
548 | elif prefixsize == LARGEFANOUTPREFIX: | |
549 | self.fanoutstruct = b'!H' |
|
549 | self.fanoutstruct = b'!H' | |
550 | else: |
|
550 | else: | |
551 | raise ValueError(b"invalid fanout prefix size: %s" % prefixsize) |
|
551 | raise ValueError(b"invalid fanout prefix size: %s" % prefixsize) | |
552 |
|
552 | |||
553 | # The number of fanout table entries |
|
553 | # The number of fanout table entries | |
554 | self.fanoutcount = 2 ** (prefixsize * 8) |
|
554 | self.fanoutcount = 2 ** (prefixsize * 8) | |
555 |
|
555 | |||
556 | # The total bytes used by the fanout table |
|
556 | # The total bytes used by the fanout table | |
557 | self.fanoutsize = self.fanoutcount * 4 |
|
557 | self.fanoutsize = self.fanoutcount * 4 | |
558 |
|
558 | |||
559 | self.indexstart = FANOUTSTART + self.fanoutsize |
|
559 | self.indexstart = FANOUTSTART + self.fanoutsize | |
560 | # Skip the index length |
|
560 | # Skip the index length | |
561 | self.indexstart += 8 |
|
561 | self.indexstart += 8 |
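
To make the sizes in indexparams concrete: a small fanout uses a 1-byte prefix and a large one a 2-byte prefix, and everything else follows from that. A quick arithmetic check (FANOUTSTART is taken as 2 here to match the 2-byte index header described above; treat the exact constant as an assumption of this sketch):

    FANOUTSTART = 2                      # version byte + config byte

    for prefixsize in (1, 2):
        fanoutcount = 2 ** (prefixsize * 8)          # 256 or 65536 buckets
        fanoutsize = fanoutcount * 4                 # one !I offset per bucket
        indexstart = FANOUTSTART + fanoutsize + 8    # skip the 8-byte entry count
        print(prefixsize, fanoutcount, fanoutsize, indexstart)
    # 1-byte prefix:   256 buckets,   1024-byte fanout, entries start at offset 1034
    # 2-byte prefix: 65536 buckets, 262144-byte fanout, entries start at offset 262154
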
@@ -1,461 +1,461 b'' | |||||
1 | from __future__ import absolute_import |
|
1 | from __future__ import absolute_import | |
2 |
|
2 | |||
3 | import errno |
|
3 | import errno | |
4 | import hashlib |
|
|||
5 | import os |
|
4 | import os | |
6 | import shutil |
|
5 | import shutil | |
7 | import stat |
|
6 | import stat | |
8 | import time |
|
7 | import time | |
9 |
|
8 | |||
10 | from mercurial.i18n import _ |
|
9 | from mercurial.i18n import _ | |
11 | from mercurial.node import bin, hex |
|
10 | from mercurial.node import bin, hex | |
12 | from mercurial.pycompat import open |
|
11 | from mercurial.pycompat import open | |
13 | from mercurial import ( |
|
12 | from mercurial import ( | |
14 | error, |
|
13 | error, | |
15 | pycompat, |
|
14 | pycompat, | |
16 | util, |
|
15 | util, | |
17 | ) |
|
16 | ) | |
|
17 | from mercurial.utils import hashutil | |||
18 | from . import ( |
|
18 | from . import ( | |
19 | constants, |
|
19 | constants, | |
20 | shallowutil, |
|
20 | shallowutil, | |
21 | ) |
|
21 | ) | |
22 |
|
22 | |||
23 |
|
23 | |||
24 | class basestore(object): |
|
24 | class basestore(object): | |
25 | def __init__(self, repo, path, reponame, shared=False): |
|
25 | def __init__(self, repo, path, reponame, shared=False): | |
26 | """Creates a remotefilelog store object for the given repo name. |
|
26 | """Creates a remotefilelog store object for the given repo name. | |
27 |
|
27 | |||
28 | `path` - The file path where this store keeps its data |
|
28 | `path` - The file path where this store keeps its data | |
29 | `reponame` - The name of the repo. This is used to partition data from |
|
29 | `reponame` - The name of the repo. This is used to partition data from | |
30 | many repos. |
|
30 | many repos. | |
31 | `shared` - True if this store is a shared cache of data from the central |
|
31 | `shared` - True if this store is a shared cache of data from the central | |
32 | server, for many repos on this machine. False means this store is for |
|
32 | server, for many repos on this machine. False means this store is for | |
33 | the local data for one repo. |
|
33 | the local data for one repo. | |
34 | """ |
|
34 | """ | |
35 | self.repo = repo |
|
35 | self.repo = repo | |
36 | self.ui = repo.ui |
|
36 | self.ui = repo.ui | |
37 | self._path = path |
|
37 | self._path = path | |
38 | self._reponame = reponame |
|
38 | self._reponame = reponame | |
39 | self._shared = shared |
|
39 | self._shared = shared | |
40 | self._uid = os.getuid() if not pycompat.iswindows else None |
|
40 | self._uid = os.getuid() if not pycompat.iswindows else None | |
41 |
|
41 | |||
42 | self._validatecachelog = self.ui.config( |
|
42 | self._validatecachelog = self.ui.config( | |
43 | b"remotefilelog", b"validatecachelog" |
|
43 | b"remotefilelog", b"validatecachelog" | |
44 | ) |
|
44 | ) | |
45 | self._validatecache = self.ui.config( |
|
45 | self._validatecache = self.ui.config( | |
46 | b"remotefilelog", b"validatecache", b'on' |
|
46 | b"remotefilelog", b"validatecache", b'on' | |
47 | ) |
|
47 | ) | |
48 | if self._validatecache not in (b'on', b'strict', b'off'): |
|
48 | if self._validatecache not in (b'on', b'strict', b'off'): | |
49 | self._validatecache = b'on' |
|
49 | self._validatecache = b'on' | |
50 | if self._validatecache == b'off': |
|
50 | if self._validatecache == b'off': | |
51 | self._validatecache = False |
|
51 | self._validatecache = False | |
52 |
|
52 | |||
53 | if shared: |
|
53 | if shared: | |
54 | shallowutil.mkstickygroupdir(self.ui, path) |
|
54 | shallowutil.mkstickygroupdir(self.ui, path) | |
55 |
|
55 | |||
56 | def getmissing(self, keys): |
|
56 | def getmissing(self, keys): | |
57 | missing = [] |
|
57 | missing = [] | |
58 | for name, node in keys: |
|
58 | for name, node in keys: | |
59 | filepath = self._getfilepath(name, node) |
|
59 | filepath = self._getfilepath(name, node) | |
60 | exists = os.path.exists(filepath) |
|
60 | exists = os.path.exists(filepath) | |
61 | if ( |
|
61 | if ( | |
62 | exists |
|
62 | exists | |
63 | and self._validatecache == b'strict' |
|
63 | and self._validatecache == b'strict' | |
64 | and not self._validatekey(filepath, b'contains') |
|
64 | and not self._validatekey(filepath, b'contains') | |
65 | ): |
|
65 | ): | |
66 | exists = False |
|
66 | exists = False | |
67 | if not exists: |
|
67 | if not exists: | |
68 | missing.append((name, node)) |
|
68 | missing.append((name, node)) | |
69 |
|
69 | |||
70 | return missing |
|
70 | return missing | |
71 |
|
71 | |||
72 | # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE |
|
72 | # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE | |
73 |
|
73 | |||
74 | def markledger(self, ledger, options=None): |
|
74 | def markledger(self, ledger, options=None): | |
75 | if options and options.get(constants.OPTION_PACKSONLY): |
|
75 | if options and options.get(constants.OPTION_PACKSONLY): | |
76 | return |
|
76 | return | |
77 | if self._shared: |
|
77 | if self._shared: | |
78 | for filename, nodes in self._getfiles(): |
|
78 | for filename, nodes in self._getfiles(): | |
79 | for node in nodes: |
|
79 | for node in nodes: | |
80 | ledger.markdataentry(self, filename, node) |
|
80 | ledger.markdataentry(self, filename, node) | |
81 | ledger.markhistoryentry(self, filename, node) |
|
81 | ledger.markhistoryentry(self, filename, node) | |
82 |
|
82 | |||
83 | def cleanup(self, ledger): |
|
83 | def cleanup(self, ledger): | |
84 | ui = self.ui |
|
84 | ui = self.ui | |
85 | entries = ledger.sources.get(self, []) |
|
85 | entries = ledger.sources.get(self, []) | |
86 | count = 0 |
|
86 | count = 0 | |
87 | progress = ui.makeprogress( |
|
87 | progress = ui.makeprogress( | |
88 | _(b"cleaning up"), unit=b"files", total=len(entries) |
|
88 | _(b"cleaning up"), unit=b"files", total=len(entries) | |
89 | ) |
|
89 | ) | |
90 | for entry in entries: |
|
90 | for entry in entries: | |
91 | if entry.gced or (entry.datarepacked and entry.historyrepacked): |
|
91 | if entry.gced or (entry.datarepacked and entry.historyrepacked): | |
92 | progress.update(count) |
|
92 | progress.update(count) | |
93 | path = self._getfilepath(entry.filename, entry.node) |
|
93 | path = self._getfilepath(entry.filename, entry.node) | |
94 | util.tryunlink(path) |
|
94 | util.tryunlink(path) | |
95 | count += 1 |
|
95 | count += 1 | |
96 | progress.complete() |
|
96 | progress.complete() | |
97 |
|
97 | |||
98 | # Clean up the repo cache directory. |
|
98 | # Clean up the repo cache directory. | |
99 | self._cleanupdirectory(self._getrepocachepath()) |
|
99 | self._cleanupdirectory(self._getrepocachepath()) | |
100 |
|
100 | |||
101 | # BELOW THIS ARE NON-STANDARD APIS |
|
101 | # BELOW THIS ARE NON-STANDARD APIS | |
102 |
|
102 | |||
103 | def _cleanupdirectory(self, rootdir): |
|
103 | def _cleanupdirectory(self, rootdir): | |
104 | """Removes the empty directories and unnecessary files within the root |
|
104 | """Removes the empty directories and unnecessary files within the root | |
105 | directory recursively. Note that this method does not remove the root |
|
105 | directory recursively. Note that this method does not remove the root | |
106 | directory itself. """ |
|
106 | directory itself. """ | |
107 |
|
107 | |||
108 | oldfiles = set() |
|
108 | oldfiles = set() | |
109 | otherfiles = set() |
|
109 | otherfiles = set() | |
110 | # osutil.listdir returns stat information which saves some rmdir/listdir |
|
110 | # osutil.listdir returns stat information which saves some rmdir/listdir | |
111 | # syscalls. |
|
111 | # syscalls. | |
112 | for name, mode in util.osutil.listdir(rootdir): |
|
112 | for name, mode in util.osutil.listdir(rootdir): | |
113 | if stat.S_ISDIR(mode): |
|
113 | if stat.S_ISDIR(mode): | |
114 | dirpath = os.path.join(rootdir, name) |
|
114 | dirpath = os.path.join(rootdir, name) | |
115 | self._cleanupdirectory(dirpath) |
|
115 | self._cleanupdirectory(dirpath) | |
116 |
|
116 | |||
117 | # Now that the directory specified by dirpath is potentially |
|
117 | # Now that the directory specified by dirpath is potentially | |
118 | # empty, try and remove it. |
|
118 | # empty, try and remove it. | |
119 | try: |
|
119 | try: | |
120 | os.rmdir(dirpath) |
|
120 | os.rmdir(dirpath) | |
121 | except OSError: |
|
121 | except OSError: | |
122 | pass |
|
122 | pass | |
123 |
|
123 | |||
124 | elif stat.S_ISREG(mode): |
|
124 | elif stat.S_ISREG(mode): | |
125 | if name.endswith(b'_old'): |
|
125 | if name.endswith(b'_old'): | |
126 | oldfiles.add(name[:-4]) |
|
126 | oldfiles.add(name[:-4]) | |
127 | else: |
|
127 | else: | |
128 | otherfiles.add(name) |
|
128 | otherfiles.add(name) | |
129 |
|
129 | |||
130 | # Remove the files which end with suffix '_old' and have no |
|
130 | # Remove the files which end with suffix '_old' and have no | |
131 | # corresponding file without the suffix '_old'. See addremotefilelognode |
|
131 | # corresponding file without the suffix '_old'. See addremotefilelognode | |
132 | # method for the generation/purpose of files with '_old' suffix. |
|
132 | # method for the generation/purpose of files with '_old' suffix. | |
133 | for filename in oldfiles - otherfiles: |
|
133 | for filename in oldfiles - otherfiles: | |
134 | filepath = os.path.join(rootdir, filename + b'_old') |
|
134 | filepath = os.path.join(rootdir, filename + b'_old') | |
135 | util.tryunlink(filepath) |
|
135 | util.tryunlink(filepath) | |
136 |
|
136 | |||
137 | def _getfiles(self): |
|
137 | def _getfiles(self): | |
138 | """Return a list of (filename, [node,...]) for all the revisions that |
|
138 | """Return a list of (filename, [node,...]) for all the revisions that | |
139 | exist in the store. |
|
139 | exist in the store. | |
140 |
|
140 | |||
141 | This is useful for obtaining a list of all the contents of the store |
|
141 | This is useful for obtaining a list of all the contents of the store | |
142 | when performing a repack to another store, since the store API requires |
|
142 | when performing a repack to another store, since the store API requires | |
143 | name+node keys and not namehash+node keys. |
|
143 | name+node keys and not namehash+node keys. | |
144 | """ |
|
144 | """ | |
145 | existing = {} |
|
145 | existing = {} | |
146 | for filenamehash, node in self._listkeys(): |
|
146 | for filenamehash, node in self._listkeys(): | |
147 | existing.setdefault(filenamehash, []).append(node) |
|
147 | existing.setdefault(filenamehash, []).append(node) | |
148 |
|
148 | |||
149 | filenamemap = self._resolvefilenames(existing.keys()) |
|
149 | filenamemap = self._resolvefilenames(existing.keys()) | |
150 |
|
150 | |||
151 | for filename, sha in pycompat.iteritems(filenamemap): |
|
151 | for filename, sha in pycompat.iteritems(filenamemap): | |
152 | yield (filename, existing[sha]) |
|
152 | yield (filename, existing[sha]) | |
153 |
|
153 | |||
154 | def _resolvefilenames(self, hashes): |
|
154 | def _resolvefilenames(self, hashes): | |
155 | """Given a list of filename hashes that are present in the |
|
155 | """Given a list of filename hashes that are present in the | |
156 | remotefilelog store, return a mapping from filename->hash. |
|
156 | remotefilelog store, return a mapping from filename->hash. | |
157 |
|
157 | |||
158 | This is useful when converting remotefilelog blobs into other storage |
|
158 | This is useful when converting remotefilelog blobs into other storage | |
159 | formats. |
|
159 | formats. | |
160 | """ |
|
160 | """ | |
161 | if not hashes: |
|
161 | if not hashes: | |
162 | return {} |
|
162 | return {} | |
163 |
|
163 | |||
164 | filenames = {} |
|
164 | filenames = {} | |
165 | missingfilename = set(hashes) |
|
165 | missingfilename = set(hashes) | |
166 |
|
166 | |||
167 | # Start with a full manifest, since it'll cover the majority of files |
|
167 | # Start with a full manifest, since it'll cover the majority of files | |
168 | for filename in self.repo[b'tip'].manifest(): |
|
168 | for filename in self.repo[b'tip'].manifest(): | |
169 |             sha = hashlib.sha1(filename).digest() |

169 |             sha = hashutil.sha1(filename).digest() | 
170 | if sha in missingfilename: |
|
170 | if sha in missingfilename: | |
171 | filenames[filename] = sha |
|
171 | filenames[filename] = sha | |
172 | missingfilename.discard(sha) |
|
172 | missingfilename.discard(sha) | |
173 |
|
173 | |||
174 | # Scan the changelog until we've found every file name |
|
174 | # Scan the changelog until we've found every file name | |
175 | cl = self.repo.unfiltered().changelog |
|
175 | cl = self.repo.unfiltered().changelog | |
176 | for rev in pycompat.xrange(len(cl) - 1, -1, -1): |
|
176 | for rev in pycompat.xrange(len(cl) - 1, -1, -1): | |
177 | if not missingfilename: |
|
177 | if not missingfilename: | |
178 | break |
|
178 | break | |
179 | files = cl.readfiles(cl.node(rev)) |
|
179 | files = cl.readfiles(cl.node(rev)) | |
180 | for filename in files: |
|
180 | for filename in files: | |
181 |                 sha = hashlib.sha1(filename).digest() |

181 |                 sha = hashutil.sha1(filename).digest() | 
182 | if sha in missingfilename: |
|
182 | if sha in missingfilename: | |
183 | filenames[filename] = sha |
|
183 | filenames[filename] = sha | |
184 | missingfilename.discard(sha) |
|
184 | missingfilename.discard(sha) | |
185 |
|
185 | |||
186 | return filenames |
|
186 | return filenames | |
187 |
|
187 | |||
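
_resolvefilenames() above inverts the store's hashed layout: it cannot un-hash a filename, so it hashes every candidate name it can cheaply enumerate (first the tip manifest, then changelog file lists) and keeps the ones whose SHA-1 matches a hash found in the store. A minimal standalone sketch of that reverse lookup, using hashlib.sha1 directly in place of hashutil (an assumption of the sketch):

    import hashlib

    def resolvefilenames(candidates, wantedhashes):
        """Map filename -> sha1 digest for candidates whose digest is wanted."""
        filenames = {}
        missing = set(wantedhashes)
        for filename in candidates:
            if not missing:
                break                        # every hash has been resolved
            sha = hashlib.sha1(filename).digest()
            if sha in missing:
                filenames[filename] = sha
                missing.discard(sha)
        return filenames

    # Example: recover b'README' from its filename hash.
    wanted = {hashlib.sha1(b'README').digest()}
    assert resolvefilenames([b'README', b'setup.py'], wanted) == {
        b'README': hashlib.sha1(b'README').digest()
    }
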
188 | def _getrepocachepath(self): |
|
188 | def _getrepocachepath(self): | |
189 | return ( |
|
189 | return ( | |
190 | os.path.join(self._path, self._reponame) |
|
190 | os.path.join(self._path, self._reponame) | |
191 | if self._shared |
|
191 | if self._shared | |
192 | else self._path |
|
192 | else self._path | |
193 | ) |
|
193 | ) | |
194 |
|
194 | |||
195 | def _listkeys(self): |
|
195 | def _listkeys(self): | |
196 | """List all the remotefilelog keys that exist in the store. |
|
196 | """List all the remotefilelog keys that exist in the store. | |
197 |
|
197 | |||
198 |         Returns an iterator of (filename hash, filecontent hash) tuples. |

198 |         Returns an iterator of (filename hash, filecontent hash) tuples. | |
199 | """ |
|
199 | """ | |
200 |
|
200 | |||
201 | for root, dirs, files in os.walk(self._getrepocachepath()): |
|
201 | for root, dirs, files in os.walk(self._getrepocachepath()): | |
202 | for filename in files: |
|
202 | for filename in files: | |
203 | if len(filename) != 40: |
|
203 | if len(filename) != 40: | |
204 | continue |
|
204 | continue | |
205 | node = filename |
|
205 | node = filename | |
206 | if self._shared: |
|
206 | if self._shared: | |
207 | # .../1a/85ffda..be21 |
|
207 | # .../1a/85ffda..be21 | |
208 | filenamehash = root[-41:-39] + root[-38:] |
|
208 | filenamehash = root[-41:-39] + root[-38:] | |
209 | else: |
|
209 | else: | |
210 | filenamehash = root[-40:] |
|
210 | filenamehash = root[-40:] | |
211 | yield (bin(filenamehash), bin(node)) |
|
211 | yield (bin(filenamehash), bin(node)) | |
212 |
|
212 | |||
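
_listkeys() above does not store the filename hash anywhere; it reassembles it from the directory layout. Per the slicing and the "1a/85ffda..be21" comment, a shared-cache key lives under a 2-character directory plus a 38-character directory (the 40-hex filename hash split in two), with the 40-hex content node as the file name, so root[-41:-39] + root[-38:] simply glues the hash back together around the path separator. A small sketch with an assumed example path:

    import os

    filenamehash = b'1a' * 20        # 40 hex chars: sha1 of the file name
    node = b'f' * 40                 # 40 hex chars: content hash (the file's name)
    root = os.path.join(b'/cache/reponame', filenamehash[:2], filenamehash[2:])

    # root ends with ".../<2 chars>/<38 chars>"; rebuild the hash around os.sep:
    recovered = root[-41:-39] + root[-38:]
    assert recovered == filenamehash
    print(os.path.join(root, node))
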
213 | def _getfilepath(self, name, node): |
|
213 | def _getfilepath(self, name, node): | |
214 | node = hex(node) |
|
214 | node = hex(node) | |
215 | if self._shared: |
|
215 | if self._shared: | |
216 | key = shallowutil.getcachekey(self._reponame, name, node) |
|
216 | key = shallowutil.getcachekey(self._reponame, name, node) | |
217 | else: |
|
217 | else: | |
218 | key = shallowutil.getlocalkey(name, node) |
|
218 | key = shallowutil.getlocalkey(name, node) | |
219 |
|
219 | |||
220 | return os.path.join(self._path, key) |
|
220 | return os.path.join(self._path, key) | |
221 |
|
221 | |||
222 | def _getdata(self, name, node): |
|
222 | def _getdata(self, name, node): | |
223 | filepath = self._getfilepath(name, node) |
|
223 | filepath = self._getfilepath(name, node) | |
224 | try: |
|
224 | try: | |
225 | data = shallowutil.readfile(filepath) |
|
225 | data = shallowutil.readfile(filepath) | |
226 | if self._validatecache and not self._validatedata(data, filepath): |
|
226 | if self._validatecache and not self._validatedata(data, filepath): | |
227 | if self._validatecachelog: |
|
227 | if self._validatecachelog: | |
228 | with open(self._validatecachelog, b'a+') as f: |
|
228 | with open(self._validatecachelog, b'a+') as f: | |
229 | f.write(b"corrupt %s during read\n" % filepath) |
|
229 | f.write(b"corrupt %s during read\n" % filepath) | |
230 | os.rename(filepath, filepath + b".corrupt") |
|
230 | os.rename(filepath, filepath + b".corrupt") | |
231 | raise KeyError(b"corrupt local cache file %s" % filepath) |
|
231 | raise KeyError(b"corrupt local cache file %s" % filepath) | |
232 | except IOError: |
|
232 | except IOError: | |
233 | raise KeyError( |
|
233 | raise KeyError( | |
234 | b"no file found at %s for %s:%s" % (filepath, name, hex(node)) |
|
234 | b"no file found at %s for %s:%s" % (filepath, name, hex(node)) | |
235 | ) |
|
235 | ) | |
236 |
|
236 | |||
237 | return data |
|
237 | return data | |
238 |
|
238 | |||
239 | def addremotefilelognode(self, name, node, data): |
|
239 | def addremotefilelognode(self, name, node, data): | |
240 | filepath = self._getfilepath(name, node) |
|
240 | filepath = self._getfilepath(name, node) | |
241 |
|
241 | |||
242 | oldumask = os.umask(0o002) |
|
242 | oldumask = os.umask(0o002) | |
243 | try: |
|
243 | try: | |
244 | # if this node already exists, save the old version for |
|
244 | # if this node already exists, save the old version for | |
245 | # recovery/debugging purposes. |
|
245 | # recovery/debugging purposes. | |
246 | if os.path.exists(filepath): |
|
246 | if os.path.exists(filepath): | |
247 | newfilename = filepath + b'_old' |
|
247 | newfilename = filepath + b'_old' | |
248 | # newfilename can be read-only and shutil.copy will fail. |
|
248 | # newfilename can be read-only and shutil.copy will fail. | |
249 | # Delete newfilename to avoid it |
|
249 | # Delete newfilename to avoid it | |
250 | if os.path.exists(newfilename): |
|
250 | if os.path.exists(newfilename): | |
251 | shallowutil.unlinkfile(newfilename) |
|
251 | shallowutil.unlinkfile(newfilename) | |
252 | shutil.copy(filepath, newfilename) |
|
252 | shutil.copy(filepath, newfilename) | |
253 |
|
253 | |||
254 | shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath)) |
|
254 | shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath)) | |
255 | shallowutil.writefile(filepath, data, readonly=True) |
|
255 | shallowutil.writefile(filepath, data, readonly=True) | |
256 |
|
256 | |||
257 | if self._validatecache: |
|
257 | if self._validatecache: | |
258 | if not self._validatekey(filepath, b'write'): |
|
258 | if not self._validatekey(filepath, b'write'): | |
259 | raise error.Abort( |
|
259 | raise error.Abort( | |
260 | _(b"local cache write was corrupted %s") % filepath |
|
260 | _(b"local cache write was corrupted %s") % filepath | |
261 | ) |
|
261 | ) | |
262 | finally: |
|
262 | finally: | |
263 | os.umask(oldumask) |
|
263 | os.umask(oldumask) | |
264 |
|
264 | |||
265 | def markrepo(self, path): |
|
265 | def markrepo(self, path): | |
266 | """Call this to add the given repo path to the store's list of |
|
266 | """Call this to add the given repo path to the store's list of | |
267 | repositories that are using it. This is useful later when doing garbage |
|
267 | repositories that are using it. This is useful later when doing garbage | |
268 |         collection, since it allows us to inspect the repos to see what nodes |

268 |         collection, since it allows us to inspect the repos to see what nodes | |
269 | they want to be kept alive in the store. |
|
269 | they want to be kept alive in the store. | |
270 | """ |
|
270 | """ | |
271 | repospath = os.path.join(self._path, b"repos") |
|
271 | repospath = os.path.join(self._path, b"repos") | |
272 | with open(repospath, b'ab') as reposfile: |
|
272 | with open(repospath, b'ab') as reposfile: | |
273 | reposfile.write(os.path.dirname(path) + b"\n") |
|
273 | reposfile.write(os.path.dirname(path) + b"\n") | |
274 |
|
274 | |||
275 | repospathstat = os.stat(repospath) |
|
275 | repospathstat = os.stat(repospath) | |
276 | if repospathstat.st_uid == self._uid: |
|
276 | if repospathstat.st_uid == self._uid: | |
277 | os.chmod(repospath, 0o0664) |
|
277 | os.chmod(repospath, 0o0664) | |
278 |
|
278 | |||
279 | def _validatekey(self, path, action): |
|
279 | def _validatekey(self, path, action): | |
280 | with open(path, b'rb') as f: |
|
280 | with open(path, b'rb') as f: | |
281 | data = f.read() |
|
281 | data = f.read() | |
282 |
|
282 | |||
283 | if self._validatedata(data, path): |
|
283 | if self._validatedata(data, path): | |
284 | return True |
|
284 | return True | |
285 |
|
285 | |||
286 | if self._validatecachelog: |
|
286 | if self._validatecachelog: | |
287 | with open(self._validatecachelog, b'ab+') as f: |
|
287 | with open(self._validatecachelog, b'ab+') as f: | |
288 | f.write(b"corrupt %s during %s\n" % (path, action)) |
|
288 | f.write(b"corrupt %s during %s\n" % (path, action)) | |
289 |
|
289 | |||
290 | os.rename(path, path + b".corrupt") |
|
290 | os.rename(path, path + b".corrupt") | |
291 | return False |
|
291 | return False | |
292 |
|
292 | |||
293 | def _validatedata(self, data, path): |
|
293 | def _validatedata(self, data, path): | |
294 | try: |
|
294 | try: | |
295 | if len(data) > 0: |
|
295 | if len(data) > 0: | |
296 | # see remotefilelogserver.createfileblob for the format |
|
296 | # see remotefilelogserver.createfileblob for the format | |
297 | offset, size, flags = shallowutil.parsesizeflags(data) |
|
297 | offset, size, flags = shallowutil.parsesizeflags(data) | |
298 | if len(data) <= size: |
|
298 | if len(data) <= size: | |
299 | # it is truncated |
|
299 | # it is truncated | |
300 | return False |
|
300 | return False | |
301 |
|
301 | |||
302 | # extract the node from the metadata |
|
302 | # extract the node from the metadata | |
303 | offset += size |
|
303 | offset += size | |
304 | datanode = data[offset : offset + 20] |
|
304 | datanode = data[offset : offset + 20] | |
305 |
|
305 | |||
306 | # and compare against the path |
|
306 | # and compare against the path | |
307 | if os.path.basename(path) == hex(datanode): |
|
307 | if os.path.basename(path) == hex(datanode): | |
308 | # Content matches the intended path |
|
308 | # Content matches the intended path | |
309 | return True |
|
309 | return True | |
310 | return False |
|
310 | return False | |
311 | except (ValueError, RuntimeError): |
|
311 | except (ValueError, RuntimeError): | |
312 | pass |
|
312 | pass | |
313 |
|
313 | |||
314 | return False |
|
314 | return False | |
315 |
|
315 | |||
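
_validatedata() above checks a cached blob in two steps: the parsed size must fit inside the data (otherwise the blob was truncated), and the 20-byte node stored right after the file text must match the hex node the file is stored under. The real header is parsed by shallowutil.parsesizeflags, which is not shown in this hunk, so the sketch below substitutes a simplified '<size>\0' header purely for illustration:

    import binascii
    import os

    def validateblob(data, path):
        """Sketch of _validatedata with an assumed '<size>\\0<text><node>' layout."""
        try:
            header, rest = data.split(b'\0', 1)
            size = int(header)
            if len(rest) <= size:
                return False                         # truncated blob
            datanode = rest[size:size + 20]
            # The file must be stored under the hex of the node it contains.
            return os.path.basename(path) == binascii.hexlify(datanode)
        except (ValueError, IndexError):
            return False
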
316 | def gc(self, keepkeys): |
|
316 | def gc(self, keepkeys): | |
317 | ui = self.ui |
|
317 | ui = self.ui | |
318 | cachepath = self._path |
|
318 | cachepath = self._path | |
319 |
|
319 | |||
320 | # prune cache |
|
320 | # prune cache | |
321 | queue = pycompat.queue.PriorityQueue() |
|
321 | queue = pycompat.queue.PriorityQueue() | |
322 | originalsize = 0 |
|
322 | originalsize = 0 | |
323 | size = 0 |
|
323 | size = 0 | |
324 | count = 0 |
|
324 | count = 0 | |
325 | removed = 0 |
|
325 | removed = 0 | |
326 |
|
326 | |||
327 | # keep files newer than a day even if they aren't needed |
|
327 | # keep files newer than a day even if they aren't needed | |
328 | limit = time.time() - (60 * 60 * 24) |
|
328 | limit = time.time() - (60 * 60 * 24) | |
329 |
|
329 | |||
330 | progress = ui.makeprogress( |
|
330 | progress = ui.makeprogress( | |
331 | _(b"removing unnecessary files"), unit=b"files" |
|
331 | _(b"removing unnecessary files"), unit=b"files" | |
332 | ) |
|
332 | ) | |
333 | progress.update(0) |
|
333 | progress.update(0) | |
334 | for root, dirs, files in os.walk(cachepath): |
|
334 | for root, dirs, files in os.walk(cachepath): | |
335 | for file in files: |
|
335 | for file in files: | |
336 | if file == b'repos': |
|
336 | if file == b'repos': | |
337 | continue |
|
337 | continue | |
338 |
|
338 | |||
339 | # Don't delete pack files |
|
339 | # Don't delete pack files | |
340 | if b'/packs/' in root: |
|
340 | if b'/packs/' in root: | |
341 | continue |
|
341 | continue | |
342 |
|
342 | |||
343 | progress.update(count) |
|
343 | progress.update(count) | |
344 | path = os.path.join(root, file) |
|
344 | path = os.path.join(root, file) | |
345 | key = os.path.relpath(path, cachepath) |
|
345 | key = os.path.relpath(path, cachepath) | |
346 | count += 1 |
|
346 | count += 1 | |
347 | try: |
|
347 | try: | |
348 | pathstat = os.stat(path) |
|
348 | pathstat = os.stat(path) | |
349 | except OSError as e: |
|
349 | except OSError as e: | |
350 | # errno.ENOENT = no such file or directory |
|
350 | # errno.ENOENT = no such file or directory | |
351 | if e.errno != errno.ENOENT: |
|
351 | if e.errno != errno.ENOENT: | |
352 | raise |
|
352 | raise | |
353 | msg = _( |
|
353 | msg = _( | |
354 | b"warning: file %s was removed by another process\n" |
|
354 | b"warning: file %s was removed by another process\n" | |
355 | ) |
|
355 | ) | |
356 | ui.warn(msg % path) |
|
356 | ui.warn(msg % path) | |
357 | continue |
|
357 | continue | |
358 |
|
358 | |||
359 | originalsize += pathstat.st_size |
|
359 | originalsize += pathstat.st_size | |
360 |
|
360 | |||
361 | if key in keepkeys or pathstat.st_atime > limit: |
|
361 | if key in keepkeys or pathstat.st_atime > limit: | |
362 | queue.put((pathstat.st_atime, path, pathstat)) |
|
362 | queue.put((pathstat.st_atime, path, pathstat)) | |
363 | size += pathstat.st_size |
|
363 | size += pathstat.st_size | |
364 | else: |
|
364 | else: | |
365 | try: |
|
365 | try: | |
366 | shallowutil.unlinkfile(path) |
|
366 | shallowutil.unlinkfile(path) | |
367 | except OSError as e: |
|
367 | except OSError as e: | |
368 | # errno.ENOENT = no such file or directory |
|
368 | # errno.ENOENT = no such file or directory | |
369 | if e.errno != errno.ENOENT: |
|
369 | if e.errno != errno.ENOENT: | |
370 | raise |
|
370 | raise | |
371 | msg = _( |
|
371 | msg = _( | |
372 | b"warning: file %s was removed by another " |
|
372 | b"warning: file %s was removed by another " | |
373 | b"process\n" |
|
373 | b"process\n" | |
374 | ) |
|
374 | ) | |
375 | ui.warn(msg % path) |
|
375 | ui.warn(msg % path) | |
376 | continue |
|
376 | continue | |
377 | removed += 1 |
|
377 | removed += 1 | |
378 | progress.complete() |
|
378 | progress.complete() | |
379 |
|
379 | |||
380 | # remove oldest files until under limit |
|
380 | # remove oldest files until under limit | |
381 | limit = ui.configbytes(b"remotefilelog", b"cachelimit") |
|
381 | limit = ui.configbytes(b"remotefilelog", b"cachelimit") | |
382 | if size > limit: |
|
382 | if size > limit: | |
383 | excess = size - limit |
|
383 | excess = size - limit | |
384 | progress = ui.makeprogress( |
|
384 | progress = ui.makeprogress( | |
385 | _(b"enforcing cache limit"), unit=b"bytes", total=excess |
|
385 | _(b"enforcing cache limit"), unit=b"bytes", total=excess | |
386 | ) |
|
386 | ) | |
387 | removedexcess = 0 |
|
387 | removedexcess = 0 | |
388 | while queue and size > limit and size > 0: |
|
388 | while queue and size > limit and size > 0: | |
389 | progress.update(removedexcess) |
|
389 | progress.update(removedexcess) | |
390 | atime, oldpath, oldpathstat = queue.get() |
|
390 | atime, oldpath, oldpathstat = queue.get() | |
391 | try: |
|
391 | try: | |
392 | shallowutil.unlinkfile(oldpath) |
|
392 | shallowutil.unlinkfile(oldpath) | |
393 | except OSError as e: |
|
393 | except OSError as e: | |
394 | # errno.ENOENT = no such file or directory |
|
394 | # errno.ENOENT = no such file or directory | |
395 | if e.errno != errno.ENOENT: |
|
395 | if e.errno != errno.ENOENT: | |
396 | raise |
|
396 | raise | |
397 | msg = _( |
|
397 | msg = _( | |
398 | b"warning: file %s was removed by another process\n" |
|
398 | b"warning: file %s was removed by another process\n" | |
399 | ) |
|
399 | ) | |
400 | ui.warn(msg % oldpath) |
|
400 | ui.warn(msg % oldpath) | |
401 | size -= oldpathstat.st_size |
|
401 | size -= oldpathstat.st_size | |
402 | removed += 1 |
|
402 | removed += 1 | |
403 | removedexcess += oldpathstat.st_size |
|
403 | removedexcess += oldpathstat.st_size | |
404 | progress.complete() |
|
404 | progress.complete() | |
405 |
|
405 | |||
406 | ui.status( |
|
406 | ui.status( | |
407 | _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n") |
|
407 | _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n") | |
408 | % ( |
|
408 | % ( | |
409 | removed, |
|
409 | removed, | |
410 | count, |
|
410 | count, | |
411 | float(originalsize) / 1024.0 / 1024.0 / 1024.0, |
|
411 | float(originalsize) / 1024.0 / 1024.0 / 1024.0, | |
412 | float(size) / 1024.0 / 1024.0 / 1024.0, |
|
412 | float(size) / 1024.0 / 1024.0 / 1024.0, | |
413 | ) |
|
413 | ) | |
414 | ) |
|
414 | ) | |
415 |
|
415 | |||
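
gc() above prunes in two passes: files that are neither in keepkeys nor younger than a day are deleted immediately, and the survivors are queued by access time so that, if the cache is still over remotefilelog.cachelimit, the least recently used files go first. A compact standalone sketch of that second pass, with a plain heap and os calls standing in for Mercurial's queue, vfs and progress machinery:

    import heapq
    import os

    def enforcecachelimit(paths, limit):
        """Remove least-recently-used files until the total size is under limit."""
        queue = []
        size = 0
        for path in paths:
            st = os.stat(path)
            heapq.heappush(queue, (st.st_atime, path, st.st_size))
            size += st.st_size

        removed = 0
        while queue and size > limit:
            atime, oldpath, oldsize = heapq.heappop(queue)   # oldest atime first
            try:
                os.unlink(oldpath)
            except FileNotFoundError:
                pass                     # another process removed it already
            size -= oldsize
            removed += 1
        return removed, size
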
416 |
|
416 | |||
417 | class baseunionstore(object): |
|
417 | class baseunionstore(object): | |
418 | def __init__(self, *args, **kwargs): |
|
418 | def __init__(self, *args, **kwargs): | |
419 | # If one of the functions that iterates all of the stores is about to |
|
419 | # If one of the functions that iterates all of the stores is about to | |
420 | # throw a KeyError, try this many times with a full refresh between |
|
420 | # throw a KeyError, try this many times with a full refresh between | |
421 | # attempts. A repack operation may have moved data from one store to |
|
421 | # attempts. A repack operation may have moved data from one store to | |
422 | # another while we were running. |
|
422 | # another while we were running. | |
423 | self.numattempts = kwargs.get('numretries', 0) + 1 |
|
423 | self.numattempts = kwargs.get('numretries', 0) + 1 | |
424 |         # If not None, call this function on every retry and if the attempts are |

424 |         # If not None, call this function on every retry and if the attempts are | |
425 | # exhausted. |
|
425 | # exhausted. | |
426 | self.retrylog = kwargs.get('retrylog', None) |
|
426 | self.retrylog = kwargs.get('retrylog', None) | |
427 |
|
427 | |||
428 | def markforrefresh(self): |
|
428 | def markforrefresh(self): | |
429 | for store in self.stores: |
|
429 | for store in self.stores: | |
430 | if util.safehasattr(store, b'markforrefresh'): |
|
430 | if util.safehasattr(store, b'markforrefresh'): | |
431 | store.markforrefresh() |
|
431 | store.markforrefresh() | |
432 |
|
432 | |||
433 | @staticmethod |
|
433 | @staticmethod | |
434 | def retriable(fn): |
|
434 | def retriable(fn): | |
435 | def noop(*args): |
|
435 | def noop(*args): | |
436 | pass |
|
436 | pass | |
437 |
|
437 | |||
438 | def wrapped(self, *args, **kwargs): |
|
438 | def wrapped(self, *args, **kwargs): | |
439 | retrylog = self.retrylog or noop |
|
439 | retrylog = self.retrylog or noop | |
440 | funcname = fn.__name__ |
|
440 | funcname = fn.__name__ | |
441 | i = 0 |
|
441 | i = 0 | |
442 | while i < self.numattempts: |
|
442 | while i < self.numattempts: | |
443 | if i > 0: |
|
443 | if i > 0: | |
444 | retrylog( |
|
444 | retrylog( | |
445 | b're-attempting (n=%d) %s\n' |
|
445 | b're-attempting (n=%d) %s\n' | |
446 | % (i, pycompat.sysbytes(funcname)) |
|
446 | % (i, pycompat.sysbytes(funcname)) | |
447 | ) |
|
447 | ) | |
448 | self.markforrefresh() |
|
448 | self.markforrefresh() | |
449 | i += 1 |
|
449 | i += 1 | |
450 | try: |
|
450 | try: | |
451 | return fn(self, *args, **kwargs) |
|
451 | return fn(self, *args, **kwargs) | |
452 | except KeyError: |
|
452 | except KeyError: | |
453 | if i == self.numattempts: |
|
453 | if i == self.numattempts: | |
454 | # retries exhausted |
|
454 | # retries exhausted | |
455 | retrylog( |
|
455 | retrylog( | |
456 | b'retries exhausted in %s, raising KeyError\n' |
|
456 | b'retries exhausted in %s, raising KeyError\n' | |
457 | % pycompat.sysbytes(funcname) |
|
457 | % pycompat.sysbytes(funcname) | |
458 | ) |
|
458 | ) | |
459 | raise |
|
459 | raise | |
460 |
|
460 | |||
461 | return wrapped |
|
461 | return wrapped |
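
baseunionstore.retriable above wraps store methods so that a KeyError, which can simply mean a concurrent repack moved the data into a different pack, triggers a refresh and a retry rather than an immediate failure. A stripped-down version of the same decorator with generic names and without the retry log (the names here are illustrative, not the extension's API):

    import functools

    def retriable(refresh, attempts=3):
        """Retry the wrapped function on KeyError, refreshing between tries."""
        def decorator(fn):
            @functools.wraps(fn)
            def wrapped(*args, **kwargs):
                for i in range(attempts):
                    if i > 0:
                        refresh()        # e.g. rescan packs moved by a repack
                    try:
                        return fn(*args, **kwargs)
                    except KeyError:
                        if i == attempts - 1:
                            raise        # retries exhausted
            return wrapped
        return decorator

    # Usage sketch (assumed names): decorate a lookup with
    # @retriable(store.markforrefresh, attempts=store.numattempts)
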
@@ -1,477 +1,477 b'' | |||||
1 | # debugcommands.py - debug logic for remotefilelog |
|
1 | # debugcommands.py - debug logic for remotefilelog | |
2 | # |
|
2 | # | |
3 | # Copyright 2013 Facebook, Inc. |
|
3 | # Copyright 2013 Facebook, Inc. | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 | from __future__ import absolute_import |
|
7 | from __future__ import absolute_import | |
8 |
|
8 | |||
9 | import hashlib |
|
|||
10 | import os |
|
9 | import os | |
11 | import zlib |
|
10 | import zlib | |
12 |
|
11 | |||
13 | from mercurial.node import bin, hex, nullid, short |
|
12 | from mercurial.node import bin, hex, nullid, short | |
14 | from mercurial.i18n import _ |
|
13 | from mercurial.i18n import _ | |
15 | from mercurial.pycompat import open |
|
14 | from mercurial.pycompat import open | |
16 | from mercurial import ( |
|
15 | from mercurial import ( | |
17 | error, |
|
16 | error, | |
18 | filelog, |
|
17 | filelog, | |
19 | lock as lockmod, |
|
18 | lock as lockmod, | |
20 | node as nodemod, |
|
19 | node as nodemod, | |
21 | pycompat, |
|
20 | pycompat, | |
22 | revlog, |
|
21 | revlog, | |
23 | ) |
|
22 | ) | |
|
23 | from mercurial.utils import hashutil | |||
24 | from . import ( |
|
24 | from . import ( | |
25 | constants, |
|
25 | constants, | |
26 | datapack, |
|
26 | datapack, | |
27 | fileserverclient, |
|
27 | fileserverclient, | |
28 | historypack, |
|
28 | historypack, | |
29 | repack, |
|
29 | repack, | |
30 | shallowutil, |
|
30 | shallowutil, | |
31 | ) |
|
31 | ) | |
32 |
|
32 | |||
33 |
|
33 | |||
34 | def debugremotefilelog(ui, path, **opts): |
|
34 | def debugremotefilelog(ui, path, **opts): | |
35 | decompress = opts.get('decompress') |
|
35 | decompress = opts.get('decompress') | |
36 |
|
36 | |||
37 | size, firstnode, mapping = parsefileblob(path, decompress) |
|
37 | size, firstnode, mapping = parsefileblob(path, decompress) | |
38 |
|
38 | |||
39 | ui.status(_(b"size: %d bytes\n") % size) |
|
39 | ui.status(_(b"size: %d bytes\n") % size) | |
40 | ui.status(_(b"path: %s \n") % path) |
|
40 | ui.status(_(b"path: %s \n") % path) | |
41 | ui.status(_(b"key: %s \n") % (short(firstnode))) |
|
41 | ui.status(_(b"key: %s \n") % (short(firstnode))) | |
42 | ui.status(_(b"\n")) |
|
42 | ui.status(_(b"\n")) | |
43 | ui.status( |
|
43 | ui.status( | |
44 | _(b"%12s => %12s %13s %13s %12s\n") |
|
44 | _(b"%12s => %12s %13s %13s %12s\n") | |
45 | % (b"node", b"p1", b"p2", b"linknode", b"copyfrom") |
|
45 | % (b"node", b"p1", b"p2", b"linknode", b"copyfrom") | |
46 | ) |
|
46 | ) | |
47 |
|
47 | |||
48 | queue = [firstnode] |
|
48 | queue = [firstnode] | |
49 | while queue: |
|
49 | while queue: | |
50 | node = queue.pop(0) |
|
50 | node = queue.pop(0) | |
51 | p1, p2, linknode, copyfrom = mapping[node] |
|
51 | p1, p2, linknode, copyfrom = mapping[node] | |
52 | ui.status( |
|
52 | ui.status( | |
53 | _(b"%s => %s %s %s %s\n") |
|
53 | _(b"%s => %s %s %s %s\n") | |
54 | % (short(node), short(p1), short(p2), short(linknode), copyfrom) |
|
54 | % (short(node), short(p1), short(p2), short(linknode), copyfrom) | |
55 | ) |
|
55 | ) | |
56 | if p1 != nullid: |
|
56 | if p1 != nullid: | |
57 | queue.append(p1) |
|
57 | queue.append(p1) | |
58 | if p2 != nullid: |
|
58 | if p2 != nullid: | |
59 | queue.append(p2) |
|
59 | queue.append(p2) | |
60 |
|
60 | |||
61 |
|
61 | |||
62 | def buildtemprevlog(repo, file): |
|
62 | def buildtemprevlog(repo, file): | |
63 | # get filename key |
|
63 | # get filename key | |
64 |     filekey = nodemod.hex(hashlib.sha1(file).digest()) |

64 |     filekey = nodemod.hex(hashutil.sha1(file).digest()) | 
65 | filedir = os.path.join(repo.path, b'store/data', filekey) |
|
65 | filedir = os.path.join(repo.path, b'store/data', filekey) | |
66 |
|
66 | |||
67 | # sort all entries based on linkrev |
|
67 | # sort all entries based on linkrev | |
68 | fctxs = [] |
|
68 | fctxs = [] | |
69 | for filenode in os.listdir(filedir): |
|
69 | for filenode in os.listdir(filedir): | |
70 | if b'_old' not in filenode: |
|
70 | if b'_old' not in filenode: | |
71 | fctxs.append(repo.filectx(file, fileid=bin(filenode))) |
|
71 | fctxs.append(repo.filectx(file, fileid=bin(filenode))) | |
72 |
|
72 | |||
73 | fctxs = sorted(fctxs, key=lambda x: x.linkrev()) |
|
73 | fctxs = sorted(fctxs, key=lambda x: x.linkrev()) | |
74 |
|
74 | |||
75 | # add to revlog |
|
75 | # add to revlog | |
76 | temppath = repo.sjoin(b'data/temprevlog.i') |
|
76 | temppath = repo.sjoin(b'data/temprevlog.i') | |
77 | if os.path.exists(temppath): |
|
77 | if os.path.exists(temppath): | |
78 | os.remove(temppath) |
|
78 | os.remove(temppath) | |
79 | r = filelog.filelog(repo.svfs, b'temprevlog') |
|
79 | r = filelog.filelog(repo.svfs, b'temprevlog') | |
80 |
|
80 | |||
81 | class faket(object): |
|
81 | class faket(object): | |
82 | def add(self, a, b, c): |
|
82 | def add(self, a, b, c): | |
83 | pass |
|
83 | pass | |
84 |
|
84 | |||
85 | t = faket() |
|
85 | t = faket() | |
86 | for fctx in fctxs: |
|
86 | for fctx in fctxs: | |
87 | if fctx.node() not in repo: |
|
87 | if fctx.node() not in repo: | |
88 | continue |
|
88 | continue | |
89 |
|
89 | |||
90 | p = fctx.filelog().parents(fctx.filenode()) |
|
90 | p = fctx.filelog().parents(fctx.filenode()) | |
91 | meta = {} |
|
91 | meta = {} | |
92 | if fctx.renamed(): |
|
92 | if fctx.renamed(): | |
93 | meta[b'copy'] = fctx.renamed()[0] |
|
93 | meta[b'copy'] = fctx.renamed()[0] | |
94 | meta[b'copyrev'] = hex(fctx.renamed()[1]) |
|
94 | meta[b'copyrev'] = hex(fctx.renamed()[1]) | |
95 |
|
95 | |||
96 | r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1]) |
|
96 | r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1]) | |
97 |
|
97 | |||
98 | return r |
|
98 | return r | |
99 |
|
99 | |||
100 |
|
100 | |||
101 | def debugindex(orig, ui, repo, file_=None, **opts): |
|
101 | def debugindex(orig, ui, repo, file_=None, **opts): | |
102 | """dump the contents of an index file""" |
|
102 | """dump the contents of an index file""" | |
103 | if ( |
|
103 | if ( | |
104 | opts.get('changelog') |
|
104 | opts.get('changelog') | |
105 | or opts.get('manifest') |
|
105 | or opts.get('manifest') | |
106 | or opts.get('dir') |
|
106 | or opts.get('dir') | |
107 | or not shallowutil.isenabled(repo) |
|
107 | or not shallowutil.isenabled(repo) | |
108 | or not repo.shallowmatch(file_) |
|
108 | or not repo.shallowmatch(file_) | |
109 | ): |
|
109 | ): | |
110 | return orig(ui, repo, file_, **opts) |
|
110 | return orig(ui, repo, file_, **opts) | |
111 |
|
111 | |||
112 | r = buildtemprevlog(repo, file_) |
|
112 | r = buildtemprevlog(repo, file_) | |
113 |
|
113 | |||
114 | # debugindex like normal |
|
114 | # debugindex like normal | |
115 | format = opts.get(b'format', 0) |
|
115 | format = opts.get(b'format', 0) | |
116 | if format not in (0, 1): |
|
116 | if format not in (0, 1): | |
117 | raise error.Abort(_(b"unknown format %d") % format) |
|
117 | raise error.Abort(_(b"unknown format %d") % format) | |
118 |
|
118 | |||
119 | generaldelta = r.version & revlog.FLAG_GENERALDELTA |
|
119 | generaldelta = r.version & revlog.FLAG_GENERALDELTA | |
120 | if generaldelta: |
|
120 | if generaldelta: | |
121 | basehdr = b' delta' |
|
121 | basehdr = b' delta' | |
122 | else: |
|
122 | else: | |
123 | basehdr = b' base' |
|
123 | basehdr = b' base' | |
124 |
|
124 | |||
125 | if format == 0: |
|
125 | if format == 0: | |
126 | ui.write( |
|
126 | ui.write( | |
127 | ( |
|
127 | ( | |
128 | b" rev offset length " + basehdr + b" linkrev" |
|
128 | b" rev offset length " + basehdr + b" linkrev" | |
129 | b" nodeid p1 p2\n" |
|
129 | b" nodeid p1 p2\n" | |
130 | ) |
|
130 | ) | |
131 | ) |
|
131 | ) | |
132 | elif format == 1: |
|
132 | elif format == 1: | |
133 | ui.write( |
|
133 | ui.write( | |
134 | ( |
|
134 | ( | |
135 | b" rev flag offset length" |
|
135 | b" rev flag offset length" | |
136 | b" size " + basehdr + b" link p1 p2" |
|
136 | b" size " + basehdr + b" link p1 p2" | |
137 | b" nodeid\n" |
|
137 | b" nodeid\n" | |
138 | ) |
|
138 | ) | |
139 | ) |
|
139 | ) | |
140 |
|
140 | |||
141 | for i in r: |
|
141 | for i in r: | |
142 | node = r.node(i) |
|
142 | node = r.node(i) | |
143 | if generaldelta: |
|
143 | if generaldelta: | |
144 | base = r.deltaparent(i) |
|
144 | base = r.deltaparent(i) | |
145 | else: |
|
145 | else: | |
146 | base = r.chainbase(i) |
|
146 | base = r.chainbase(i) | |
147 | if format == 0: |
|
147 | if format == 0: | |
148 | try: |
|
148 | try: | |
149 | pp = r.parents(node) |
|
149 | pp = r.parents(node) | |
150 | except Exception: |
|
150 | except Exception: | |
151 | pp = [nullid, nullid] |
|
151 | pp = [nullid, nullid] | |
152 | ui.write( |
|
152 | ui.write( | |
153 | b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n" |
|
153 | b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n" | |
154 | % ( |
|
154 | % ( | |
155 | i, |
|
155 | i, | |
156 | r.start(i), |
|
156 | r.start(i), | |
157 | r.length(i), |
|
157 | r.length(i), | |
158 | base, |
|
158 | base, | |
159 | r.linkrev(i), |
|
159 | r.linkrev(i), | |
160 | short(node), |
|
160 | short(node), | |
161 | short(pp[0]), |
|
161 | short(pp[0]), | |
162 | short(pp[1]), |
|
162 | short(pp[1]), | |
163 | ) |
|
163 | ) | |
164 | ) |
|
164 | ) | |
165 | elif format == 1: |
|
165 | elif format == 1: | |
166 | pr = r.parentrevs(i) |
|
166 | pr = r.parentrevs(i) | |
167 | ui.write( |
|
167 | ui.write( | |
168 | b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" |
|
168 | b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" | |
169 | % ( |
|
169 | % ( | |
170 | i, |
|
170 | i, | |
171 | r.flags(i), |
|
171 | r.flags(i), | |
172 | r.start(i), |
|
172 | r.start(i), | |
173 | r.length(i), |
|
173 | r.length(i), | |
174 | r.rawsize(i), |
|
174 | r.rawsize(i), | |
175 | base, |
|
175 | base, | |
176 | r.linkrev(i), |
|
176 | r.linkrev(i), | |
177 | pr[0], |
|
177 | pr[0], | |
178 | pr[1], |
|
178 | pr[1], | |
179 | short(node), |
|
179 | short(node), | |
180 | ) |
|
180 | ) | |
181 | ) |
|
181 | ) | |
182 |
|
182 | |||
183 |
|
183 | |||
184 | def debugindexdot(orig, ui, repo, file_): |
|
184 | def debugindexdot(orig, ui, repo, file_): | |
185 | """dump an index DAG as a graphviz dot file""" |
|
185 | """dump an index DAG as a graphviz dot file""" | |
186 | if not shallowutil.isenabled(repo): |
|
186 | if not shallowutil.isenabled(repo): | |
187 | return orig(ui, repo, file_) |
|
187 | return orig(ui, repo, file_) | |
188 |
|
188 | |||
189 | r = buildtemprevlog(repo, os.path.basename(file_)[:-2]) |
|
189 | r = buildtemprevlog(repo, os.path.basename(file_)[:-2]) | |
190 |
|
190 | |||
191 | ui.writenoi18n(b"digraph G {\n") |
|
191 | ui.writenoi18n(b"digraph G {\n") | |
192 | for i in r: |
|
192 | for i in r: | |
193 | node = r.node(i) |
|
193 | node = r.node(i) | |
194 | pp = r.parents(node) |
|
194 | pp = r.parents(node) | |
195 | ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i)) |
|
195 | ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i)) | |
196 | if pp[1] != nullid: |
|
196 | if pp[1] != nullid: | |
197 | ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i)) |
|
197 | ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i)) | |
198 | ui.write(b"}\n") |
|
198 | ui.write(b"}\n") | |
199 |
|
199 | |||
200 |
|
200 | |||
201 | def verifyremotefilelog(ui, path, **opts): |
|
201 | def verifyremotefilelog(ui, path, **opts): | |
202 | decompress = opts.get('decompress') |
|
202 | decompress = opts.get('decompress') | |
203 |
|
203 | |||
204 | for root, dirs, files in os.walk(path): |
|
204 | for root, dirs, files in os.walk(path): | |
205 | for file in files: |
|
205 | for file in files: | |
206 | if file == b"repos": |
|
206 | if file == b"repos": | |
207 | continue |
|
207 | continue | |
208 | filepath = os.path.join(root, file) |
|
208 | filepath = os.path.join(root, file) | |
209 | size, firstnode, mapping = parsefileblob(filepath, decompress) |
|
209 | size, firstnode, mapping = parsefileblob(filepath, decompress) | |
210 | for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping): |
|
210 | for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping): | |
211 | if linknode == nullid: |
|
211 | if linknode == nullid: | |
212 | actualpath = os.path.relpath(root, path) |
|
212 | actualpath = os.path.relpath(root, path) | |
213 | key = fileserverclient.getcachekey( |
|
213 | key = fileserverclient.getcachekey( | |
214 | b"reponame", actualpath, file |
|
214 | b"reponame", actualpath, file | |
215 | ) |
|
215 | ) | |
216 | ui.status( |
|
216 | ui.status( | |
217 | b"%s %s\n" % (key, os.path.relpath(filepath, path)) |
|
217 | b"%s %s\n" % (key, os.path.relpath(filepath, path)) | |
218 | ) |
|
218 | ) | |
219 |
|
219 | |||
220 |
|
220 | |||
221 | def _decompressblob(raw): |
|
221 | def _decompressblob(raw): | |
222 | return zlib.decompress(raw) |
|
222 | return zlib.decompress(raw) | |
223 |
|
223 | |||
224 |
|
224 | |||
225 | def parsefileblob(path, decompress): |
|
225 | def parsefileblob(path, decompress): | |
226 | f = open(path, b"rb") |
|
226 | f = open(path, b"rb") | |
227 | try: |
|
227 | try: | |
228 | raw = f.read() |
|
228 | raw = f.read() | |
229 | finally: |
|
229 | finally: | |
230 | f.close() |
|
230 | f.close() | |
231 |
|
231 | |||
232 | if decompress: |
|
232 | if decompress: | |
233 | raw = _decompressblob(raw) |
|
233 | raw = _decompressblob(raw) | |
234 |
|
234 | |||
235 | offset, size, flags = shallowutil.parsesizeflags(raw) |
|
235 | offset, size, flags = shallowutil.parsesizeflags(raw) | |
236 | start = offset + size |
|
236 | start = offset + size | |
237 |
|
237 | |||
238 | firstnode = None |
|
238 | firstnode = None | |
239 |
|
239 | |||
240 | mapping = {} |
|
240 | mapping = {} | |
241 | while start < len(raw): |
|
241 | while start < len(raw): | |
242 | divider = raw.index(b'\0', start + 80) |
|
242 | divider = raw.index(b'\0', start + 80) | |
243 |
|
243 | |||
244 | currentnode = raw[start : (start + 20)] |
|
244 | currentnode = raw[start : (start + 20)] | |
245 | if not firstnode: |
|
245 | if not firstnode: | |
246 | firstnode = currentnode |
|
246 | firstnode = currentnode | |
247 |
|
247 | |||
248 | p1 = raw[(start + 20) : (start + 40)] |
|
248 | p1 = raw[(start + 20) : (start + 40)] | |
249 | p2 = raw[(start + 40) : (start + 60)] |
|
249 | p2 = raw[(start + 40) : (start + 60)] | |
250 | linknode = raw[(start + 60) : (start + 80)] |
|
250 | linknode = raw[(start + 60) : (start + 80)] | |
251 | copyfrom = raw[(start + 80) : divider] |
|
251 | copyfrom = raw[(start + 80) : divider] | |
252 |
|
252 | |||
253 | mapping[currentnode] = (p1, p2, linknode, copyfrom) |
|
253 | mapping[currentnode] = (p1, p2, linknode, copyfrom) | |
254 | start = divider + 1 |
|
254 | start = divider + 1 | |
255 |
|
255 | |||
256 | return size, firstnode, mapping |
|
256 | return size, firstnode, mapping | |
257 |
|
257 | |||
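
parsefileblob() above walks the ancestor records that follow the file text: each record is four 20-byte binary hashes (node, p1, p2, linknode) followed by an optional copy-from path and a NUL terminator, which is why the divider is searched for starting at start + 80. The round-trip sketch below builds such a record stream and parses it back; packing blobs this way is only for illustration, not how the extension produces them:

    def packrecords(records):
        """records: iterable of (node, p1, p2, linknode, copyfrom), 20-byte hashes."""
        raw = b''
        for node, p1, p2, linknode, copyfrom in records:
            raw += node + p1 + p2 + linknode + copyfrom + b'\0'
        return raw

    def parserecords(raw):
        mapping = {}
        firstnode = None
        start = 0
        while start < len(raw):
            divider = raw.index(b'\0', start + 80)   # copyfrom ends at the NUL
            node = raw[start:start + 20]
            if firstnode is None:
                firstnode = node
            mapping[node] = (
                raw[start + 20:start + 40],          # p1
                raw[start + 40:start + 60],          # p2
                raw[start + 60:start + 80],          # linknode
                raw[start + 80:divider],             # copyfrom
            )
            start = divider + 1
        return firstnode, mapping

    nullid = b'\0' * 20
    rec = (b'\x11' * 20, nullid, nullid, b'\x22' * 20, b'old/name')
    first, mapping = parserecords(packrecords([rec]))
    assert first == b'\x11' * 20 and mapping[first][3] == b'old/name'
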
258 |
|
258 | |||
259 | def debugdatapack(ui, *paths, **opts): |
|
259 | def debugdatapack(ui, *paths, **opts): | |
260 | for path in paths: |
|
260 | for path in paths: | |
261 | if b'.data' in path: |
|
261 | if b'.data' in path: | |
262 | path = path[: path.index(b'.data')] |
|
262 | path = path[: path.index(b'.data')] | |
263 | ui.write(b"%s:\n" % path) |
|
263 | ui.write(b"%s:\n" % path) | |
264 | dpack = datapack.datapack(path) |
|
264 | dpack = datapack.datapack(path) | |
265 | node = opts.get('node') |
|
265 | node = opts.get('node') | |
266 | if node: |
|
266 | if node: | |
267 | deltachain = dpack.getdeltachain(b'', bin(node)) |
|
267 | deltachain = dpack.getdeltachain(b'', bin(node)) | |
268 | dumpdeltachain(ui, deltachain, **opts) |
|
268 | dumpdeltachain(ui, deltachain, **opts) | |
269 | return |
|
269 | return | |
270 |
|
270 | |||
271 | if opts.get('long'): |
|
271 | if opts.get('long'): | |
272 | hashformatter = hex |
|
272 | hashformatter = hex | |
273 | hashlen = 42 |
|
273 | hashlen = 42 | |
274 | else: |
|
274 | else: | |
275 | hashformatter = short |
|
275 | hashformatter = short | |
276 | hashlen = 14 |
|
276 | hashlen = 14 | |
277 |
|
277 | |||
278 | lastfilename = None |
|
278 | lastfilename = None | |
279 | totaldeltasize = 0 |
|
279 | totaldeltasize = 0 | |
280 | totalblobsize = 0 |
|
280 | totalblobsize = 0 | |
281 |
|
281 | |||
282 | def printtotals(): |
|
282 | def printtotals(): | |
283 | if lastfilename is not None: |
|
283 | if lastfilename is not None: | |
284 | ui.write(b"\n") |
|
284 | ui.write(b"\n") | |
285 | if not totaldeltasize or not totalblobsize: |
|
285 | if not totaldeltasize or not totalblobsize: | |
286 | return |
|
286 | return | |
287 | difference = totalblobsize - totaldeltasize |
|
287 | difference = totalblobsize - totaldeltasize | |
288 | deltastr = b"%0.1f%% %s" % ( |
|
288 | deltastr = b"%0.1f%% %s" % ( | |
289 | (100.0 * abs(difference) / totalblobsize), |
|
289 | (100.0 * abs(difference) / totalblobsize), | |
290 | (b"smaller" if difference > 0 else b"bigger"), |
|
290 | (b"smaller" if difference > 0 else b"bigger"), | |
291 | ) |
|
291 | ) | |
292 |
|
292 | |||
293 | ui.writenoi18n( |
|
293 | ui.writenoi18n( | |
294 | b"Total:%s%s %s (%s)\n" |
|
294 | b"Total:%s%s %s (%s)\n" | |
295 | % ( |
|
295 | % ( | |
296 | b"".ljust(2 * hashlen - len(b"Total:")), |
|
296 | b"".ljust(2 * hashlen - len(b"Total:")), | |
297 | (b'%d' % totaldeltasize).ljust(12), |
|
297 | (b'%d' % totaldeltasize).ljust(12), | |
298 | (b'%d' % totalblobsize).ljust(9), |
|
298 | (b'%d' % totalblobsize).ljust(9), | |
299 | deltastr, |
|
299 | deltastr, | |
300 | ) |
|
300 | ) | |
301 | ) |
|
301 | ) | |
302 |
|
302 | |||
303 | bases = {} |
|
303 | bases = {} | |
304 | nodes = set() |
|
304 | nodes = set() | |
305 | failures = 0 |
|
305 | failures = 0 | |
306 | for filename, node, deltabase, deltalen in dpack.iterentries(): |
|
306 | for filename, node, deltabase, deltalen in dpack.iterentries(): | |
307 | bases[node] = deltabase |
|
307 | bases[node] = deltabase | |
308 | if node in nodes: |
|
308 | if node in nodes: | |
309 | ui.write((b"Bad entry: %s appears twice\n" % short(node))) |
|
309 | ui.write((b"Bad entry: %s appears twice\n" % short(node))) | |
310 | failures += 1 |
|
310 | failures += 1 | |
311 | nodes.add(node) |
|
311 | nodes.add(node) | |
312 | if filename != lastfilename: |
|
312 | if filename != lastfilename: | |
313 | printtotals() |
|
313 | printtotals() | |
314 | name = b'(empty name)' if filename == b'' else filename |
|
314 | name = b'(empty name)' if filename == b'' else filename | |
315 | ui.write(b"%s:\n" % name) |
|
315 | ui.write(b"%s:\n" % name) | |
316 | ui.write( |
|
316 | ui.write( | |
317 | b"%s%s%s%s\n" |
|
317 | b"%s%s%s%s\n" | |
318 | % ( |
|
318 | % ( | |
319 | b"Node".ljust(hashlen), |
|
319 | b"Node".ljust(hashlen), | |
320 | b"Delta Base".ljust(hashlen), |
|
320 | b"Delta Base".ljust(hashlen), | |
321 | b"Delta Length".ljust(14), |
|
321 | b"Delta Length".ljust(14), | |
322 | b"Blob Size".ljust(9), |
|
322 | b"Blob Size".ljust(9), | |
323 | ) |
|
323 | ) | |
324 | ) |
|
324 | ) | |
325 | lastfilename = filename |
|
325 | lastfilename = filename | |
326 | totalblobsize = 0 |
|
326 | totalblobsize = 0 | |
327 | totaldeltasize = 0 |
|
327 | totaldeltasize = 0 | |
328 |
|
328 | |||
329 | # Metadata could be missing, in which case it will be an empty dict. |
|
329 | # Metadata could be missing, in which case it will be an empty dict. | |
330 | meta = dpack.getmeta(filename, node) |
|
330 | meta = dpack.getmeta(filename, node) | |
331 | if constants.METAKEYSIZE in meta: |
|
331 | if constants.METAKEYSIZE in meta: | |
332 | blobsize = meta[constants.METAKEYSIZE] |
|
332 | blobsize = meta[constants.METAKEYSIZE] | |
333 | totaldeltasize += deltalen |
|
333 | totaldeltasize += deltalen | |
334 | totalblobsize += blobsize |
|
334 | totalblobsize += blobsize | |
335 | else: |
|
335 | else: | |
336 | blobsize = b"(missing)" |
|
336 | blobsize = b"(missing)" | |
337 | ui.write( |
|
337 | ui.write( | |
338 | b"%s %s %s%s\n" |
|
338 | b"%s %s %s%s\n" | |
339 | % ( |
|
339 | % ( | |
340 | hashformatter(node), |
|
340 | hashformatter(node), | |
341 | hashformatter(deltabase), |
|
341 | hashformatter(deltabase), | |
342 | (b'%d' % deltalen).ljust(14), |
|
342 | (b'%d' % deltalen).ljust(14), | |
343 | pycompat.bytestr(blobsize), |
|
343 | pycompat.bytestr(blobsize), | |
344 | ) |
|
344 | ) | |
345 | ) |
|
345 | ) | |
346 |
|
346 | |||
347 | if filename is not None: |
|
347 | if filename is not None: | |
348 | printtotals() |
|
348 | printtotals() | |
349 |
|
349 | |||
350 | failures += _sanitycheck(ui, set(nodes), bases) |
|
350 | failures += _sanitycheck(ui, set(nodes), bases) | |
351 | if failures > 1: |
|
351 | if failures > 1: | |
352 | ui.warn((b"%d failures\n" % failures)) |
|
352 | ui.warn((b"%d failures\n" % failures)) | |
353 | return 1 |
|
353 | return 1 | |
354 |
|
354 | |||
355 |
|
355 | |||
356 | def _sanitycheck(ui, nodes, bases): |
|
356 | def _sanitycheck(ui, nodes, bases): | |
357 | """ |
|
357 | """ | |
358 | Does some basic sanity checking on a packfile with ``nodes`` and ``bases`` (a |
|
358 | Does some basic sanity checking on a packfile with ``nodes`` and ``bases`` (a | |
359 | mapping of node->base): |
|
359 | mapping of node->base): | |
360 |
|
360 | |||
361 | - Each deltabase must itself be a node elsewhere in the pack |
|
361 | - Each deltabase must itself be a node elsewhere in the pack | |
362 | - There must be no cycles |
|
362 | - There must be no cycles | |
363 | """ |
|
363 | """ | |
364 | failures = 0 |
|
364 | failures = 0 | |
365 | for node in nodes: |
|
365 | for node in nodes: | |
366 | seen = set() |
|
366 | seen = set() | |
367 | current = node |
|
367 | current = node | |
368 | deltabase = bases[current] |
|
368 | deltabase = bases[current] | |
369 |
|
369 | |||
370 | while deltabase != nullid: |
|
370 | while deltabase != nullid: | |
371 | if deltabase not in nodes: |
|
371 | if deltabase not in nodes: | |
372 | ui.warn( |
|
372 | ui.warn( | |
373 | ( |
|
373 | ( | |
374 | b"Bad entry: %s has an unknown deltabase (%s)\n" |
|
374 | b"Bad entry: %s has an unknown deltabase (%s)\n" | |
375 | % (short(node), short(deltabase)) |
|
375 | % (short(node), short(deltabase)) | |
376 | ) |
|
376 | ) | |
377 | ) |
|
377 | ) | |
378 | failures += 1 |
|
378 | failures += 1 | |
379 | break |
|
379 | break | |
380 |
|
380 | |||
381 | if deltabase in seen: |
|
381 | if deltabase in seen: | |
382 | ui.warn( |
|
382 | ui.warn( | |
383 | ( |
|
383 | ( | |
384 | b"Bad entry: %s has a cycle (at %s)\n" |
|
384 | b"Bad entry: %s has a cycle (at %s)\n" | |
385 | % (short(node), short(deltabase)) |
|
385 | % (short(node), short(deltabase)) | |
386 | ) |
|
386 | ) | |
387 | ) |
|
387 | ) | |
388 | failures += 1 |
|
388 | failures += 1 | |
389 | break |
|
389 | break | |
390 |
|
390 | |||
391 | current = deltabase |
|
391 | current = deltabase | |
392 | seen.add(current) |
|
392 | seen.add(current) | |
393 | deltabase = bases[current] |
|
393 | deltabase = bases[current] | |
394 | # Since ``node`` begins a valid chain, reset/memoize its base to nullid |
|
394 | # Since ``node`` begins a valid chain, reset/memoize its base to nullid | |
395 | # so we don't traverse it again. |
|
395 | # so we don't traverse it again. | |
396 | bases[node] = nullid |
|
396 | bases[node] = nullid | |
397 | return failures |
|
397 | return failures | |
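A small standalone illustration of the two invariants the docstring describes, re-implementing the walk with fake 20-byte node values purely for demonstration (this is not the extension's code):

    nullid = b'\x00' * 20
    nodes = {b'a' * 20, b'b' * 20}
    bases = {b'a' * 20: b'b' * 20, b'b' * 20: b'a' * 20}   # deliberate cycle

    for start in sorted(nodes):
        seen, current = set(), start
        while bases.get(current, nullid) != nullid:
            base = bases[current]
            if base not in nodes:        # unknown delta base -> failure
                print('unknown base while walking from', start)
                break
            if base in seen:             # cycle -> failure
                print('cycle detected while walking from', start)
                break
            seen.add(base)
            current = base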
398 |
|
398 | |||
399 |
|
399 | |||
400 | def dumpdeltachain(ui, deltachain, **opts): |
|
400 | def dumpdeltachain(ui, deltachain, **opts): | |
401 | hashformatter = hex |
|
401 | hashformatter = hex | |
402 | hashlen = 40 |
|
402 | hashlen = 40 | |
403 |
|
403 | |||
404 | lastfilename = None |
|
404 | lastfilename = None | |
405 | for filename, node, filename, deltabasenode, delta in deltachain: |
|
405 | for filename, node, filename, deltabasenode, delta in deltachain: | |
406 | if filename != lastfilename: |
|
406 | if filename != lastfilename: | |
407 | ui.write(b"\n%s\n" % filename) |
|
407 | ui.write(b"\n%s\n" % filename) | |
408 | lastfilename = filename |
|
408 | lastfilename = filename | |
409 | ui.write( |
|
409 | ui.write( | |
410 | b"%s %s %s %s\n" |
|
410 | b"%s %s %s %s\n" | |
411 | % ( |
|
411 | % ( | |
412 | b"Node".ljust(hashlen), |
|
412 | b"Node".ljust(hashlen), | |
413 | b"Delta Base".ljust(hashlen), |
|
413 | b"Delta Base".ljust(hashlen), | |
414 | b"Delta SHA1".ljust(hashlen), |
|
414 | b"Delta SHA1".ljust(hashlen), | |
415 | b"Delta Length".ljust(6), |
|
415 | b"Delta Length".ljust(6), | |
416 | ) |
|
416 | ) | |
417 | ) |
|
417 | ) | |
418 |
|
418 | |||
419 | ui.write( |
|
419 | ui.write( | |
420 | b"%s %s %s %d\n" |
|
420 | b"%s %s %s %d\n" | |
421 | % ( |
|
421 | % ( | |
422 | hashformatter(node), |
|
422 | hashformatter(node), | |
423 | hashformatter(deltabasenode), |
|
423 | hashformatter(deltabasenode), | |
424 | nodemod.hex(hashlib.sha1(delta).digest()), |
|
424 | nodemod.hex(hashutil.sha1(delta).digest()), | |
425 | len(delta), |
|
425 | len(delta), | |
426 | ) |
|
426 | ) | |
427 | ) |
|
427 | ) | |
428 |
|
428 | |||
429 |
|
429 | |||
430 | def debughistorypack(ui, path): |
|
430 | def debughistorypack(ui, path): | |
431 | if b'.hist' in path: |
|
431 | if b'.hist' in path: | |
432 | path = path[: path.index(b'.hist')] |
|
432 | path = path[: path.index(b'.hist')] | |
433 | hpack = historypack.historypack(path) |
|
433 | hpack = historypack.historypack(path) | |
434 |
|
434 | |||
435 | lastfilename = None |
|
435 | lastfilename = None | |
436 | for entry in hpack.iterentries(): |
|
436 | for entry in hpack.iterentries(): | |
437 | filename, node, p1node, p2node, linknode, copyfrom = entry |
|
437 | filename, node, p1node, p2node, linknode, copyfrom = entry | |
438 | if filename != lastfilename: |
|
438 | if filename != lastfilename: | |
439 | ui.write(b"\n%s\n" % filename) |
|
439 | ui.write(b"\n%s\n" % filename) | |
440 | ui.write( |
|
440 | ui.write( | |
441 | b"%s%s%s%s%s\n" |
|
441 | b"%s%s%s%s%s\n" | |
442 | % ( |
|
442 | % ( | |
443 | b"Node".ljust(14), |
|
443 | b"Node".ljust(14), | |
444 | b"P1 Node".ljust(14), |
|
444 | b"P1 Node".ljust(14), | |
445 | b"P2 Node".ljust(14), |
|
445 | b"P2 Node".ljust(14), | |
446 | b"Link Node".ljust(14), |
|
446 | b"Link Node".ljust(14), | |
447 | b"Copy From", |
|
447 | b"Copy From", | |
448 | ) |
|
448 | ) | |
449 | ) |
|
449 | ) | |
450 | lastfilename = filename |
|
450 | lastfilename = filename | |
451 | ui.write( |
|
451 | ui.write( | |
452 | b"%s %s %s %s %s\n" |
|
452 | b"%s %s %s %s %s\n" | |
453 | % ( |
|
453 | % ( | |
454 | short(node), |
|
454 | short(node), | |
455 | short(p1node), |
|
455 | short(p1node), | |
456 | short(p2node), |
|
456 | short(p2node), | |
457 | short(linknode), |
|
457 | short(linknode), | |
458 | copyfrom, |
|
458 | copyfrom, | |
459 | ) |
|
459 | ) | |
460 | ) |
|
460 | ) | |
461 |
|
461 | |||
462 |
|
462 | |||
463 | def debugwaitonrepack(repo): |
|
463 | def debugwaitonrepack(repo): | |
464 | with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1): |
|
464 | with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1): | |
465 | return |
|
465 | return | |
466 |
|
466 | |||
467 |
|
467 | |||
468 | def debugwaitonprefetch(repo): |
|
468 | def debugwaitonprefetch(repo): | |
469 | with repo._lock( |
|
469 | with repo._lock( | |
470 | repo.svfs, |
|
470 | repo.svfs, | |
471 | b"prefetchlock", |
|
471 | b"prefetchlock", | |
472 | True, |
|
472 | True, | |
473 | None, |
|
473 | None, | |
474 | None, |
|
474 | None, | |
475 | _(b'prefetching in %s') % repo.origroot, |
|
475 | _(b'prefetching in %s') % repo.origroot, | |
476 | ): |
|
476 | ): | |
477 | pass |
|
477 | pass |
@@ -1,667 +1,669 b'' | |||||
1 | # fileserverclient.py - client for communicating with the cache process |
|
1 | # fileserverclient.py - client for communicating with the cache process | |
2 | # |
|
2 | # | |
3 | # Copyright 2013 Facebook, Inc. |
|
3 | # Copyright 2013 Facebook, Inc. | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | from __future__ import absolute_import |
|
8 | from __future__ import absolute_import | |
9 |
|
9 | |||
10 | import hashlib |
|
|||
11 | import io |
|
10 | import io | |
12 | import os |
|
11 | import os | |
13 | import threading |
|
12 | import threading | |
14 | import time |
|
13 | import time | |
15 | import zlib |
|
14 | import zlib | |
16 |
|
15 | |||
17 | from mercurial.i18n import _ |
|
16 | from mercurial.i18n import _ | |
18 | from mercurial.node import bin, hex, nullid |
|
17 | from mercurial.node import bin, hex, nullid | |
19 | from mercurial import ( |
|
18 | from mercurial import ( | |
20 | error, |
|
19 | error, | |
21 | node, |
|
20 | node, | |
22 | pycompat, |
|
21 | pycompat, | |
23 | revlog, |
|
22 | revlog, | |
24 | sshpeer, |
|
23 | sshpeer, | |
25 | util, |
|
24 | util, | |
26 | wireprotov1peer, |
|
25 | wireprotov1peer, | |
27 | ) |
|
26 | ) | |
28 | from mercurial.utils import procutil |
|
27 | from mercurial.utils import ( | |
|
28 | hashutil, | |||
|
29 | procutil, | |||
|
30 | ) | |||
29 |
|
31 | |||
30 | from . import ( |
|
32 | from . import ( | |
31 | constants, |
|
33 | constants, | |
32 | contentstore, |
|
34 | contentstore, | |
33 | metadatastore, |
|
35 | metadatastore, | |
34 | ) |
|
36 | ) | |
35 |
|
37 | |||
36 | _sshv1peer = sshpeer.sshv1peer |
|
38 | _sshv1peer = sshpeer.sshv1peer | |
37 |
|
39 | |||
38 | # Statistics for debugging |
|
40 | # Statistics for debugging | |
39 | fetchcost = 0 |
|
41 | fetchcost = 0 | |
40 | fetches = 0 |
|
42 | fetches = 0 | |
41 | fetched = 0 |
|
43 | fetched = 0 | |
42 | fetchmisses = 0 |
|
44 | fetchmisses = 0 | |
43 |
|
45 | |||
44 | _lfsmod = None |
|
46 | _lfsmod = None | |
45 |
|
47 | |||
46 |
|
48 | |||
47 | def getcachekey(reponame, file, id): |
|
49 | def getcachekey(reponame, file, id): | |
48 | pathhash = node.hex(hashlib.sha1(file).digest()) |
|
50 | pathhash = node.hex(hashutil.sha1(file).digest()) | |
49 | return os.path.join(reponame, pathhash[:2], pathhash[2:], id) |
|
51 | return os.path.join(reponame, pathhash[:2], pathhash[2:], id) | |
50 |
|
52 | |||
51 |
|
53 | |||
52 | def getlocalkey(file, id): |
|
54 | def getlocalkey(file, id): | |
53 | pathhash = node.hex(hashlib.sha1(file).digest()) |
|
55 | pathhash = node.hex(hashutil.sha1(file).digest()) | |
54 | return os.path.join(pathhash, id) |
|
56 | return os.path.join(pathhash, id) | |
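A rough illustration of the cache layout these two helpers produce, with made-up repo, file and node values; plain hashlib stands in for hashutil here just to keep the snippet standalone:

    import hashlib
    import os

    reponame, filename = b'myrepo', b'dir/file.py'
    fileid = b'0123456789abcdef0123456789abcdef01234567'   # fake 40-hex file node
    pathhash = hashlib.sha1(filename).hexdigest().encode('ascii')

    cachekey = os.path.join(reponame, pathhash[:2], pathhash[2:], fileid)
    localkey = os.path.join(pathhash, fileid)
    # cachekey ~ b'myrepo/<2 hex>/<38 hex>/<40-hex id>', localkey ~ b'<40 hex>/<40-hex id>'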
55 |
|
57 | |||
56 |
|
58 | |||
57 | def peersetup(ui, peer): |
|
59 | def peersetup(ui, peer): | |
58 | class remotefilepeer(peer.__class__): |
|
60 | class remotefilepeer(peer.__class__): | |
59 | @wireprotov1peer.batchable |
|
61 | @wireprotov1peer.batchable | |
60 | def x_rfl_getfile(self, file, node): |
|
62 | def x_rfl_getfile(self, file, node): | |
61 | if not self.capable(b'x_rfl_getfile'): |
|
63 | if not self.capable(b'x_rfl_getfile'): | |
62 | raise error.Abort( |
|
64 | raise error.Abort( | |
63 | b'configured remotefile server does not support getfile' |
|
65 | b'configured remotefile server does not support getfile' | |
64 | ) |
|
66 | ) | |
65 | f = wireprotov1peer.future() |
|
67 | f = wireprotov1peer.future() | |
66 | yield {b'file': file, b'node': node}, f |
|
68 | yield {b'file': file, b'node': node}, f | |
67 | code, data = f.value.split(b'\0', 1) |
|
69 | code, data = f.value.split(b'\0', 1) | |
68 | if int(code): |
|
70 | if int(code): | |
69 | raise error.LookupError(file, node, data) |
|
71 | raise error.LookupError(file, node, data) | |
70 | yield data |
|
72 | yield data | |
71 |
|
73 | |||
72 | @wireprotov1peer.batchable |
|
74 | @wireprotov1peer.batchable | |
73 | def x_rfl_getflogheads(self, path): |
|
75 | def x_rfl_getflogheads(self, path): | |
74 | if not self.capable(b'x_rfl_getflogheads'): |
|
76 | if not self.capable(b'x_rfl_getflogheads'): | |
75 | raise error.Abort( |
|
77 | raise error.Abort( | |
76 | b'configured remotefile server does not ' |
|
78 | b'configured remotefile server does not ' | |
77 | b'support getflogheads' |
|
79 | b'support getflogheads' | |
78 | ) |
|
80 | ) | |
79 | f = wireprotov1peer.future() |
|
81 | f = wireprotov1peer.future() | |
80 | yield {b'path': path}, f |
|
82 | yield {b'path': path}, f | |
81 | heads = f.value.split(b'\n') if f.value else [] |
|
83 | heads = f.value.split(b'\n') if f.value else [] | |
82 | yield heads |
|
84 | yield heads | |
83 |
|
85 | |||
84 | def _updatecallstreamopts(self, command, opts): |
|
86 | def _updatecallstreamopts(self, command, opts): | |
85 | if command != b'getbundle': |
|
87 | if command != b'getbundle': | |
86 | return |
|
88 | return | |
87 | if ( |
|
89 | if ( | |
88 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES |
|
90 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES | |
89 | not in self.capabilities() |
|
91 | not in self.capabilities() | |
90 | ): |
|
92 | ): | |
91 | return |
|
93 | return | |
92 | if not util.safehasattr(self, '_localrepo'): |
|
94 | if not util.safehasattr(self, '_localrepo'): | |
93 | return |
|
95 | return | |
94 | if ( |
|
96 | if ( | |
95 | constants.SHALLOWREPO_REQUIREMENT |
|
97 | constants.SHALLOWREPO_REQUIREMENT | |
96 | not in self._localrepo.requirements |
|
98 | not in self._localrepo.requirements | |
97 | ): |
|
99 | ): | |
98 | return |
|
100 | return | |
99 |
|
101 | |||
100 | bundlecaps = opts.get(b'bundlecaps') |
|
102 | bundlecaps = opts.get(b'bundlecaps') | |
101 | if bundlecaps: |
|
103 | if bundlecaps: | |
102 | bundlecaps = [bundlecaps] |
|
104 | bundlecaps = [bundlecaps] | |
103 | else: |
|
105 | else: | |
104 | bundlecaps = [] |
|
106 | bundlecaps = [] | |
105 |
|
107 | |||
106 | # shallow, includepattern, and excludepattern are a hacky way of |
|
108 | # shallow, includepattern, and excludepattern are a hacky way of | |
107 | # carrying over data from the local repo to this getbundle |
|
109 | # carrying over data from the local repo to this getbundle | |
108 | # command. We need to do it this way because bundle1 getbundle |
|
110 | # command. We need to do it this way because bundle1 getbundle | |
109 | # doesn't provide any other place we can hook in to manipulate |
|
111 | # doesn't provide any other place we can hook in to manipulate | |
110 | # getbundle args before it goes across the wire. Once we get rid |
|
112 | # getbundle args before it goes across the wire. Once we get rid | |
111 | # of bundle1, we can use bundle2's _pullbundle2extraprepare to |
|
113 | # of bundle1, we can use bundle2's _pullbundle2extraprepare to | |
112 | # do this more cleanly. |
|
114 | # do this more cleanly. | |
113 | bundlecaps.append(constants.BUNDLE2_CAPABLITY) |
|
115 | bundlecaps.append(constants.BUNDLE2_CAPABLITY) | |
114 | if self._localrepo.includepattern: |
|
116 | if self._localrepo.includepattern: | |
115 | patterns = b'\0'.join(self._localrepo.includepattern) |
|
117 | patterns = b'\0'.join(self._localrepo.includepattern) | |
116 | includecap = b"includepattern=" + patterns |
|
118 | includecap = b"includepattern=" + patterns | |
117 | bundlecaps.append(includecap) |
|
119 | bundlecaps.append(includecap) | |
118 | if self._localrepo.excludepattern: |
|
120 | if self._localrepo.excludepattern: | |
119 | patterns = b'\0'.join(self._localrepo.excludepattern) |
|
121 | patterns = b'\0'.join(self._localrepo.excludepattern) | |
120 | excludecap = b"excludepattern=" + patterns |
|
122 | excludecap = b"excludepattern=" + patterns | |
121 | bundlecaps.append(excludecap) |
|
123 | bundlecaps.append(excludecap) | |
122 | opts[b'bundlecaps'] = b','.join(bundlecaps) |
|
124 | opts[b'bundlecaps'] = b','.join(bundlecaps) | |
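A hedged sketch of the bundlecaps value the comment above describes, with made-up patterns and a placeholder capability string (the real one comes from constants.BUNDLE2_CAPABLITY and is not reproduced here):

    includepattern = [b'proj/**', b'lib/**']        # illustrative patterns
    bundlecaps = [b'example-bundle2-cap']           # placeholder for the real constant
    bundlecaps.append(b"includepattern=" + b'\0'.join(includepattern))
    opts = {b'bundlecaps': b','.join(bundlecaps)}
    # -> {b'bundlecaps': b'example-bundle2-cap,includepattern=proj/**\x00lib/**'}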
123 |
|
125 | |||
124 | def _sendrequest(self, command, args, **opts): |
|
126 | def _sendrequest(self, command, args, **opts): | |
125 | self._updatecallstreamopts(command, args) |
|
127 | self._updatecallstreamopts(command, args) | |
126 | return super(remotefilepeer, self)._sendrequest( |
|
128 | return super(remotefilepeer, self)._sendrequest( | |
127 | command, args, **opts |
|
129 | command, args, **opts | |
128 | ) |
|
130 | ) | |
129 |
|
131 | |||
130 | def _callstream(self, command, **opts): |
|
132 | def _callstream(self, command, **opts): | |
131 | supertype = super(remotefilepeer, self) |
|
133 | supertype = super(remotefilepeer, self) | |
132 | if not util.safehasattr(supertype, '_sendrequest'): |
|
134 | if not util.safehasattr(supertype, '_sendrequest'): | |
133 | self._updatecallstreamopts(command, pycompat.byteskwargs(opts)) |
|
135 | self._updatecallstreamopts(command, pycompat.byteskwargs(opts)) | |
134 | return super(remotefilepeer, self)._callstream(command, **opts) |
|
136 | return super(remotefilepeer, self)._callstream(command, **opts) | |
135 |
|
137 | |||
136 | peer.__class__ = remotefilepeer |
|
138 | peer.__class__ = remotefilepeer | |
137 |
|
139 | |||
138 |
|
140 | |||
139 | class cacheconnection(object): |
|
141 | class cacheconnection(object): | |
140 | """The connection for communicating with the remote cache. Performs |
|
142 | """The connection for communicating with the remote cache. Performs | |
141 | gets and sets by communicating with an external process that has the |
|
143 | gets and sets by communicating with an external process that has the | |
142 | cache-specific implementation. |
|
144 | cache-specific implementation. | |
143 | """ |
|
145 | """ | |
144 |
|
146 | |||
145 | def __init__(self): |
|
147 | def __init__(self): | |
146 | self.pipeo = self.pipei = self.pipee = None |
|
148 | self.pipeo = self.pipei = self.pipee = None | |
147 | self.subprocess = None |
|
149 | self.subprocess = None | |
148 | self.connected = False |
|
150 | self.connected = False | |
149 |
|
151 | |||
150 | def connect(self, cachecommand): |
|
152 | def connect(self, cachecommand): | |
151 | if self.pipeo: |
|
153 | if self.pipeo: | |
152 | raise error.Abort(_(b"cache connection already open")) |
|
154 | raise error.Abort(_(b"cache connection already open")) | |
153 | self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4( |
|
155 | self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4( | |
154 | cachecommand |
|
156 | cachecommand | |
155 | ) |
|
157 | ) | |
156 | self.connected = True |
|
158 | self.connected = True | |
157 |
|
159 | |||
158 | def close(self): |
|
160 | def close(self): | |
159 | def tryclose(pipe): |
|
161 | def tryclose(pipe): | |
160 | try: |
|
162 | try: | |
161 | pipe.close() |
|
163 | pipe.close() | |
162 | except Exception: |
|
164 | except Exception: | |
163 | pass |
|
165 | pass | |
164 |
|
166 | |||
165 | if self.connected: |
|
167 | if self.connected: | |
166 | try: |
|
168 | try: | |
167 | self.pipei.write(b"exit\n") |
|
169 | self.pipei.write(b"exit\n") | |
168 | except Exception: |
|
170 | except Exception: | |
169 | pass |
|
171 | pass | |
170 | tryclose(self.pipei) |
|
172 | tryclose(self.pipei) | |
171 | self.pipei = None |
|
173 | self.pipei = None | |
172 | tryclose(self.pipeo) |
|
174 | tryclose(self.pipeo) | |
173 | self.pipeo = None |
|
175 | self.pipeo = None | |
174 | tryclose(self.pipee) |
|
176 | tryclose(self.pipee) | |
175 | self.pipee = None |
|
177 | self.pipee = None | |
176 | try: |
|
178 | try: | |
177 | # Wait for process to terminate, making sure to avoid deadlock. |
|
179 | # Wait for process to terminate, making sure to avoid deadlock. | |
178 | # See https://docs.python.org/2/library/subprocess.html for |
|
180 | # See https://docs.python.org/2/library/subprocess.html for | |
179 | # warnings about wait() and deadlocking. |
|
181 | # warnings about wait() and deadlocking. | |
180 | self.subprocess.communicate() |
|
182 | self.subprocess.communicate() | |
181 | except Exception: |
|
183 | except Exception: | |
182 | pass |
|
184 | pass | |
183 | self.subprocess = None |
|
185 | self.subprocess = None | |
184 | self.connected = False |
|
186 | self.connected = False | |
185 |
|
187 | |||
186 | def request(self, request, flush=True): |
|
188 | def request(self, request, flush=True): | |
187 | if self.connected: |
|
189 | if self.connected: | |
188 | try: |
|
190 | try: | |
189 | self.pipei.write(request) |
|
191 | self.pipei.write(request) | |
190 | if flush: |
|
192 | if flush: | |
191 | self.pipei.flush() |
|
193 | self.pipei.flush() | |
192 | except IOError: |
|
194 | except IOError: | |
193 | self.close() |
|
195 | self.close() | |
194 |
|
196 | |||
195 | def receiveline(self): |
|
197 | def receiveline(self): | |
196 | if not self.connected: |
|
198 | if not self.connected: | |
197 | return None |
|
199 | return None | |
198 | try: |
|
200 | try: | |
199 | result = self.pipeo.readline()[:-1] |
|
201 | result = self.pipeo.readline()[:-1] | |
200 | if not result: |
|
202 | if not result: | |
201 | self.close() |
|
203 | self.close() | |
202 | except IOError: |
|
204 | except IOError: | |
203 | self.close() |
|
205 | self.close() | |
204 |
|
206 | |||
205 | return result |
|
207 | return result | |
206 |
|
208 | |||
207 |
|
209 | |||
208 | def _getfilesbatch( |
|
210 | def _getfilesbatch( | |
209 | remote, receivemissing, progresstick, missed, idmap, batchsize |
|
211 | remote, receivemissing, progresstick, missed, idmap, batchsize | |
210 | ): |
|
212 | ): | |
211 | # Over http(s), iterbatch is a streamy method and we can start |
|
213 | # Over http(s), iterbatch is a streamy method and we can start | |
212 | # looking at results early. This means we send one (potentially |
|
214 | # looking at results early. This means we send one (potentially | |
213 | # large) request, but then we show nice progress as we process |
|
215 | # large) request, but then we show nice progress as we process | |
214 | # file results, rather than showing chunks of $batchsize in |
|
216 | # file results, rather than showing chunks of $batchsize in | |
215 | # progress. |
|
217 | # progress. | |
216 | # |
|
218 | # | |
217 | # Over ssh, iterbatch isn't streamy because batch() wasn't |
|
219 | # Over ssh, iterbatch isn't streamy because batch() wasn't | |
218 | # explicitly designed as a streaming method. In the future we |
|
220 | # explicitly designed as a streaming method. In the future we | |
219 | # should probably introduce a streambatch() method upstream and |
|
221 | # should probably introduce a streambatch() method upstream and | |
220 | # use that for this. |
|
222 | # use that for this. | |
221 | with remote.commandexecutor() as e: |
|
223 | with remote.commandexecutor() as e: | |
222 | futures = [] |
|
224 | futures = [] | |
223 | for m in missed: |
|
225 | for m in missed: | |
224 | futures.append( |
|
226 | futures.append( | |
225 | e.callcommand( |
|
227 | e.callcommand( | |
226 | b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]} |
|
228 | b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]} | |
227 | ) |
|
229 | ) | |
228 | ) |
|
230 | ) | |
229 |
|
231 | |||
230 | for i, m in enumerate(missed): |
|
232 | for i, m in enumerate(missed): | |
231 | r = futures[i].result() |
|
233 | r = futures[i].result() | |
232 | futures[i] = None # release memory |
|
234 | futures[i] = None # release memory | |
233 | file_ = idmap[m] |
|
235 | file_ = idmap[m] | |
234 | node = m[-40:] |
|
236 | node = m[-40:] | |
235 | receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node) |
|
237 | receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node) | |
236 | progresstick() |
|
238 | progresstick() | |
237 |
|
239 | |||
238 |
|
240 | |||
239 | def _getfiles_optimistic( |
|
241 | def _getfiles_optimistic( | |
240 | remote, receivemissing, progresstick, missed, idmap, step |
|
242 | remote, receivemissing, progresstick, missed, idmap, step | |
241 | ): |
|
243 | ): | |
242 | remote._callstream(b"x_rfl_getfiles") |
|
244 | remote._callstream(b"x_rfl_getfiles") | |
243 | i = 0 |
|
245 | i = 0 | |
244 | pipeo = remote._pipeo |
|
246 | pipeo = remote._pipeo | |
245 | pipei = remote._pipei |
|
247 | pipei = remote._pipei | |
246 | while i < len(missed): |
|
248 | while i < len(missed): | |
247 | # issue a batch of requests |
|
249 | # issue a batch of requests | |
248 | start = i |
|
250 | start = i | |
249 | end = min(len(missed), start + step) |
|
251 | end = min(len(missed), start + step) | |
250 | i = end |
|
252 | i = end | |
251 | for missingid in missed[start:end]: |
|
253 | for missingid in missed[start:end]: | |
252 | # issue new request |
|
254 | # issue new request | |
253 | versionid = missingid[-40:] |
|
255 | versionid = missingid[-40:] | |
254 | file = idmap[missingid] |
|
256 | file = idmap[missingid] | |
255 | sshrequest = b"%s%s\n" % (versionid, file) |
|
257 | sshrequest = b"%s%s\n" % (versionid, file) | |
256 | pipeo.write(sshrequest) |
|
258 | pipeo.write(sshrequest) | |
257 | pipeo.flush() |
|
259 | pipeo.flush() | |
258 |
|
260 | |||
259 | # receive batch results |
|
261 | # receive batch results | |
260 | for missingid in missed[start:end]: |
|
262 | for missingid in missed[start:end]: | |
261 | versionid = missingid[-40:] |
|
263 | versionid = missingid[-40:] | |
262 | file = idmap[missingid] |
|
264 | file = idmap[missingid] | |
263 | receivemissing(pipei, file, versionid) |
|
265 | receivemissing(pipei, file, versionid) | |
264 | progresstick() |
|
266 | progresstick() | |
265 |
|
267 | |||
266 | # End the command |
|
268 | # End the command | |
267 | pipeo.write(b'\n') |
|
269 | pipeo.write(b'\n') | |
268 | pipeo.flush() |
|
270 | pipeo.flush() | |
269 |
|
271 | |||
270 |
|
272 | |||
271 | def _getfiles_threaded( |
|
273 | def _getfiles_threaded( | |
272 | remote, receivemissing, progresstick, missed, idmap, step |
|
274 | remote, receivemissing, progresstick, missed, idmap, step | |
273 | ): |
|
275 | ): | |
274 | remote._callstream(b"getfiles") |
|
276 | remote._callstream(b"getfiles") | |
275 | pipeo = remote._pipeo |
|
277 | pipeo = remote._pipeo | |
276 | pipei = remote._pipei |
|
278 | pipei = remote._pipei | |
277 |
|
279 | |||
278 | def writer(): |
|
280 | def writer(): | |
279 | for missingid in missed: |
|
281 | for missingid in missed: | |
280 | versionid = missingid[-40:] |
|
282 | versionid = missingid[-40:] | |
281 | file = idmap[missingid] |
|
283 | file = idmap[missingid] | |
282 | sshrequest = b"%s%s\n" % (versionid, file) |
|
284 | sshrequest = b"%s%s\n" % (versionid, file) | |
283 | pipeo.write(sshrequest) |
|
285 | pipeo.write(sshrequest) | |
284 | pipeo.flush() |
|
286 | pipeo.flush() | |
285 |
|
287 | |||
286 | writerthread = threading.Thread(target=writer) |
|
288 | writerthread = threading.Thread(target=writer) | |
287 | writerthread.daemon = True |
|
289 | writerthread.daemon = True | |
288 | writerthread.start() |
|
290 | writerthread.start() | |
289 |
|
291 | |||
290 | for missingid in missed: |
|
292 | for missingid in missed: | |
291 | versionid = missingid[-40:] |
|
293 | versionid = missingid[-40:] | |
292 | file = idmap[missingid] |
|
294 | file = idmap[missingid] | |
293 | receivemissing(pipei, file, versionid) |
|
295 | receivemissing(pipei, file, versionid) | |
294 | progresstick() |
|
296 | progresstick() | |
295 |
|
297 | |||
296 | writerthread.join() |
|
298 | writerthread.join() | |
297 | # End the command |
|
299 | # End the command | |
298 | pipeo.write(b'\n') |
|
300 | pipeo.write(b'\n') | |
299 | pipeo.flush() |
|
301 | pipeo.flush() | |
300 |
|
302 | |||
301 |
|
303 | |||
302 | class fileserverclient(object): |
|
304 | class fileserverclient(object): | |
303 | """A client for requesting files from the remote file server. |
|
305 | """A client for requesting files from the remote file server. | |
304 | """ |
|
306 | """ | |
305 |
|
307 | |||
306 | def __init__(self, repo): |
|
308 | def __init__(self, repo): | |
307 | ui = repo.ui |
|
309 | ui = repo.ui | |
308 | self.repo = repo |
|
310 | self.repo = repo | |
309 | self.ui = ui |
|
311 | self.ui = ui | |
310 | self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess") |
|
312 | self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess") | |
311 | if self.cacheprocess: |
|
313 | if self.cacheprocess: | |
312 | self.cacheprocess = util.expandpath(self.cacheprocess) |
|
314 | self.cacheprocess = util.expandpath(self.cacheprocess) | |
313 |
|
315 | |||
314 | # This option causes remotefilelog to pass the full file path to the |
|
316 | # This option causes remotefilelog to pass the full file path to the | |
315 | # cacheprocess instead of a hashed key. |
|
317 | # cacheprocess instead of a hashed key. | |
316 | self.cacheprocesspasspath = ui.configbool( |
|
318 | self.cacheprocesspasspath = ui.configbool( | |
317 | b"remotefilelog", b"cacheprocess.includepath" |
|
319 | b"remotefilelog", b"cacheprocess.includepath" | |
318 | ) |
|
320 | ) | |
319 |
|
321 | |||
320 | self.debugoutput = ui.configbool(b"remotefilelog", b"debug") |
|
322 | self.debugoutput = ui.configbool(b"remotefilelog", b"debug") | |
321 |
|
323 | |||
322 | self.remotecache = cacheconnection() |
|
324 | self.remotecache = cacheconnection() | |
323 |
|
325 | |||
324 | def setstore(self, datastore, historystore, writedata, writehistory): |
|
326 | def setstore(self, datastore, historystore, writedata, writehistory): | |
325 | self.datastore = datastore |
|
327 | self.datastore = datastore | |
326 | self.historystore = historystore |
|
328 | self.historystore = historystore | |
327 | self.writedata = writedata |
|
329 | self.writedata = writedata | |
328 | self.writehistory = writehistory |
|
330 | self.writehistory = writehistory | |
329 |
|
331 | |||
330 | def _connect(self): |
|
332 | def _connect(self): | |
331 | return self.repo.connectionpool.get(self.repo.fallbackpath) |
|
333 | return self.repo.connectionpool.get(self.repo.fallbackpath) | |
332 |
|
334 | |||
333 | def request(self, fileids): |
|
335 | def request(self, fileids): | |
334 | """Takes a list of filename/node pairs and fetches them from the |
|
336 | """Takes a list of filename/node pairs and fetches them from the | |
335 | server. Files are stored in the local cache. |
|
337 | server. Files are stored in the local cache. | |
336 | A list of nodes that the server couldn't find is returned. |
|
338 | A list of nodes that the server couldn't find is returned. | |
337 | If the connection fails, an exception is raised. |
|
339 | If the connection fails, an exception is raised. | |
338 | """ |
|
340 | """ | |
339 | if not self.remotecache.connected: |
|
341 | if not self.remotecache.connected: | |
340 | self.connect() |
|
342 | self.connect() | |
341 | cache = self.remotecache |
|
343 | cache = self.remotecache | |
342 | writedata = self.writedata |
|
344 | writedata = self.writedata | |
343 |
|
345 | |||
344 | repo = self.repo |
|
346 | repo = self.repo | |
345 | total = len(fileids) |
|
347 | total = len(fileids) | |
346 | request = b"get\n%d\n" % total |
|
348 | request = b"get\n%d\n" % total | |
347 | idmap = {} |
|
349 | idmap = {} | |
348 | reponame = repo.name |
|
350 | reponame = repo.name | |
349 | for file, id in fileids: |
|
351 | for file, id in fileids: | |
350 | fullid = getcachekey(reponame, file, id) |
|
352 | fullid = getcachekey(reponame, file, id) | |
351 | if self.cacheprocesspasspath: |
|
353 | if self.cacheprocesspasspath: | |
352 | request += file + b'\0' |
|
354 | request += file + b'\0' | |
353 | request += fullid + b"\n" |
|
355 | request += fullid + b"\n" | |
354 | idmap[fullid] = file |
|
356 | idmap[fullid] = file | |
355 |
|
357 | |||
356 | cache.request(request) |
|
358 | cache.request(request) | |
357 |
|
359 | |||
358 | progress = self.ui.makeprogress(_(b'downloading'), total=total) |
|
360 | progress = self.ui.makeprogress(_(b'downloading'), total=total) | |
359 | progress.update(0) |
|
361 | progress.update(0) | |
360 |
|
362 | |||
361 | missed = [] |
|
363 | missed = [] | |
362 | while True: |
|
364 | while True: | |
363 | missingid = cache.receiveline() |
|
365 | missingid = cache.receiveline() | |
364 | if not missingid: |
|
366 | if not missingid: | |
365 | missedset = set(missed) |
|
367 | missedset = set(missed) | |
366 | for missingid in idmap: |
|
368 | for missingid in idmap: | |
367 | if not missingid in missedset: |
|
369 | if not missingid in missedset: | |
368 | missed.append(missingid) |
|
370 | missed.append(missingid) | |
369 | self.ui.warn( |
|
371 | self.ui.warn( | |
370 | _( |
|
372 | _( | |
371 | b"warning: cache connection closed early - " |
|
373 | b"warning: cache connection closed early - " | |
372 | + b"falling back to server\n" |
|
374 | + b"falling back to server\n" | |
373 | ) |
|
375 | ) | |
374 | ) |
|
376 | ) | |
375 | break |
|
377 | break | |
376 | if missingid == b"0": |
|
378 | if missingid == b"0": | |
377 | break |
|
379 | break | |
378 | if missingid.startswith(b"_hits_"): |
|
380 | if missingid.startswith(b"_hits_"): | |
379 | # receive progress reports |
|
381 | # receive progress reports | |
380 | parts = missingid.split(b"_") |
|
382 | parts = missingid.split(b"_") | |
381 | progress.increment(int(parts[2])) |
|
383 | progress.increment(int(parts[2])) | |
382 | continue |
|
384 | continue | |
383 |
|
385 | |||
384 | missed.append(missingid) |
|
386 | missed.append(missingid) | |
385 |
|
387 | |||
386 | global fetchmisses |
|
388 | global fetchmisses | |
387 | fetchmisses += len(missed) |
|
389 | fetchmisses += len(missed) | |
388 |
|
390 | |||
389 | fromcache = total - len(missed) |
|
391 | fromcache = total - len(missed) | |
390 | progress.update(fromcache, total=total) |
|
392 | progress.update(fromcache, total=total) | |
391 | self.ui.log( |
|
393 | self.ui.log( | |
392 | b"remotefilelog", |
|
394 | b"remotefilelog", | |
393 | b"remote cache hit rate is %r of %r\n", |
|
395 | b"remote cache hit rate is %r of %r\n", | |
394 | fromcache, |
|
396 | fromcache, | |
395 | total, |
|
397 | total, | |
396 | hit=fromcache, |
|
398 | hit=fromcache, | |
397 | total=total, |
|
399 | total=total, | |
398 | ) |
|
400 | ) | |
399 |
|
401 | |||
400 | oldumask = os.umask(0o002) |
|
402 | oldumask = os.umask(0o002) | |
401 | try: |
|
403 | try: | |
402 | # receive cache misses from master |
|
404 | # receive cache misses from master | |
403 | if missed: |
|
405 | if missed: | |
404 | # When verbose is true, sshpeer prints 'running ssh...' |
|
406 | # When verbose is true, sshpeer prints 'running ssh...' | |
405 | # to stdout, which can interfere with some command |
|
407 | # to stdout, which can interfere with some command | |
406 | # outputs |
|
408 | # outputs | |
407 | verbose = self.ui.verbose |
|
409 | verbose = self.ui.verbose | |
408 | self.ui.verbose = False |
|
410 | self.ui.verbose = False | |
409 | try: |
|
411 | try: | |
410 | with self._connect() as conn: |
|
412 | with self._connect() as conn: | |
411 | remote = conn.peer |
|
413 | remote = conn.peer | |
412 | if remote.capable( |
|
414 | if remote.capable( | |
413 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES |
|
415 | constants.NETWORK_CAP_LEGACY_SSH_GETFILES | |
414 | ): |
|
416 | ): | |
415 | if not isinstance(remote, _sshv1peer): |
|
417 | if not isinstance(remote, _sshv1peer): | |
416 | raise error.Abort( |
|
418 | raise error.Abort( | |
417 | b'remotefilelog requires ssh servers' |
|
419 | b'remotefilelog requires ssh servers' | |
418 | ) |
|
420 | ) | |
419 | step = self.ui.configint( |
|
421 | step = self.ui.configint( | |
420 | b'remotefilelog', b'getfilesstep' |
|
422 | b'remotefilelog', b'getfilesstep' | |
421 | ) |
|
423 | ) | |
422 | getfilestype = self.ui.config( |
|
424 | getfilestype = self.ui.config( | |
423 | b'remotefilelog', b'getfilestype' |
|
425 | b'remotefilelog', b'getfilestype' | |
424 | ) |
|
426 | ) | |
425 | if getfilestype == b'threaded': |
|
427 | if getfilestype == b'threaded': | |
426 | _getfiles = _getfiles_threaded |
|
428 | _getfiles = _getfiles_threaded | |
427 | else: |
|
429 | else: | |
428 | _getfiles = _getfiles_optimistic |
|
430 | _getfiles = _getfiles_optimistic | |
429 | _getfiles( |
|
431 | _getfiles( | |
430 | remote, |
|
432 | remote, | |
431 | self.receivemissing, |
|
433 | self.receivemissing, | |
432 | progress.increment, |
|
434 | progress.increment, | |
433 | missed, |
|
435 | missed, | |
434 | idmap, |
|
436 | idmap, | |
435 | step, |
|
437 | step, | |
436 | ) |
|
438 | ) | |
437 | elif remote.capable(b"x_rfl_getfile"): |
|
439 | elif remote.capable(b"x_rfl_getfile"): | |
438 | if remote.capable(b'batch'): |
|
440 | if remote.capable(b'batch'): | |
439 | batchdefault = 100 |
|
441 | batchdefault = 100 | |
440 | else: |
|
442 | else: | |
441 | batchdefault = 10 |
|
443 | batchdefault = 10 | |
442 | batchsize = self.ui.configint( |
|
444 | batchsize = self.ui.configint( | |
443 | b'remotefilelog', b'batchsize', batchdefault |
|
445 | b'remotefilelog', b'batchsize', batchdefault | |
444 | ) |
|
446 | ) | |
445 | self.ui.debug( |
|
447 | self.ui.debug( | |
446 | b'requesting %d files from ' |
|
448 | b'requesting %d files from ' | |
447 | b'remotefilelog server...\n' % len(missed) |
|
449 | b'remotefilelog server...\n' % len(missed) | |
448 | ) |
|
450 | ) | |
449 | _getfilesbatch( |
|
451 | _getfilesbatch( | |
450 | remote, |
|
452 | remote, | |
451 | self.receivemissing, |
|
453 | self.receivemissing, | |
452 | progress.increment, |
|
454 | progress.increment, | |
453 | missed, |
|
455 | missed, | |
454 | idmap, |
|
456 | idmap, | |
455 | batchsize, |
|
457 | batchsize, | |
456 | ) |
|
458 | ) | |
457 | else: |
|
459 | else: | |
458 | raise error.Abort( |
|
460 | raise error.Abort( | |
459 | b"configured remotefilelog server" |
|
461 | b"configured remotefilelog server" | |
460 | b" does not support remotefilelog" |
|
462 | b" does not support remotefilelog" | |
461 | ) |
|
463 | ) | |
462 |
|
464 | |||
463 | self.ui.log( |
|
465 | self.ui.log( | |
464 | b"remotefilefetchlog", |
|
466 | b"remotefilefetchlog", | |
465 | b"Success\n", |
|
467 | b"Success\n", | |
466 | fetched_files=progress.pos - fromcache, |
|
468 | fetched_files=progress.pos - fromcache, | |
467 | total_to_fetch=total - fromcache, |
|
469 | total_to_fetch=total - fromcache, | |
468 | ) |
|
470 | ) | |
469 | except Exception: |
|
471 | except Exception: | |
470 | self.ui.log( |
|
472 | self.ui.log( | |
471 | b"remotefilefetchlog", |
|
473 | b"remotefilefetchlog", | |
472 | b"Fail\n", |
|
474 | b"Fail\n", | |
473 | fetched_files=progress.pos - fromcache, |
|
475 | fetched_files=progress.pos - fromcache, | |
474 | total_to_fetch=total - fromcache, |
|
476 | total_to_fetch=total - fromcache, | |
475 | ) |
|
477 | ) | |
476 | raise |
|
478 | raise | |
477 | finally: |
|
479 | finally: | |
478 | self.ui.verbose = verbose |
|
480 | self.ui.verbose = verbose | |
479 | # send to memcache |
|
481 | # send to memcache | |
480 | request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed)) |
|
482 | request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed)) | |
481 | cache.request(request) |
|
483 | cache.request(request) | |
482 |
|
484 | |||
483 | progress.complete() |
|
485 | progress.complete() | |
484 |
|
486 | |||
485 | # mark ourselves as a user of this cache |
|
487 | # mark ourselves as a user of this cache | |
486 | writedata.markrepo(self.repo.path) |
|
488 | writedata.markrepo(self.repo.path) | |
487 | finally: |
|
489 | finally: | |
488 | os.umask(oldumask) |
|
490 | os.umask(oldumask) | |
489 |
|
491 | |||
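A minimal sketch of the request the method above writes to the cache process, using made-up cache keys; the expected reply format is the one the parsing loop above handles:

    keys = [
        b'myrepo/ab/cdef0123456789abcdef0123456789abcdef01/' + b'1' * 40,   # fake cache keys
        b'myrepo/12/3456789abcdef0123456789abcdef012345678/' + b'2' * 40,
    ]
    request = b"get\n%d\n" % len(keys)
    for key in keys:
        request += key + b"\n"
    # The cache process answers with missing keys one per line, may interleave
    # b"_hits_<count>_..." progress lines, and ends the reply with a b"0" line.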
490 | def receivemissing(self, pipe, filename, node): |
|
492 | def receivemissing(self, pipe, filename, node): | |
491 | line = pipe.readline()[:-1] |
|
493 | line = pipe.readline()[:-1] | |
492 | if not line: |
|
494 | if not line: | |
493 | raise error.ResponseError( |
|
495 | raise error.ResponseError( | |
494 | _(b"error downloading file contents:"), |
|
496 | _(b"error downloading file contents:"), | |
495 | _(b"connection closed early"), |
|
497 | _(b"connection closed early"), | |
496 | ) |
|
498 | ) | |
497 | size = int(line) |
|
499 | size = int(line) | |
498 | data = pipe.read(size) |
|
500 | data = pipe.read(size) | |
499 | if len(data) != size: |
|
501 | if len(data) != size: | |
500 | raise error.ResponseError( |
|
502 | raise error.ResponseError( | |
501 | _(b"error downloading file contents:"), |
|
503 | _(b"error downloading file contents:"), | |
502 | _(b"only received %s of %s bytes") % (len(data), size), |
|
504 | _(b"only received %s of %s bytes") % (len(data), size), | |
503 | ) |
|
505 | ) | |
504 |
|
506 | |||
505 | self.writedata.addremotefilelognode( |
|
507 | self.writedata.addremotefilelognode( | |
506 | filename, bin(node), zlib.decompress(data) |
|
508 | filename, bin(node), zlib.decompress(data) | |
507 | ) |
|
509 | ) | |
508 |
|
510 | |||
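A self-contained illustration of the wire format receivemissing parses: a decimal size line followed by that many bytes of zlib-compressed file data (the payload is made up):

    import io
    import zlib

    blob = zlib.compress(b'example file contents')
    pipe = io.BytesIO(b'%d\n%s' % (len(blob), blob))

    size = int(pipe.readline()[:-1])     # size line, trailing newline stripped
    data = pipe.read(size)
    assert len(data) == size
    assert zlib.decompress(data) == b'example file contents'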
509 | def connect(self): |
|
511 | def connect(self): | |
510 | if self.cacheprocess: |
|
512 | if self.cacheprocess: | |
511 | cmd = b"%s %s" % (self.cacheprocess, self.writedata._path) |
|
513 | cmd = b"%s %s" % (self.cacheprocess, self.writedata._path) | |
512 | self.remotecache.connect(cmd) |
|
514 | self.remotecache.connect(cmd) | |
513 | else: |
|
515 | else: | |
514 | # If no cache process is specified, we fake one that always |
|
516 | # If no cache process is specified, we fake one that always | |
515 | # returns cache misses. This enables tests to run easily |
|
517 | # returns cache misses. This enables tests to run easily | |
516 | # and may eventually allow us to be a drop in replacement |
|
518 | # and may eventually allow us to be a drop in replacement | |
517 | # for the largefiles extension. |
|
519 | # for the largefiles extension. | |
518 | class simplecache(object): |
|
520 | class simplecache(object): | |
519 | def __init__(self): |
|
521 | def __init__(self): | |
520 | self.missingids = [] |
|
522 | self.missingids = [] | |
521 | self.connected = True |
|
523 | self.connected = True | |
522 |
|
524 | |||
523 | def close(self): |
|
525 | def close(self): | |
524 | pass |
|
526 | pass | |
525 |
|
527 | |||
526 | def request(self, value, flush=True): |
|
528 | def request(self, value, flush=True): | |
527 | lines = value.split(b"\n") |
|
529 | lines = value.split(b"\n") | |
528 | if lines[0] != b"get": |
|
530 | if lines[0] != b"get": | |
529 | return |
|
531 | return | |
530 | self.missingids = lines[2:-1] |
|
532 | self.missingids = lines[2:-1] | |
531 | self.missingids.append(b'0') |
|
533 | self.missingids.append(b'0') | |
532 |
|
534 | |||
533 | def receiveline(self): |
|
535 | def receiveline(self): | |
534 | if len(self.missingids) > 0: |
|
536 | if len(self.missingids) > 0: | |
535 | return self.missingids.pop(0) |
|
537 | return self.missingids.pop(0) | |
536 | return None |
|
538 | return None | |
537 |
|
539 | |||
538 | self.remotecache = simplecache() |
|
540 | self.remotecache = simplecache() | |
539 |
|
541 | |||
540 | def close(self): |
|
542 | def close(self): | |
541 | if fetches: |
|
543 | if fetches: | |
542 | msg = ( |
|
544 | msg = ( | |
543 | b"%d files fetched over %d fetches - " |
|
545 | b"%d files fetched over %d fetches - " | |
544 | + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n" |
|
546 | + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n" | |
545 | ) % ( |
|
547 | ) % ( | |
546 | fetched, |
|
548 | fetched, | |
547 | fetches, |
|
549 | fetches, | |
548 | fetchmisses, |
|
550 | fetchmisses, | |
549 | float(fetched - fetchmisses) / float(fetched) * 100.0, |
|
551 | float(fetched - fetchmisses) / float(fetched) * 100.0, | |
550 | fetchcost, |
|
552 | fetchcost, | |
551 | ) |
|
553 | ) | |
552 | if self.debugoutput: |
|
554 | if self.debugoutput: | |
553 | self.ui.warn(msg) |
|
555 | self.ui.warn(msg) | |
554 | self.ui.log( |
|
556 | self.ui.log( | |
555 | b"remotefilelog.prefetch", |
|
557 | b"remotefilelog.prefetch", | |
556 | msg.replace(b"%", b"%%"), |
|
558 | msg.replace(b"%", b"%%"), | |
557 | remotefilelogfetched=fetched, |
|
559 | remotefilelogfetched=fetched, | |
558 | remotefilelogfetches=fetches, |
|
560 | remotefilelogfetches=fetches, | |
559 | remotefilelogfetchmisses=fetchmisses, |
|
561 | remotefilelogfetchmisses=fetchmisses, | |
560 | remotefilelogfetchtime=fetchcost * 1000, |
|
562 | remotefilelogfetchtime=fetchcost * 1000, | |
561 | ) |
|
563 | ) | |
562 |
|
564 | |||
563 | if self.remotecache.connected: |
|
565 | if self.remotecache.connected: | |
564 | self.remotecache.close() |
|
566 | self.remotecache.close() | |
565 |
|
567 | |||
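A worked example of the statistics line close() builds, with made-up counters, showing how the hit ratio is derived:

    fetched, fetches, fetchmisses, fetchcost = 50, 3, 10, 1.25
    ratio = float(fetched - fetchmisses) / float(fetched) * 100.0   # 80.0% hit ratio
    msg = (b"%d files fetched over %d fetches - "
           b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n"
           % (fetched, fetches, fetchmisses, ratio, fetchcost))
    assert msg == b"50 files fetched over 3 fetches - (10 misses, 80.00% hit ratio) over 1.25s\n"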
566 | def prefetch( |
|
568 | def prefetch( | |
567 | self, fileids, force=False, fetchdata=True, fetchhistory=False |
|
569 | self, fileids, force=False, fetchdata=True, fetchhistory=False | |
568 | ): |
|
570 | ): | |
569 | """downloads the given file versions to the cache |
|
571 | """downloads the given file versions to the cache | |
570 | """ |
|
572 | """ | |
571 | repo = self.repo |
|
573 | repo = self.repo | |
572 | idstocheck = [] |
|
574 | idstocheck = [] | |
573 | for file, id in fileids: |
|
575 | for file, id in fileids: | |
574 | # hack |
|
576 | # hack | |
575 | # - we don't use .hgtags |
|
577 | # - we don't use .hgtags | |
576 | # - workingctx produces ids with length 42, |
|
578 | # - workingctx produces ids with length 42, | |
577 | # which we skip since they aren't in any cache |
|
579 | # which we skip since they aren't in any cache | |
578 | if ( |
|
580 | if ( | |
579 | file == b'.hgtags' |
|
581 | file == b'.hgtags' | |
580 | or len(id) == 42 |
|
582 | or len(id) == 42 | |
581 | or not repo.shallowmatch(file) |
|
583 | or not repo.shallowmatch(file) | |
582 | ): |
|
584 | ): | |
583 | continue |
|
585 | continue | |
584 |
|
586 | |||
585 | idstocheck.append((file, bin(id))) |
|
587 | idstocheck.append((file, bin(id))) | |
586 |
|
588 | |||
587 | datastore = self.datastore |
|
589 | datastore = self.datastore | |
588 | historystore = self.historystore |
|
590 | historystore = self.historystore | |
589 | if force: |
|
591 | if force: | |
590 | datastore = contentstore.unioncontentstore(*repo.shareddatastores) |
|
592 | datastore = contentstore.unioncontentstore(*repo.shareddatastores) | |
591 | historystore = metadatastore.unionmetadatastore( |
|
593 | historystore = metadatastore.unionmetadatastore( | |
592 | *repo.sharedhistorystores |
|
594 | *repo.sharedhistorystores | |
593 | ) |
|
595 | ) | |
594 |
|
596 | |||
595 | missingids = set() |
|
597 | missingids = set() | |
596 | if fetchdata: |
|
598 | if fetchdata: | |
597 | missingids.update(datastore.getmissing(idstocheck)) |
|
599 | missingids.update(datastore.getmissing(idstocheck)) | |
598 | if fetchhistory: |
|
600 | if fetchhistory: | |
599 | missingids.update(historystore.getmissing(idstocheck)) |
|
601 | missingids.update(historystore.getmissing(idstocheck)) | |
600 |
|
602 | |||
601 | # partition missing nodes into nullid and not-nullid so we can |
|
603 | # partition missing nodes into nullid and not-nullid so we can | |
602 | # warn about this filtering potentially shadowing bugs. |
|
604 | # warn about this filtering potentially shadowing bugs. | |
603 | nullids = len([None for unused, id in missingids if id == nullid]) |
|
605 | nullids = len([None for unused, id in missingids if id == nullid]) | |
604 | if nullids: |
|
606 | if nullids: | |
605 | missingids = [(f, id) for f, id in missingids if id != nullid] |
|
607 | missingids = [(f, id) for f, id in missingids if id != nullid] | |
606 | repo.ui.develwarn( |
|
608 | repo.ui.develwarn( | |
607 | ( |
|
609 | ( | |
608 | b'remotefilelog not fetching %d null revs' |
|
610 | b'remotefilelog not fetching %d null revs' | |
609 | b' - this is likely hiding bugs' % nullids |
|
611 | b' - this is likely hiding bugs' % nullids | |
610 | ), |
|
612 | ), | |
611 | config=b'remotefilelog-ext', |
|
613 | config=b'remotefilelog-ext', | |
612 | ) |
|
614 | ) | |
613 | if missingids: |
|
615 | if missingids: | |
614 | global fetches, fetched, fetchcost |
|
616 | global fetches, fetched, fetchcost | |
615 | fetches += 1 |
|
617 | fetches += 1 | |
616 |
|
618 | |||
617 | # We want to be able to detect excess individual file downloads, so |
|
619 | # We want to be able to detect excess individual file downloads, so | |
618 | # let's log that information for debugging. |
|
620 | # let's log that information for debugging. | |
619 | if fetches >= 15 and fetches < 18: |
|
621 | if fetches >= 15 and fetches < 18: | |
620 | if fetches == 15: |
|
622 | if fetches == 15: | |
621 | fetchwarning = self.ui.config( |
|
623 | fetchwarning = self.ui.config( | |
622 | b'remotefilelog', b'fetchwarning' |
|
624 | b'remotefilelog', b'fetchwarning' | |
623 | ) |
|
625 | ) | |
624 | if fetchwarning: |
|
626 | if fetchwarning: | |
625 | self.ui.warn(fetchwarning + b'\n') |
|
627 | self.ui.warn(fetchwarning + b'\n') | |
626 | self.logstacktrace() |
|
628 | self.logstacktrace() | |
627 | missingids = [(file, hex(id)) for file, id in sorted(missingids)] |
|
629 | missingids = [(file, hex(id)) for file, id in sorted(missingids)] | |
628 | fetched += len(missingids) |
|
630 | fetched += len(missingids) | |
629 | start = time.time() |
|
631 | start = time.time() | |
630 | missingids = self.request(missingids) |
|
632 | missingids = self.request(missingids) | |
631 | if missingids: |
|
633 | if missingids: | |
632 | raise error.Abort( |
|
634 | raise error.Abort( | |
633 | _(b"unable to download %d files") % len(missingids) |
|
635 | _(b"unable to download %d files") % len(missingids) | |
634 | ) |
|
636 | ) | |
635 | fetchcost += time.time() - start |
|
637 | fetchcost += time.time() - start | |
636 | self._lfsprefetch(fileids) |
|
638 | self._lfsprefetch(fileids) | |
637 |
|
639 | |||
638 | def _lfsprefetch(self, fileids): |
|
640 | def _lfsprefetch(self, fileids): | |
639 | if not _lfsmod or not util.safehasattr( |
|
641 | if not _lfsmod or not util.safehasattr( | |
640 | self.repo.svfs, b'lfslocalblobstore' |
|
642 | self.repo.svfs, b'lfslocalblobstore' | |
641 | ): |
|
643 | ): | |
642 | return |
|
644 | return | |
643 | if not _lfsmod.wrapper.candownload(self.repo): |
|
645 | if not _lfsmod.wrapper.candownload(self.repo): | |
644 | return |
|
646 | return | |
645 | pointers = [] |
|
647 | pointers = [] | |
646 | store = self.repo.svfs.lfslocalblobstore |
|
648 | store = self.repo.svfs.lfslocalblobstore | |
647 | for file, id in fileids: |
|
649 | for file, id in fileids: | |
648 | node = bin(id) |
|
650 | node = bin(id) | |
649 | rlog = self.repo.file(file) |
|
651 | rlog = self.repo.file(file) | |
650 | if rlog.flags(node) & revlog.REVIDX_EXTSTORED: |
|
652 | if rlog.flags(node) & revlog.REVIDX_EXTSTORED: | |
651 | text = rlog.rawdata(node) |
|
653 | text = rlog.rawdata(node) | |
652 | p = _lfsmod.pointer.deserialize(text) |
|
654 | p = _lfsmod.pointer.deserialize(text) | |
653 | oid = p.oid() |
|
655 | oid = p.oid() | |
654 | if not store.has(oid): |
|
656 | if not store.has(oid): | |
655 | pointers.append(p) |
|
657 | pointers.append(p) | |
656 | if len(pointers) > 0: |
|
658 | if len(pointers) > 0: | |
657 | self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store) |
|
659 | self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store) | |
658 | assert all(store.has(p.oid()) for p in pointers) |
|
660 | assert all(store.has(p.oid()) for p in pointers) | |
659 |
|
661 | |||
660 | def logstacktrace(self): |
|
662 | def logstacktrace(self): | |
661 | import traceback |
|
663 | import traceback | |
662 |
|
664 | |||
663 | self.ui.log( |
|
665 | self.ui.log( | |
664 | b'remotefilelog', |
|
666 | b'remotefilelog', | |
665 | b'excess remotefilelog fetching:\n%s\n', |
|
667 | b'excess remotefilelog fetching:\n%s\n', | |
666 | b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()), |
|
668 | b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()), | |
667 | ) |
|
669 | ) |
@@ -1,572 +1,572 b'' | |||||
1 | from __future__ import absolute_import |
|
1 | from __future__ import absolute_import | |
2 |
|
2 | |||
3 | import hashlib |
|
|||
4 | import struct |
|
3 | import struct | |
5 |
|
4 | |||
6 | from mercurial.node import hex, nullid |
|
5 | from mercurial.node import hex, nullid | |
7 | from mercurial import ( |
|
6 | from mercurial import ( | |
8 | pycompat, |
|
7 | pycompat, | |
9 | util, |
|
8 | util, | |
10 | ) |
|
9 | ) | |
|
10 | from mercurial.utils import hashutil | |||
11 | from . import ( |
|
11 | from . import ( | |
12 | basepack, |
|
12 | basepack, | |
13 | constants, |
|
13 | constants, | |
14 | shallowutil, |
|
14 | shallowutil, | |
15 | ) |
|
15 | ) | |
16 |
|
16 | |||
17 | # (filename hash, offset, size) |
|
17 | # (filename hash, offset, size) | |
18 | INDEXFORMAT2 = b'!20sQQII' |
|
18 | INDEXFORMAT2 = b'!20sQQII' | |
19 | INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2) |
|
19 | INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2) | |
20 | NODELENGTH = 20 |
|
20 | NODELENGTH = 20 | |
21 |
|
21 | |||
22 | NODEINDEXFORMAT = b'!20sQ' |
|
22 | NODEINDEXFORMAT = b'!20sQ' | |
23 | NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT) |
|
23 | NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT) | |
24 |
|
24 | |||
25 | # (node, p1, p2, linknode) |
|
25 | # (node, p1, p2, linknode) | |
26 | PACKFORMAT = b"!20s20s20s20sH" |
|
26 | PACKFORMAT = b"!20s20s20s20sH" | |
27 | PACKENTRYLENGTH = 82 |
|
27 | PACKENTRYLENGTH = 82 | |
28 |
|
28 | |||
29 | ENTRYCOUNTSIZE = 4 |
|
29 | ENTRYCOUNTSIZE = 4 | |
30 |
|
30 | |||
31 | INDEXSUFFIX = b'.histidx' |
|
31 | INDEXSUFFIX = b'.histidx' | |
32 | PACKSUFFIX = b'.histpack' |
|
32 | PACKSUFFIX = b'.histpack' | |
33 |
|
33 | |||
34 | ANC_NODE = 0 |
|
34 | ANC_NODE = 0 | |
35 | ANC_P1NODE = 1 |
|
35 | ANC_P1NODE = 1 | |
36 | ANC_P2NODE = 2 |
|
36 | ANC_P2NODE = 2 | |
37 | ANC_LINKNODE = 3 |
|
37 | ANC_LINKNODE = 3 | |
38 | ANC_COPYFROM = 4 |
|
38 | ANC_COPYFROM = 4 | |
39 |
|
39 | |||
40 |
|
40 | |||
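A quick struct sketch of the fixed-size portion of a history entry as the constants above suggest: four 20-byte nodes plus a 2-byte copyfrom length, presumably followed by the copyfrom path itself (that trailing-path detail is an assumption here):

    import struct

    fmt = b"!20s20s20s20sH"                      # PACKFORMAT above
    assert struct.calcsize(fmt) == 82            # PACKENTRYLENGTH above

    node, p1, p2, linknode = b'\x01' * 20, b'\x02' * 20, b'\x03' * 20, b'\x04' * 20
    copyfrom = b'old/name.py'                    # made-up rename source
    entry = struct.pack(fmt, node, p1, p2, linknode, len(copyfrom)) + copyfrom
    assert len(entry) == 82 + len(copyfrom)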
 class historypackstore(basepack.basepackstore):
     INDEXSUFFIX = INDEXSUFFIX
     PACKSUFFIX = PACKSUFFIX

     def getpack(self, path):
         return historypack(path)

     def getancestors(self, name, node, known=None):
         for pack in self.packs:
             try:
                 return pack.getancestors(name, node, known=known)
             except KeyError:
                 pass

         for pack in self.refresh():
             try:
                 return pack.getancestors(name, node, known=known)
             except KeyError:
                 pass

         raise KeyError((name, node))

     def getnodeinfo(self, name, node):
         for pack in self.packs:
             try:
                 return pack.getnodeinfo(name, node)
             except KeyError:
                 pass

         for pack in self.refresh():
             try:
                 return pack.getnodeinfo(name, node)
             except KeyError:
                 pass

         raise KeyError((name, node))

     def add(self, filename, node, p1, p2, linknode, copyfrom):
         raise RuntimeError(
             b"cannot add to historypackstore (%s:%s)" % (filename, hex(node))
         )


 class historypack(basepack.basepack):
     INDEXSUFFIX = INDEXSUFFIX
     PACKSUFFIX = PACKSUFFIX

     SUPPORTED_VERSIONS = [2]

     def __init__(self, path):
         super(historypack, self).__init__(path)
         self.INDEXFORMAT = INDEXFORMAT2
         self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2

     def getmissing(self, keys):
         missing = []
         for name, node in keys:
             try:
                 self._findnode(name, node)
             except KeyError:
                 missing.append((name, node))

         return missing

     def getancestors(self, name, node, known=None):
         """Returns as many ancestors as we're aware of.

         return value: {
             node: (p1, p2, linknode, copyfrom),
             ...
         }
         """
         if known and node in known:
             return []

         ancestors = self._getancestors(name, node, known=known)
         results = {}
         for ancnode, p1, p2, linknode, copyfrom in ancestors:
             results[ancnode] = (p1, p2, linknode, copyfrom)

         if not results:
             raise KeyError((name, node))
         return results

     def getnodeinfo(self, name, node):
         # Drop the node from the tuple before returning, since the result should
         # just be (p1, p2, linknode, copyfrom)
         return self._findnode(name, node)[1:]

     def _getancestors(self, name, node, known=None):
         if known is None:
             known = set()
         section = self._findsection(name)
         filename, offset, size, nodeindexoffset, nodeindexsize = section
         pending = set((node,))
         o = 0
         while o < size:
             if not pending:
                 break
             entry, copyfrom = self._readentry(offset + o)
             o += PACKENTRYLENGTH
             if copyfrom:
                 o += len(copyfrom)

             ancnode = entry[ANC_NODE]
             if ancnode in pending:
                 pending.remove(ancnode)
                 p1node = entry[ANC_P1NODE]
                 p2node = entry[ANC_P2NODE]
                 if p1node != nullid and p1node not in known:
                     pending.add(p1node)
                 if p2node != nullid and p2node not in known:
                     pending.add(p2node)

                 yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)

     def _readentry(self, offset):
         data = self._data
         entry = struct.unpack(
             PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
         )
         copyfrom = None
         copyfromlen = entry[ANC_COPYFROM]
         if copyfromlen != 0:
             offset += PACKENTRYLENGTH
             copyfrom = data[offset : offset + copyfromlen]
         return entry, copyfrom

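
Per the `getancestors` docstring above, callers get back a plain dict keyed by node. A hypothetical caller might use it to walk first parents back to the root; `pack`, `name` and `node` below are assumed to already exist and are not part of this patch:

    # Hypothetical usage sketch; `pack` is an open historypack and
    # (`name`, `node`) a (filename, filenode) pair stored in it.
    try:
        ancestors = pack.getancestors(name, node)
    except KeyError:
        ancestors = {}

    chain = []
    current = node
    while current in ancestors:
        p1node, p2node, linknode, copyfrom = ancestors[current]
        # copyfrom is set when this revision was copied from another path,
        # in which case p1node refers to a node of that other file.
        chain.append((current, linknode, copyfrom))
        current = p1node
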
     def add(self, filename, node, p1, p2, linknode, copyfrom):
         raise RuntimeError(
             b"cannot add to historypack (%s:%s)" % (filename, hex(node))
         )

     def _findnode(self, name, node):
         if self.VERSION == 0:
             ancestors = self._getancestors(name, node)
             for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
                 if ancnode == node:
                     return (ancnode, p1node, p2node, linknode, copyfrom)
         else:
             section = self._findsection(name)
             nodeindexoffset, nodeindexsize = section[3:]
             entry = self._bisect(
                 node,
                 nodeindexoffset,
                 nodeindexoffset + nodeindexsize,
                 NODEINDEXENTRYLENGTH,
             )
             if entry is not None:
                 node, offset = struct.unpack(NODEINDEXFORMAT, entry)
                 entry, copyfrom = self._readentry(offset)
                 # Drop the copyfromlen from the end of entry, and replace it
                 # with the copyfrom string.
                 return entry[:4] + (copyfrom,)

         raise KeyError(b"unable to find history for %s:%s" % (name, hex(node)))

     def _findsection(self, name):
         params = self.params
-        namehash = hashlib.sha1(name).digest()
+        namehash = hashutil.sha1(name).digest()
         fanoutkey = struct.unpack(
             params.fanoutstruct, namehash[: params.fanoutprefix]
         )[0]
         fanout = self._fanouttable

         start = fanout[fanoutkey] + params.indexstart
         indexend = self._indexend

         for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
             end = fanout[i] + params.indexstart
             if end != start:
                 break
         else:
             end = indexend

         entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH)
         if not entry:
             raise KeyError(name)

         rawentry = struct.unpack(self.INDEXFORMAT, entry)
         x, offset, size, nodeindexoffset, nodeindexsize = rawentry
         rawnamelen = self._index[
             nodeindexoffset : nodeindexoffset + constants.FILENAMESIZE
         ]
         actualnamelen = struct.unpack(b'!H', rawnamelen)[0]
         nodeindexoffset += constants.FILENAMESIZE
         actualname = self._index[
             nodeindexoffset : nodeindexoffset + actualnamelen
         ]
         if actualname != name:
             raise KeyError(
                 b"found file name %s when looking for %s" % (actualname, name)
             )
         nodeindexoffset += actualnamelen

         filenamelength = struct.unpack(
             b'!H', self._data[offset : offset + constants.FILENAMESIZE]
         )[0]
         offset += constants.FILENAMESIZE

         actualname = self._data[offset : offset + filenamelength]
         offset += filenamelength

         if name != actualname:
             raise KeyError(
                 b"found file name %s when looking for %s" % (actualname, name)
             )

         # Skip entry list size
         offset += ENTRYCOUNTSIZE

         nodelistoffset = offset
         nodelistsize = (
             size - constants.FILENAMESIZE - filenamelength - ENTRYCOUNTSIZE
         )
         return (
             name,
             nodelistoffset,
             nodelistsize,
             nodeindexoffset,
             nodeindexsize,
         )

     def _bisect(self, node, start, end, entrylen):
         # Bisect between start and end to find node
         origstart = start
         startnode = self._index[start : start + NODELENGTH]
         endnode = self._index[end : end + NODELENGTH]

         if startnode == node:
             return self._index[start : start + entrylen]
         elif endnode == node:
             return self._index[end : end + entrylen]
         else:
             while start < end - entrylen:
                 mid = start + (end - start) // 2
                 mid = mid - ((mid - origstart) % entrylen)
                 midnode = self._index[mid : mid + NODELENGTH]
                 if midnode == node:
                     return self._index[mid : mid + entrylen]
                 if node > midnode:
                     start = mid
                 elif node < midnode:
                     end = mid
         return None

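
`_findsection` narrows the bisect range using the fanout table that the index docstring further down describes: the leading bytes of the filename hash select a slot. A standalone sketch of just the fanout-key step, assuming the 2^16-slot (two-byte prefix) layout that docstring gives; `fanoutprefix` and `fanoutstruct` here are illustrative stand-ins for the corresponding pack parameters:

    import hashlib
    import struct

    name = b'hgext/remotefilelog/historypack.py'
    namehash = hashlib.sha1(name).digest()  # the patched code uses hashutil.sha1

    fanoutprefix = 2        # key on the first two bytes of the hash
    fanoutstruct = b'!H'
    fanoutkey = struct.unpack(fanoutstruct, namehash[:fanoutprefix])[0]
    assert 0 <= fanoutkey < 2 ** 16
    # fanout[fanoutkey] then gives the byte offset of the first index entry
    # whose hash starts with those two bytes, bounding the subsequent bisect.
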
     def markledger(self, ledger, options=None):
         for filename, node in self:
             ledger.markhistoryentry(self, filename, node)

     def cleanup(self, ledger):
         entries = ledger.sources.get(self, [])
         allkeys = set(self)
         repackedkeys = set(
             (e.filename, e.node) for e in entries if e.historyrepacked
         )

         if len(allkeys - repackedkeys) == 0:
             if self.path not in ledger.created:
                 util.unlinkpath(self.indexpath, ignoremissing=True)
                 util.unlinkpath(self.packpath, ignoremissing=True)

     def __iter__(self):
         for f, n, x, x, x, x in self.iterentries():
             yield f, n

     def iterentries(self):
         # Start at 1 to skip the header
         offset = 1
         while offset < self.datasize:
             data = self._data
             # <2 byte len> + <filename>
             filenamelen = struct.unpack(
                 b'!H', data[offset : offset + constants.FILENAMESIZE]
             )[0]
             offset += constants.FILENAMESIZE
             filename = data[offset : offset + filenamelen]
             offset += filenamelen

             revcount = struct.unpack(
                 b'!I', data[offset : offset + ENTRYCOUNTSIZE]
             )[0]
             offset += ENTRYCOUNTSIZE

             for i in pycompat.xrange(revcount):
                 entry = struct.unpack(
                     PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
                 )
                 offset += PACKENTRYLENGTH

                 copyfrom = data[offset : offset + entry[ANC_COPYFROM]]
                 offset += entry[ANC_COPYFROM]

                 yield (
                     filename,
                     entry[ANC_NODE],
                     entry[ANC_P1NODE],
                     entry[ANC_P2NODE],
                     entry[ANC_LINKNODE],
                     copyfrom,
                 )

                 self._pagedin += PACKENTRYLENGTH

             # If we've read a lot of data from the mmap, free some memory.
             self.freememory()


 class mutablehistorypack(basepack.mutablebasepack):
     """A class for constructing and serializing a histpack file and index.

     A history pack is a pair of files that contain the revision history for
     various file revisions in Mercurial. It contains only revision history (like
     parent pointers and linknodes), not any revision content information.

     It consists of two files, with the following format:

     .histpack
         The pack itself is a series of file revisions with some basic header
         information on each.

         datapack = <version: 1 byte>
                    [<filesection>,...]
         filesection = <filename len: 2 byte unsigned int>
                       <filename>
                       <revision count: 4 byte unsigned int>
                       [<revision>,...]
         revision = <node: 20 byte>
                    <p1node: 20 byte>
                    <p2node: 20 byte>
                    <linknode: 20 byte>
                    <copyfromlen: 2 byte>
                    <copyfrom>

         The revisions within each filesection are stored in topological order
         (newest first). If a given entry has a parent from another file (a copy)
         then p1node is the node from the other file, and copyfrom is the
         filepath of the other file.

     .histidx
         The index file provides a mapping from filename to the file section in
         the histpack. In V1 it also contains sub-indexes for specific nodes
         within each file. It consists of three parts, the fanout, the file index
         and the node indexes.

         The file index is a list of index entries, sorted by filename hash (one
         per file section in the pack). Each entry has:

         - node (The 20 byte hash of the filename)
         - pack entry offset (The location of this file section in the histpack)
         - pack content size (The on-disk length of this file section's pack
           data)
         - node index offset (The location of the file's node index in the index
           file) [1]
         - node index size (the on-disk length of this file's node index) [1]

         The fanout is a quick lookup table to reduce the number of steps for
         bisecting the index. It is a series of 4 byte pointers to positions
         within the index. It has 2^16 entries, which corresponds to hash
         prefixes [00, 01, 02,..., FD, FE, FF]. Example: the pointer in slot 4F
         points to the index position of the first revision whose node starts
         with 4F. This saves log(2^16) bisect steps.

         dataidx = <fanouttable>
                   <file count: 8 byte unsigned> [1]
                   <fileindex>
                   <node count: 8 byte unsigned> [1]
                   [<nodeindex>,...] [1]
         fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)

         fileindex = [<file index entry>,...]
         fileindexentry = <node: 20 byte>
                          <pack file section offset: 8 byte unsigned int>
                          <pack file section size: 8 byte unsigned int>
                          <node index offset: 4 byte unsigned int> [1]
                          <node index size: 4 byte unsigned int> [1]
         nodeindex = <filename>[<node index entry>,...] [1]
         filename = <filename len : 2 byte unsigned int><filename value> [1]
         nodeindexentry = <node: 20 byte> [1]
                          <pack file node offset: 8 byte unsigned int> [1]

     [1]: new in version 1.
     """

     INDEXSUFFIX = INDEXSUFFIX
     PACKSUFFIX = PACKSUFFIX

     SUPPORTED_VERSIONS = [2]

     def __init__(self, ui, packpath, version=2):
         super(mutablehistorypack, self).__init__(ui, packpath, version=version)
         self.files = {}
         self.entrylocations = {}
         self.fileentries = {}

         self.INDEXFORMAT = INDEXFORMAT2
         self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2

         self.NODEINDEXFORMAT = NODEINDEXFORMAT
         self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH

     def add(self, filename, node, p1, p2, linknode, copyfrom):
         copyfrom = copyfrom or b''
         copyfromlen = struct.pack(b'!H', len(copyfrom))
         self.fileentries.setdefault(filename, []).append(
             (node, p1, p2, linknode, copyfromlen, copyfrom)
         )

     def _write(self):
         for filename in sorted(self.fileentries):
             entries = self.fileentries[filename]
             sectionstart = self.packfp.tell()

             # Write the file section content
             entrymap = dict((e[0], e) for e in entries)

             def parentfunc(node):
                 x, p1, p2, x, x, x = entrymap[node]
                 parents = []
                 if p1 != nullid:
                     parents.append(p1)
                 if p2 != nullid:
                     parents.append(p2)
                 return parents

             sortednodes = list(
                 reversed(
                     shallowutil.sortnodes((e[0] for e in entries), parentfunc)
                 )
             )

             # Write the file section header
             self.writeraw(
                 b"%s%s%s"
                 % (
                     struct.pack(b'!H', len(filename)),
                     filename,
                     struct.pack(b'!I', len(sortednodes)),
                 )
             )

             sectionlen = constants.FILENAMESIZE + len(filename) + 4

             rawstrings = []

             # Record the node locations for the index
             locations = self.entrylocations.setdefault(filename, {})
             offset = sectionstart + sectionlen
             for node in sortednodes:
                 locations[node] = offset
                 raw = b'%s%s%s%s%s%s' % entrymap[node]
                 rawstrings.append(raw)
                 offset += len(raw)

             rawdata = b''.join(rawstrings)
             sectionlen += len(rawdata)

             self.writeraw(rawdata)

             # Record metadata for the index
             self.files[filename] = (sectionstart, sectionlen)
-            node = hashlib.sha1(filename).digest()
+            node = hashutil.sha1(filename).digest()
             self.entries[node] = node

     def close(self, ledger=None):
         if self._closed:
             return

         self._write()

         return super(mutablehistorypack, self).close(ledger=ledger)

     def createindex(self, nodelocations, indexoffset):
         fileindexformat = self.INDEXFORMAT
         fileindexlength = self.INDEXENTRYLENGTH
         nodeindexformat = self.NODEINDEXFORMAT
         nodeindexlength = self.NODEINDEXENTRYLENGTH

         files = (
-            (hashlib.sha1(filename).digest(), filename, offset, size)
+            (hashutil.sha1(filename).digest(), filename, offset, size)
             for filename, (offset, size) in pycompat.iteritems(self.files)
         )
         files = sorted(files)

         # node index is after file index size, file index, and node index size
         indexlensize = struct.calcsize(b'!Q')
         nodeindexoffset = (
             indexoffset
             + indexlensize
             + (len(files) * fileindexlength)
             + indexlensize
         )

         fileindexentries = []
         nodeindexentries = []
         nodecount = 0
         for namehash, filename, offset, size in files:
             # File section index
             nodelocations = self.entrylocations[filename]

             nodeindexsize = len(nodelocations) * nodeindexlength

             rawentry = struct.pack(
                 fileindexformat,
                 namehash,
                 offset,
                 size,
                 nodeindexoffset,
                 nodeindexsize,
             )
             # Node index
             nodeindexentries.append(
                 struct.pack(constants.FILENAMESTRUCT, len(filename)) + filename
             )
             nodeindexoffset += constants.FILENAMESIZE + len(filename)

             for node, location in sorted(pycompat.iteritems(nodelocations)):
                 nodeindexentries.append(
                     struct.pack(nodeindexformat, node, location)
                 )
                 nodecount += 1

             nodeindexoffset += len(nodelocations) * nodeindexlength

             fileindexentries.append(rawentry)

         nodecountraw = struct.pack(b'!Q', nodecount)
         return (
             b''.join(fileindexentries)
             + nodecountraw
             + b''.join(nodeindexentries)
         )
@@ -1,536 +1,536 b''
 # shallowutil.py -- remotefilelog utilities
 #
 # Copyright 2014 Facebook, Inc.
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 from __future__ import absolute_import

 import collections
 import errno
-import hashlib
 import os
 import stat
 import struct
 import tempfile

 from mercurial.i18n import _
 from mercurial.pycompat import open
 from mercurial import (
     error,
     node,
     pycompat,
     revlog,
     util,
 )
 from mercurial.utils import (
+    hashutil,
     storageutil,
     stringutil,
 )
 from . import constants

 if not pycompat.iswindows:
     import grp


 def isenabled(repo):
     """returns whether the repository is remotefilelog enabled or not"""
     return constants.SHALLOWREPO_REQUIREMENT in repo.requirements


 def getcachekey(reponame, file, id):
-    pathhash = node.hex(hashlib.sha1(file).digest())
+    pathhash = node.hex(hashutil.sha1(file).digest())
     return os.path.join(reponame, pathhash[:2], pathhash[2:], id)


 def getlocalkey(file, id):
-    pathhash = node.hex(hashlib.sha1(file).digest())
+    pathhash = node.hex(hashutil.sha1(file).digest())
     return os.path.join(pathhash, id)

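
`getcachekey` shards the shared cache by the first two hex characters of the filename hash, with the rest of the hash as the next directory level. A quick illustration of the resulting layout (the repo name, path and id below are made up, and plain hashlib is used only to keep the snippet standalone):

    import hashlib
    import os

    reponame = b'myrepo'
    file = b'dir/file.txt'
    id = b'0123456789abcdef0123456789abcdef01234567'  # hex filenode, made up

    pathhash = hashlib.sha1(file).hexdigest().encode('ascii')
    cachekey = os.path.join(reponame, pathhash[:2], pathhash[2:], id)
    # => myrepo/<2 hex chars>/<remaining 38 hex chars>/<filenode hex>
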

 def getcachepath(ui, allowempty=False):
     cachepath = ui.config(b"remotefilelog", b"cachepath")
     if not cachepath:
         if allowempty:
             return None
         else:
             raise error.Abort(
                 _(b"could not find config option remotefilelog.cachepath")
             )
     return util.expandpath(cachepath)


 def getcachepackpath(repo, category):
     cachepath = getcachepath(repo.ui)
     if category != constants.FILEPACK_CATEGORY:
         return os.path.join(cachepath, repo.name, b'packs', category)
     else:
         return os.path.join(cachepath, repo.name, b'packs')


 def getlocalpackpath(base, category):
     return os.path.join(base, b'packs', category)


 def createrevlogtext(text, copyfrom=None, copyrev=None):
     """returns a string that matches the revlog contents in a
     traditional revlog
     """
     meta = {}
     if copyfrom or text.startswith(b'\1\n'):
         if copyfrom:
             meta[b'copy'] = copyfrom
             meta[b'copyrev'] = copyrev
         text = storageutil.packmeta(meta, text)

     return text


 def parsemeta(text):
     """parse mercurial filelog metadata"""
     meta, size = storageutil.parsemeta(text)
     if text.startswith(b'\1\n'):
         s = text.index(b'\1\n', 2)
         text = text[s + 2 :]
     return meta or {}, text


 def sumdicts(*dicts):
     """Adds all the values of *dicts together into one dictionary. This assumes
     the values in *dicts are all summable.

     e.g. [{'a': 4', 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
     """
     result = collections.defaultdict(lambda: 0)
     for dict in dicts:
         for k, v in pycompat.iteritems(dict):
             result[k] += v
     return result


 def prefixkeys(dict, prefix):
     """Returns ``dict`` with ``prefix`` prepended to all its keys."""
     result = {}
     for k, v in pycompat.iteritems(dict):
         result[prefix + k] = v
     return result


 def reportpackmetrics(ui, prefix, *stores):
     dicts = [s.getmetrics() for s in stores]
     dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
     ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))


 def _parsepackmeta(metabuf):
     """parse datapack meta, bytes (<metadata-list>) -> dict

     The dict contains raw content - both keys and values are strings.
     Upper-level business may want to convert some of them to other types like
     integers, on their own.

     raise ValueError if the data is corrupted
     """
     metadict = {}
     offset = 0
     buflen = len(metabuf)
     while buflen - offset >= 3:
         key = metabuf[offset : offset + 1]
         offset += 1
         metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
         offset += 2
         if offset + metalen > buflen:
             raise ValueError(b'corrupted metadata: incomplete buffer')
         value = metabuf[offset : offset + metalen]
         metadict[key] = value
         offset += metalen
     if offset != buflen:
         raise ValueError(b'corrupted metadata: redundant data')
     return metadict


 def _buildpackmeta(metadict):
     """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)

     The dict contains raw content - both keys and values are strings.
     Upper-level business may want to serialize some of other types (like
     integers) to strings before calling this function.

     raise ProgrammingError when metadata key is illegal, or ValueError if
     length limit is exceeded
     """
     metabuf = b''
     for k, v in sorted(pycompat.iteritems((metadict or {}))):
         if len(k) != 1:
             raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
         if len(v) > 0xFFFE:
             raise ValueError(
                 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
             )
         metabuf += k
         metabuf += struct.pack(b'!H', len(v))
         metabuf += v
     # len(metabuf) is guaranteed representable in 4 bytes, because there are
     # only 256 keys, and for each value, len(value) <= 0xfffe.
     return metabuf


 _metaitemtypes = {
     constants.METAKEYFLAG: (int, pycompat.long),
     constants.METAKEYSIZE: (int, pycompat.long),
 }


 def buildpackmeta(metadict):
     """like _buildpackmeta, but typechecks metadict and normalize it.

     This means, METAKEYSIZE and METAKEYSIZE should have integers as values,
     and METAKEYFLAG will be dropped if its value is 0.
     """
     newmeta = {}
     for k, v in pycompat.iteritems(metadict or {}):
         expectedtype = _metaitemtypes.get(k, (bytes,))
         if not isinstance(v, expectedtype):
             raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
         # normalize int to binary buffer
         if int in expectedtype:
             # optimization: remove flag if it's 0 to save space
             if k == constants.METAKEYFLAG and v == 0:
                 continue
             v = int2bin(v)
         newmeta[k] = v
     return _buildpackmeta(newmeta)


 def parsepackmeta(metabuf):
     """like _parsepackmeta, but convert fields to desired types automatically.

     This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
     integers.
     """
     metadict = _parsepackmeta(metabuf)
     for k, v in pycompat.iteritems(metadict):
         if k in _metaitemtypes and int in _metaitemtypes[k]:
             metadict[k] = bin2int(v)
     return metadict


 def int2bin(n):
     """convert a non-negative integer to raw binary buffer"""
     buf = bytearray()
     while n > 0:
         buf.insert(0, n & 0xFF)
         n >>= 8
     return bytes(buf)


 def bin2int(buf):
     """the reverse of int2bin, convert a binary buffer to an integer"""
     x = 0
     for b in bytearray(buf):
         x <<= 8
         x |= b
     return x

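
The metadata helpers above implement a simple key/length/value encoding, with `int2bin`/`bin2int` as its big-endian integer counterpart. A self-contained round-trip check of the same scheme, reimplemented here so it runs standalone (the single-byte key names are arbitrary, as the format requires):

    import struct

    def _pack_kv(k, v):
        # <1 byte key><2 byte big-endian length><value>
        return k + struct.pack(b'!H', len(v)) + v

    def _int2bin(n):
        buf = bytearray()
        while n > 0:
            buf.insert(0, n & 0xFF)
            n >>= 8
        return bytes(buf)

    def _bin2int(buf):
        x = 0
        for b in bytearray(buf):
            x = (x << 8) | b
        return x

    size = 123456
    metabuf = _pack_kv(b's', _int2bin(size)) + _pack_kv(b'f', _int2bin(0x1))
    assert _bin2int(_int2bin(size)) == size
    assert len(metabuf) == (1 + 2 + len(_int2bin(size))) + (1 + 2 + 1)
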

 def parsesizeflags(raw):
     """given a remotefilelog blob, return (headersize, rawtextsize, flags)

     see remotefilelogserver.createfileblob for the format.
     raise RuntimeError if the content is illformed.
     """
     flags = revlog.REVIDX_DEFAULT_FLAGS
     size = None
     try:
         index = raw.index(b'\0')
         header = raw[:index]
         if header.startswith(b'v'):
             # v1 and above, header starts with 'v'
             if header.startswith(b'v1\n'):
                 for s in header.split(b'\n'):
                     if s.startswith(constants.METAKEYSIZE):
                         size = int(s[len(constants.METAKEYSIZE) :])
                     elif s.startswith(constants.METAKEYFLAG):
                         flags = int(s[len(constants.METAKEYFLAG) :])
             else:
                 raise RuntimeError(
                     b'unsupported remotefilelog header: %s' % header
                 )
         else:
             # v0, str(int(size)) is the header
             size = int(header)
     except ValueError:
         raise RuntimeError("unexpected remotefilelog header: illegal format")
     if size is None:
         raise RuntimeError("unexpected remotefilelog header: no size found")
     return index + 1, size, flags


 def buildfileblobheader(size, flags, version=None):
     """return the header of a remotefilelog blob.

     see remotefilelogserver.createfileblob for the format.
     approximately the reverse of parsesizeflags.

     version could be 0 or 1, or None (auto decide).
     """
     # choose v0 if flags is empty, otherwise v1
     if version is None:
         version = int(bool(flags))
     if version == 1:
         header = b'v1\n%s%d\n%s%d' % (
             constants.METAKEYSIZE,
             size,
             constants.METAKEYFLAG,
             flags,
         )
     elif version == 0:
         if flags:
             raise error.ProgrammingError(b'fileblob v0 does not support flag')
         header = b'%d' % size
     else:
         raise error.ProgrammingError(b'unknown fileblob version %d' % version)
     return header

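
`buildfileblobheader` and `parsesizeflags` are approximate inverses over the blob header. A short standalone round trip of the v1 layout they agree on; the key bytes below are assumptions standing in for `constants.METAKEYSIZE`/`constants.METAKEYFLAG`, not a quote of constants.py:

    METAKEYSIZE = b's'   # assumed stand-in
    METAKEYFLAG = b'f'   # assumed stand-in

    def build_v1_header(size, flags):
        return b'v1\n%s%d\n%s%d' % (METAKEYSIZE, size, METAKEYFLAG, flags)

    def parse_header(blob):
        index = blob.index(b'\0')
        header = blob[:index]
        parsedsize = parsedflags = None
        for s in header.split(b'\n'):
            if s.startswith(METAKEYSIZE):
                parsedsize = int(s[len(METAKEYSIZE):])
            elif s.startswith(METAKEYFLAG):
                parsedflags = int(s[len(METAKEYFLAG):])
        return index + 1, parsedsize, parsedflags

    size, flags = 1234, 0
    blob = build_v1_header(size, flags) + b'\0' + b'x' * size
    assert parse_header(blob) == (len(build_v1_header(size, flags)) + 1, size, flags)
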
295 |
|
295 | |||
296 | def ancestormap(raw): |
|
296 | def ancestormap(raw): | |
297 | offset, size, flags = parsesizeflags(raw) |
|
297 | offset, size, flags = parsesizeflags(raw) | |
298 | start = offset + size |
|
298 | start = offset + size | |
299 |
|
299 | |||
300 | mapping = {} |
|
300 | mapping = {} | |
301 | while start < len(raw): |
|
301 | while start < len(raw): | |
302 | divider = raw.index(b'\0', start + 80) |
|
302 | divider = raw.index(b'\0', start + 80) | |
303 |
|
303 | |||
304 | currentnode = raw[start : (start + 20)] |
|
304 | currentnode = raw[start : (start + 20)] | |
305 | p1 = raw[(start + 20) : (start + 40)] |
|
305 | p1 = raw[(start + 20) : (start + 40)] | |
306 | p2 = raw[(start + 40) : (start + 60)] |
|
306 | p2 = raw[(start + 40) : (start + 60)] | |
307 | linknode = raw[(start + 60) : (start + 80)] |
|
307 | linknode = raw[(start + 60) : (start + 80)] | |
308 | copyfrom = raw[(start + 80) : divider] |
|
308 | copyfrom = raw[(start + 80) : divider] | |
309 |
|
309 | |||
310 | mapping[currentnode] = (p1, p2, linknode, copyfrom) |
|
310 | mapping[currentnode] = (p1, p2, linknode, copyfrom) | |
311 | start = divider + 1 |
|
311 | start = divider + 1 | |
312 |
|
312 | |||
313 | return mapping |
|
313 | return mapping | |
314 |
|
314 | |||
315 |
|
315 | |||
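Each record parsed by ancestormap above is four 20-byte hashes followed by the copy-from path and a NUL terminator. An illustrative record (the hash values are dummies):

    node = b'\x01' * 20
    p1 = b'\x02' * 20
    p2 = b'\x00' * 20                 # null parent
    linknode = b'\x03' * 20
    copyfrom = b'old/path'            # empty bytes if the file was not copied
    record = node + p1 + p2 + linknode + copyfrom + b'\0'
    # ancestormap() slices exactly these offsets: 0:20, 20:40, 40:60, 60:80,
    # then everything up to the next NUL is the copy-from path.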
316 | def readfile(path): |
|
316 | def readfile(path): | |
317 | f = open(path, b'rb') |
|
317 | f = open(path, b'rb') | |
318 | try: |
|
318 | try: | |
319 | result = f.read() |
|
319 | result = f.read() | |
320 |
|
320 | |||
321 | # we should never have empty files |
|
321 | # we should never have empty files | |
322 | if not result: |
|
322 | if not result: | |
323 | os.remove(path) |
|
323 | os.remove(path) | |
324 | raise IOError(b"empty file: %s" % path) |
|
324 | raise IOError(b"empty file: %s" % path) | |
325 |
|
325 | |||
326 | return result |
|
326 | return result | |
327 | finally: |
|
327 | finally: | |
328 | f.close() |
|
328 | f.close() | |
329 |
|
329 | |||
330 |
|
330 | |||
331 | def unlinkfile(filepath): |
|
331 | def unlinkfile(filepath): | |
332 | if pycompat.iswindows: |
|
332 | if pycompat.iswindows: | |
333 | # On Windows, os.unlink cannot delete readonly files |
|
333 | # On Windows, os.unlink cannot delete readonly files | |

334 | os.chmod(filepath, stat.S_IWUSR) |
|
334 | os.chmod(filepath, stat.S_IWUSR) | |
335 | os.unlink(filepath) |
|
335 | os.unlink(filepath) | |
336 |
|
336 | |||
337 |
|
337 | |||
338 | def renamefile(source, destination): |
|
338 | def renamefile(source, destination): | |
339 | if pycompat.iswindows: |
|
339 | if pycompat.iswindows: | |
340 | # On Windows, os.rename cannot rename readonly files |
|
340 | # On Windows, os.rename cannot rename readonly files | |
341 | # and cannot overwrite destination if it exists |
|
341 | # and cannot overwrite destination if it exists | |
342 | os.chmod(source, stat.S_IWUSR) |
|
342 | os.chmod(source, stat.S_IWUSR) | |
343 | if os.path.isfile(destination): |
|
343 | if os.path.isfile(destination): | |
344 | os.chmod(destination, stat.S_IWUSR) |
|
344 | os.chmod(destination, stat.S_IWUSR) | |
345 | os.unlink(destination) |
|
345 | os.unlink(destination) | |
346 |
|
346 | |||
347 | os.rename(source, destination) |
|
347 | os.rename(source, destination) | |
348 |
|
348 | |||
349 |
|
349 | |||
350 | def writefile(path, content, readonly=False): |
|
350 | def writefile(path, content, readonly=False): | |
351 | dirname, filename = os.path.split(path) |
|
351 | dirname, filename = os.path.split(path) | |
352 | if not os.path.exists(dirname): |
|
352 | if not os.path.exists(dirname): | |
353 | try: |
|
353 | try: | |
354 | os.makedirs(dirname) |
|
354 | os.makedirs(dirname) | |
355 | except OSError as ex: |
|
355 | except OSError as ex: | |
356 | if ex.errno != errno.EEXIST: |
|
356 | if ex.errno != errno.EEXIST: | |
357 | raise |
|
357 | raise | |
358 |
|
358 | |||
359 | fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname) |
|
359 | fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname) | |
360 | os.close(fd) |
|
360 | os.close(fd) | |
361 |
|
361 | |||
362 | try: |
|
362 | try: | |
363 | f = util.posixfile(temp, b'wb') |
|
363 | f = util.posixfile(temp, b'wb') | |
364 | f.write(content) |
|
364 | f.write(content) | |
365 | f.close() |
|
365 | f.close() | |
366 |
|
366 | |||
367 | if readonly: |
|
367 | if readonly: | |
368 | mode = 0o444 |
|
368 | mode = 0o444 | |
369 | else: |
|
369 | else: | |
370 | # tempfiles are created with 0o600, so we need to manually set the |
|
370 | # tempfiles are created with 0o600, so we need to manually set the | |
371 | # mode. |
|
371 | # mode. | |
372 | oldumask = os.umask(0) |
|
372 | oldumask = os.umask(0) | |
373 | # there's no way to get the umask without modifying it, so set it |
|
373 | # there's no way to get the umask without modifying it, so set it | |
374 | # back |
|
374 | # back | |
375 | os.umask(oldumask) |
|
375 | os.umask(oldumask) | |
376 | mode = ~oldumask |
|
376 | mode = ~oldumask | |
377 |
|
377 | |||
378 | renamefile(temp, path) |
|
378 | renamefile(temp, path) | |
379 | os.chmod(path, mode) |
|
379 | os.chmod(path, mode) | |
380 | except Exception: |
|
380 | except Exception: | |
381 | try: |
|
381 | try: | |
382 | unlinkfile(temp) |
|
382 | unlinkfile(temp) | |
383 | except OSError: |
|
383 | except OSError: | |
384 | pass |
|
384 | pass | |
385 | raise |
|
385 | raise | |
386 |
|
386 | |||
387 |
|
387 | |||
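writefile above stages the data in a temporary file in the target directory and renames it into place, so readers never observe a partially written cache entry. A minimal usage sketch (the path is a placeholder):

    writefile(b'/tmp/demo-cache/pack/entry', b'payload', readonly=True)
    assert readfile(b'/tmp/demo-cache/pack/entry') == b'payload'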
388 | def sortnodes(nodes, parentfunc): |
|
388 | def sortnodes(nodes, parentfunc): | |
389 | """Topologically sorts the nodes, using the parentfunc to find |
|
389 | """Topologically sorts the nodes, using the parentfunc to find | |
390 | the parents of nodes.""" |
|
390 | the parents of nodes.""" | |
391 | nodes = set(nodes) |
|
391 | nodes = set(nodes) | |
392 | childmap = {} |
|
392 | childmap = {} | |
393 | parentmap = {} |
|
393 | parentmap = {} | |
394 | roots = [] |
|
394 | roots = [] | |
395 |
|
395 | |||
396 | # Build a child and parent map |
|
396 | # Build a child and parent map | |
397 | for n in nodes: |
|
397 | for n in nodes: | |
398 | parents = [p for p in parentfunc(n) if p in nodes] |
|
398 | parents = [p for p in parentfunc(n) if p in nodes] | |
399 | parentmap[n] = set(parents) |
|
399 | parentmap[n] = set(parents) | |
400 | for p in parents: |
|
400 | for p in parents: | |
401 | childmap.setdefault(p, set()).add(n) |
|
401 | childmap.setdefault(p, set()).add(n) | |
402 | if not parents: |
|
402 | if not parents: | |
403 | roots.append(n) |
|
403 | roots.append(n) | |
404 |
|
404 | |||
405 | roots.sort() |
|
405 | roots.sort() | |
406 | # Process roots, adding children to the queue as they become roots |
|
406 | # Process roots, adding children to the queue as they become roots | |
407 | results = [] |
|
407 | results = [] | |
408 | while roots: |
|
408 | while roots: | |
409 | n = roots.pop(0) |
|
409 | n = roots.pop(0) | |
410 | results.append(n) |
|
410 | results.append(n) | |
411 | if n in childmap: |
|
411 | if n in childmap: | |
412 | children = childmap[n] |
|
412 | children = childmap[n] | |
413 | for c in children: |
|
413 | for c in children: | |
414 | childparents = parentmap[c] |
|
414 | childparents = parentmap[c] | |
415 | childparents.remove(n) |
|
415 | childparents.remove(n) | |
416 | if len(childparents) == 0: |
|
416 | if len(childparents) == 0: | |
417 | # insert at the beginning, that way child nodes |
|
417 | # insert at the beginning, that way child nodes | |
418 | # are likely to be output immediately after their |
|
418 | # are likely to be output immediately after their | |
419 | # parents. This gives better compression results. |
|
419 | # parents. This gives better compression results. | |
420 | roots.insert(0, c) |
|
420 | roots.insert(0, c) | |
421 |
|
421 | |||
422 | return results |
|
422 | return results | |
423 |
|
423 | |||
424 |
|
424 | |||
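A small usage sketch for the topological sort above, with parents supplied by a plain dict; sibling order can vary, but parents always precede their children:

    parents = {b'a': [], b'b': [b'a'], b'c': [b'a'], b'd': [b'b']}
    order = sortnodes(parents, parents.get)
    # one possible result: [b'a', b'b', b'd', b'c']
    assert order.index(b'a') < order.index(b'b') < order.index(b'd')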
425 | def readexactly(stream, n): |
|
425 | def readexactly(stream, n): | |
426 | '''read n bytes from stream.read and abort if less was available''' |
|
426 | '''read n bytes from stream.read and abort if less was available''' | |
427 | s = stream.read(n) |
|
427 | s = stream.read(n) | |
428 | if len(s) < n: |
|
428 | if len(s) < n: | |
429 | raise error.Abort( |
|
429 | raise error.Abort( | |
430 | _(b"stream ended unexpectedly (got %d bytes, expected %d)") |
|
430 | _(b"stream ended unexpectedly (got %d bytes, expected %d)") | |
431 | % (len(s), n) |
|
431 | % (len(s), n) | |
432 | ) |
|
432 | ) | |
433 | return s |
|
433 | return s | |
434 |
|
434 | |||
435 |
|
435 | |||
436 | def readunpack(stream, fmt): |
|
436 | def readunpack(stream, fmt): | |
437 | data = readexactly(stream, struct.calcsize(fmt)) |
|
437 | data = readexactly(stream, struct.calcsize(fmt)) | |
438 | return struct.unpack(fmt, data) |
|
438 | return struct.unpack(fmt, data) | |
439 |
|
439 | |||
440 |
|
440 | |||
441 | def readpath(stream): |
|
441 | def readpath(stream): | |
442 | rawlen = readexactly(stream, constants.FILENAMESIZE) |
|
442 | rawlen = readexactly(stream, constants.FILENAMESIZE) | |
443 | pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0] |
|
443 | pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0] | |
444 | return readexactly(stream, pathlen) |
|
444 | return readexactly(stream, pathlen) | |
445 |
|
445 | |||
446 |
|
446 | |||
447 | def readnodelist(stream): |
|
447 | def readnodelist(stream): | |
448 | rawlen = readexactly(stream, constants.NODECOUNTSIZE) |
|
448 | rawlen = readexactly(stream, constants.NODECOUNTSIZE) | |
449 | nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0] |
|
449 | nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0] | |
450 | for i in pycompat.xrange(nodecount): |
|
450 | for i in pycompat.xrange(nodecount): | |
451 | yield readexactly(stream, constants.NODESIZE) |
|
451 | yield readexactly(stream, constants.NODESIZE) | |
452 |
|
452 | |||
453 |
|
453 | |||
454 | def readpathlist(stream): |
|
454 | def readpathlist(stream): | |
455 | rawlen = readexactly(stream, constants.PATHCOUNTSIZE) |
|
455 | rawlen = readexactly(stream, constants.PATHCOUNTSIZE) | |
456 | pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0] |
|
456 | pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0] | |
457 | for i in pycompat.xrange(pathcount): |
|
457 | for i in pycompat.xrange(pathcount): | |
458 | yield readpath(stream) |
|
458 | yield readpath(stream) | |
459 |
|
459 | |||
460 |
|
460 | |||
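The readers above define a simple length-prefixed framing. A hedged counterpart that writes the same framing, reusing the module's constants (io.BytesIO stands in for the real stream; writepathlist is hypothetical):

    import io
    import struct

    def writepathlist(paths):
        buf = io.BytesIO()
        buf.write(struct.pack(constants.PATHCOUNTSTRUCT, len(paths)))
        for p in paths:
            buf.write(struct.pack(constants.FILENAMESTRUCT, len(p)))
            buf.write(p)
        buf.seek(0)
        return buf

    assert list(readpathlist(writepathlist([b'a', b'dir/b']))) == [b'a', b'dir/b']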
461 | def getgid(groupname): |
|
461 | def getgid(groupname): | |
462 | try: |
|
462 | try: | |
463 | gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid |
|
463 | gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid | |
464 | return gid |
|
464 | return gid | |
465 | except KeyError: |
|
465 | except KeyError: | |
466 | return None |
|
466 | return None | |
467 |
|
467 | |||
468 |
|
468 | |||
469 | def setstickygroupdir(path, gid, warn=None): |
|
469 | def setstickygroupdir(path, gid, warn=None): | |
470 | if gid is None: |
|
470 | if gid is None: | |
471 | return |
|
471 | return | |
472 | try: |
|
472 | try: | |
473 | os.chown(path, -1, gid) |
|
473 | os.chown(path, -1, gid) | |
474 | os.chmod(path, 0o2775) |
|
474 | os.chmod(path, 0o2775) | |
475 | except (IOError, OSError) as ex: |
|
475 | except (IOError, OSError) as ex: | |
476 | if warn: |
|
476 | if warn: | |
477 | warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex)) |
|
477 | warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex)) | |
478 |
|
478 | |||
479 |
|
479 | |||
480 | def mkstickygroupdir(ui, path): |
|
480 | def mkstickygroupdir(ui, path): | |
481 | """Creates the given directory (if it doesn't exist) and give it a |
|
481 | """Creates the given directory (if it doesn't exist) and give it a | |
482 | particular group with setgid enabled.""" |
|
482 | particular group with setgid enabled.""" | |
483 | gid = None |
|
483 | gid = None | |
484 | groupname = ui.config(b"remotefilelog", b"cachegroup") |
|
484 | groupname = ui.config(b"remotefilelog", b"cachegroup") | |
485 | if groupname: |
|
485 | if groupname: | |
486 | gid = getgid(groupname) |
|
486 | gid = getgid(groupname) | |
487 | if gid is None: |
|
487 | if gid is None: | |
488 | ui.warn(_(b'unable to resolve group name: %s\n') % groupname) |
|
488 | ui.warn(_(b'unable to resolve group name: %s\n') % groupname) | |
489 |
|
489 | |||
490 | # we use a single stat syscall to test the existence and mode / group bit |
|
490 | # we use a single stat syscall to test the existence and mode / group bit | |
491 | st = None |
|
491 | st = None | |
492 | try: |
|
492 | try: | |
493 | st = os.stat(path) |
|
493 | st = os.stat(path) | |
494 | except OSError: |
|
494 | except OSError: | |
495 | pass |
|
495 | pass | |
496 |
|
496 | |||
497 | if st: |
|
497 | if st: | |
498 | # exists |
|
498 | # exists | |
499 | if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid: |
|
499 | if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid: | |
500 | # permission needs to be fixed |
|
500 | # permission needs to be fixed | |
501 | setstickygroupdir(path, gid, ui.warn) |
|
501 | setstickygroupdir(path, gid, ui.warn) | |
502 | return |
|
502 | return | |
503 |
|
503 | |||
504 | oldumask = os.umask(0o002) |
|
504 | oldumask = os.umask(0o002) | |
505 | try: |
|
505 | try: | |
506 | missingdirs = [path] |
|
506 | missingdirs = [path] | |
507 | path = os.path.dirname(path) |
|
507 | path = os.path.dirname(path) | |
508 | while path and not os.path.exists(path): |
|
508 | while path and not os.path.exists(path): | |
509 | missingdirs.append(path) |
|
509 | missingdirs.append(path) | |
510 | path = os.path.dirname(path) |
|
510 | path = os.path.dirname(path) | |
511 |
|
511 | |||
512 | for path in reversed(missingdirs): |
|
512 | for path in reversed(missingdirs): | |
513 | try: |
|
513 | try: | |
514 | os.mkdir(path) |
|
514 | os.mkdir(path) | |
515 | except OSError as ex: |
|
515 | except OSError as ex: | |
516 | if ex.errno != errno.EEXIST: |
|
516 | if ex.errno != errno.EEXIST: | |
517 | raise |
|
517 | raise | |
518 |
|
518 | |||
519 | for path in missingdirs: |
|
519 | for path in missingdirs: | |
520 | setstickygroupdir(path, gid, ui.warn) |
|
520 | setstickygroupdir(path, gid, ui.warn) | |
521 | finally: |
|
521 | finally: | |
522 | os.umask(oldumask) |
|
522 | os.umask(oldumask) | |
523 |
|
523 | |||
524 |
|
524 | |||
525 | def getusername(ui): |
|
525 | def getusername(ui): | |
526 | try: |
|
526 | try: | |
527 | return stringutil.shortuser(ui.username()) |
|
527 | return stringutil.shortuser(ui.username()) | |
528 | except Exception: |
|
528 | except Exception: | |
529 | return b'unknown' |
|
529 | return b'unknown' | |
530 |
|
530 | |||
531 |
|
531 | |||
532 | def getreponame(ui): |
|
532 | def getreponame(ui): | |
533 | reponame = ui.config(b'paths', b'default') |
|
533 | reponame = ui.config(b'paths', b'default') | |
534 | if reponame: |
|
534 | if reponame: | |
535 | return os.path.basename(reponame) |
|
535 | return os.path.basename(reponame) | |
536 | return b"unknown" |
|
536 | return b"unknown" |
@@ -1,1293 +1,1295 b'' | |||||
1 | # sqlitestore.py - Storage backend that uses SQLite |
|
1 | # sqlitestore.py - Storage backend that uses SQLite | |
2 | # |
|
2 | # | |
3 | # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com> |
|
3 | # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com> | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | """store repository data in SQLite (EXPERIMENTAL) |
|
8 | """store repository data in SQLite (EXPERIMENTAL) | |
9 |
|
9 | |||
10 | The sqlitestore extension enables the storage of repository data in SQLite. |
|
10 | The sqlitestore extension enables the storage of repository data in SQLite. | |
11 |
|
11 | |||
12 | This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY |
|
12 | This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY | |
13 | GUARANTEES. This means that repositories created with this extension may |
|
13 | GUARANTEES. This means that repositories created with this extension may | |
14 | only be usable with the exact version of this extension/Mercurial that was |
|
14 | only be usable with the exact version of this extension/Mercurial that was | |
15 | used. The extension attempts to enforce this in order to prevent repository |
|
15 | used. The extension attempts to enforce this in order to prevent repository | |
16 | corruption. |
|
16 | corruption. | |
17 |
|
17 | |||
18 | In addition, several features are not yet supported or have known bugs: |
|
18 | In addition, several features are not yet supported or have known bugs: | |
19 |
|
19 | |||
20 | * Only some data is stored in SQLite. Changeset, manifest, and other repository |
|
20 | * Only some data is stored in SQLite. Changeset, manifest, and other repository | |
21 | data is not yet stored in SQLite. |
|
21 | data is not yet stored in SQLite. | |
22 | * Transactions are not robust. If the process is aborted at the right time |
|
22 | * Transactions are not robust. If the process is aborted at the right time | |
23 | during transaction close/rollback, the repository could be in an inconsistent |
|
23 | during transaction close/rollback, the repository could be in an inconsistent | |
24 | state. This problem will diminish once all repository data is tracked by |
|
24 | state. This problem will diminish once all repository data is tracked by | |
25 | SQLite. |
|
25 | SQLite. | |
26 | * Bundle repositories do not work (the ability to use e.g. |
|
26 | * Bundle repositories do not work (the ability to use e.g. | |
27 | `hg -R <bundle-file> log` to automatically overlay a bundle on top of the |
|
27 | `hg -R <bundle-file> log` to automatically overlay a bundle on top of the | |
28 | existing repository). |
|
28 | existing repository). | |
29 | * Various other features don't work. |
|
29 | * Various other features don't work. | |
30 |
|
30 | |||
31 | This extension should work for basic clone/pull, update, and commit workflows. |
|
31 | This extension should work for basic clone/pull, update, and commit workflows. | |
32 | Some history rewriting operations may fail due to lack of support for bundle |
|
32 | Some history rewriting operations may fail due to lack of support for bundle | |
33 | repositories. |
|
33 | repositories. | |
34 |
|
34 | |||
35 | To use, activate the extension and set the ``storage.new-repo-backend`` config |
|
35 | To use, activate the extension and set the ``storage.new-repo-backend`` config | |
36 | option to ``sqlite`` to enable new repositories to use SQLite for storage. |
|
36 | option to ``sqlite`` to enable new repositories to use SQLite for storage. | |
37 | """ |
|
37 | """ | |
38 |
|
38 | |||
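For illustration, the setup described in the docstring above would typically look like this in an hgrc (option names as given there):

    [extensions]
    sqlitestore =

    [storage]
    new-repo-backend = sqlite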
39 | # To run the test suite with repos using SQLite by default, execute the |
|
39 | # To run the test suite with repos using SQLite by default, execute the | |
40 | # following: |
|
40 | # following: | |
41 | # |
|
41 | # | |
42 | # HGREPOFEATURES="sqlitestore" run-tests.py \ |
|
42 | # HGREPOFEATURES="sqlitestore" run-tests.py \ | |
43 | # --extra-config-opt extensions.sqlitestore= \ |
|
43 | # --extra-config-opt extensions.sqlitestore= \ | |
44 | # --extra-config-opt storage.new-repo-backend=sqlite |
|
44 | # --extra-config-opt storage.new-repo-backend=sqlite | |
45 |
|
45 | |||
46 | from __future__ import absolute_import |
|
46 | from __future__ import absolute_import | |
47 |
|
47 | |||
48 | import hashlib |
|
|||
49 | import sqlite3 |
|
48 | import sqlite3 | |
50 | import struct |
|
49 | import struct | |
51 | import threading |
|
50 | import threading | |
52 | import zlib |
|
51 | import zlib | |
53 |
|
52 | |||
54 | from mercurial.i18n import _ |
|
53 | from mercurial.i18n import _ | |
55 | from mercurial.node import ( |
|
54 | from mercurial.node import ( | |
56 | nullid, |
|
55 | nullid, | |
57 | nullrev, |
|
56 | nullrev, | |
58 | short, |
|
57 | short, | |
59 | ) |
|
58 | ) | |
60 | from mercurial.thirdparty import attr |
|
59 | from mercurial.thirdparty import attr | |
61 | from mercurial import ( |
|
60 | from mercurial import ( | |
62 | ancestor, |
|
61 | ancestor, | |
63 | dagop, |
|
62 | dagop, | |
64 | encoding, |
|
63 | encoding, | |
65 | error, |
|
64 | error, | |
66 | extensions, |
|
65 | extensions, | |
67 | localrepo, |
|
66 | localrepo, | |
68 | mdiff, |
|
67 | mdiff, | |
69 | pycompat, |
|
68 | pycompat, | |
70 | registrar, |
|
69 | registrar, | |
71 | util, |
|
70 | util, | |
72 | verify, |
|
71 | verify, | |
73 | ) |
|
72 | ) | |
74 | from mercurial.interfaces import ( |
|
73 | from mercurial.interfaces import ( | |
75 | repository, |
|
74 | repository, | |
76 | util as interfaceutil, |
|
75 | util as interfaceutil, | |
77 | ) |
|
76 | ) | |
78 |
from mercurial.utils import |
|
77 | from mercurial.utils import ( | |
|
78 | hashutil, | |||
|
79 | storageutil, | |||
|
80 | ) | |||
79 |
|
81 | |||
80 | try: |
|
82 | try: | |
81 | from mercurial import zstd |
|
83 | from mercurial import zstd | |
82 |
|
84 | |||
83 | zstd.__version__ |
|
85 | zstd.__version__ | |
84 | except ImportError: |
|
86 | except ImportError: | |
85 | zstd = None |
|
87 | zstd = None | |
86 |
|
88 | |||
87 | configtable = {} |
|
89 | configtable = {} | |
88 | configitem = registrar.configitem(configtable) |
|
90 | configitem = registrar.configitem(configtable) | |
89 |
|
91 | |||
90 | # experimental config: storage.sqlite.compression |
|
92 | # experimental config: storage.sqlite.compression | |
91 | configitem( |
|
93 | configitem( | |
92 | b'storage', |
|
94 | b'storage', | |
93 | b'sqlite.compression', |
|
95 | b'sqlite.compression', | |
94 | default=b'zstd' if zstd else b'zlib', |
|
96 | default=b'zstd' if zstd else b'zlib', | |
95 | experimental=True, |
|
97 | experimental=True, | |
96 | ) |
|
98 | ) | |
97 |
|
99 | |||
98 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for |
|
100 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for | |
99 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should |
|
101 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should | |
100 | # be specifying the version(s) of Mercurial they are tested with, or |
|
102 | # be specifying the version(s) of Mercurial they are tested with, or | |
101 | # leave the attribute unspecified. |
|
103 | # leave the attribute unspecified. | |
102 | testedwith = b'ships-with-hg-core' |
|
104 | testedwith = b'ships-with-hg-core' | |
103 |
|
105 | |||
104 | REQUIREMENT = b'exp-sqlite-001' |
|
106 | REQUIREMENT = b'exp-sqlite-001' | |
105 | REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd' |
|
107 | REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd' | |
106 | REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib' |
|
108 | REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib' | |
107 | REQUIREMENT_NONE = b'exp-sqlite-comp-001=none' |
|
109 | REQUIREMENT_NONE = b'exp-sqlite-comp-001=none' | |
108 | REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files' |
|
110 | REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files' | |
109 |
|
111 | |||
110 | CURRENT_SCHEMA_VERSION = 1 |
|
112 | CURRENT_SCHEMA_VERSION = 1 | |
111 |
|
113 | |||
112 | COMPRESSION_NONE = 1 |
|
114 | COMPRESSION_NONE = 1 | |
113 | COMPRESSION_ZSTD = 2 |
|
115 | COMPRESSION_ZSTD = 2 | |
114 | COMPRESSION_ZLIB = 3 |
|
116 | COMPRESSION_ZLIB = 3 | |
115 |
|
117 | |||
116 | FLAG_CENSORED = 1 |
|
118 | FLAG_CENSORED = 1 | |
117 | FLAG_MISSING_P1 = 2 |
|
119 | FLAG_MISSING_P1 = 2 | |
118 | FLAG_MISSING_P2 = 4 |
|
120 | FLAG_MISSING_P2 = 4 | |
119 |
|
121 | |||
120 | CREATE_SCHEMA = [ |
|
122 | CREATE_SCHEMA = [ | |
121 | # Deltas are stored as content-indexed blobs. |
|
123 | # Deltas are stored as content-indexed blobs. | |
122 | # compression column holds COMPRESSION_* constant for how the |
|
124 | # compression column holds COMPRESSION_* constant for how the | |
123 | # delta is encoded. |
|
125 | # delta is encoded. | |
124 | 'CREATE TABLE delta (' |
|
126 | 'CREATE TABLE delta (' | |
125 | ' id INTEGER PRIMARY KEY, ' |
|
127 | ' id INTEGER PRIMARY KEY, ' | |
126 | ' compression INTEGER NOT NULL, ' |
|
128 | ' compression INTEGER NOT NULL, ' | |
127 | ' hash BLOB UNIQUE ON CONFLICT ABORT, ' |
|
129 | ' hash BLOB UNIQUE ON CONFLICT ABORT, ' | |
128 | ' delta BLOB NOT NULL ' |
|
130 | ' delta BLOB NOT NULL ' | |
129 | ')', |
|
131 | ')', | |
130 | # Tracked paths are denormalized to integers to avoid redundant |
|
132 | # Tracked paths are denormalized to integers to avoid redundant | |
131 | # storage of the path name. |
|
133 | # storage of the path name. | |
132 | 'CREATE TABLE filepath (' |
|
134 | 'CREATE TABLE filepath (' | |
133 | ' id INTEGER PRIMARY KEY, ' |
|
135 | ' id INTEGER PRIMARY KEY, ' | |
134 | ' path BLOB NOT NULL ' |
|
136 | ' path BLOB NOT NULL ' | |
135 | ')', |
|
137 | ')', | |
136 | 'CREATE UNIQUE INDEX filepath_path ON filepath (path)', |
|
138 | 'CREATE UNIQUE INDEX filepath_path ON filepath (path)', | |
137 | # We have a single table for all file revision data. |
|
139 | # We have a single table for all file revision data. | |
138 | # Each file revision is uniquely described by a (path, rev) and |
|
140 | # Each file revision is uniquely described by a (path, rev) and | |
139 | # (path, node). |
|
141 | # (path, node). | |
140 | # |
|
142 | # | |
141 | # Revision data is stored as a pointer to the delta producing this |
|
143 | # Revision data is stored as a pointer to the delta producing this | |
142 | # revision and the file revision whose delta should be applied before |
|
144 | # revision and the file revision whose delta should be applied before | |
143 | # that one. One can reconstruct the delta chain by recursively following |
|
145 | # that one. One can reconstruct the delta chain by recursively following | |
144 | # the delta base revision pointers until one encounters NULL. |
|
146 | # the delta base revision pointers until one encounters NULL. | |
145 | # |
|
147 | # | |
146 | # flags column holds bitwise integer flags controlling storage options. |
|
148 | # flags column holds bitwise integer flags controlling storage options. | |
147 | # These flags are defined by the FLAG_* constants. |
|
149 | # These flags are defined by the FLAG_* constants. | |
148 | 'CREATE TABLE fileindex (' |
|
150 | 'CREATE TABLE fileindex (' | |
149 | ' id INTEGER PRIMARY KEY, ' |
|
151 | ' id INTEGER PRIMARY KEY, ' | |
150 | ' pathid INTEGER REFERENCES filepath(id), ' |
|
152 | ' pathid INTEGER REFERENCES filepath(id), ' | |
151 | ' revnum INTEGER NOT NULL, ' |
|
153 | ' revnum INTEGER NOT NULL, ' | |
152 | ' p1rev INTEGER NOT NULL, ' |
|
154 | ' p1rev INTEGER NOT NULL, ' | |
153 | ' p2rev INTEGER NOT NULL, ' |
|
155 | ' p2rev INTEGER NOT NULL, ' | |
154 | ' linkrev INTEGER NOT NULL, ' |
|
156 | ' linkrev INTEGER NOT NULL, ' | |
155 | ' flags INTEGER NOT NULL, ' |
|
157 | ' flags INTEGER NOT NULL, ' | |
156 | ' deltaid INTEGER REFERENCES delta(id), ' |
|
158 | ' deltaid INTEGER REFERENCES delta(id), ' | |
157 | ' deltabaseid INTEGER REFERENCES fileindex(id), ' |
|
159 | ' deltabaseid INTEGER REFERENCES fileindex(id), ' | |
158 | ' node BLOB NOT NULL ' |
|
160 | ' node BLOB NOT NULL ' | |
159 | ')', |
|
161 | ')', | |
160 | 'CREATE UNIQUE INDEX fileindex_pathrevnum ' |
|
162 | 'CREATE UNIQUE INDEX fileindex_pathrevnum ' | |
161 | ' ON fileindex (pathid, revnum)', |
|
163 | ' ON fileindex (pathid, revnum)', | |
162 | 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)', |
|
164 | 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)', | |
163 | # Provide a view over all file data for convenience. |
|
165 | # Provide a view over all file data for convenience. | |
164 | 'CREATE VIEW filedata AS ' |
|
166 | 'CREATE VIEW filedata AS ' | |
165 | 'SELECT ' |
|
167 | 'SELECT ' | |
166 | ' fileindex.id AS id, ' |
|
168 | ' fileindex.id AS id, ' | |
167 | ' filepath.id AS pathid, ' |
|
169 | ' filepath.id AS pathid, ' | |
168 | ' filepath.path AS path, ' |
|
170 | ' filepath.path AS path, ' | |
169 | ' fileindex.revnum AS revnum, ' |
|
171 | ' fileindex.revnum AS revnum, ' | |
170 | ' fileindex.node AS node, ' |
|
172 | ' fileindex.node AS node, ' | |
171 | ' fileindex.p1rev AS p1rev, ' |
|
173 | ' fileindex.p1rev AS p1rev, ' | |
172 | ' fileindex.p2rev AS p2rev, ' |
|
174 | ' fileindex.p2rev AS p2rev, ' | |
173 | ' fileindex.linkrev AS linkrev, ' |
|
175 | ' fileindex.linkrev AS linkrev, ' | |
174 | ' fileindex.flags AS flags, ' |
|
176 | ' fileindex.flags AS flags, ' | |
175 | ' fileindex.deltaid AS deltaid, ' |
|
177 | ' fileindex.deltaid AS deltaid, ' | |
176 | ' fileindex.deltabaseid AS deltabaseid ' |
|
178 | ' fileindex.deltabaseid AS deltabaseid ' | |
177 | 'FROM filepath, fileindex ' |
|
179 | 'FROM filepath, fileindex ' | |
178 | 'WHERE fileindex.pathid=filepath.id', |
|
180 | 'WHERE fileindex.pathid=filepath.id', | |
179 | 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION, |
|
181 | 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION, | |
180 | ] |
|
182 | ] | |
181 |
|
183 | |||
182 |
|
184 | |||
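An illustrative query against the filedata view defined above (db is an open sqlite3 connection to such a store; the path and node values are placeholders). The deltaid/deltabaseid pair is what the recursive resolver below follows:

    row = db.execute(
        'SELECT revnum, deltaid, deltabaseid FROM filedata '
        'WHERE path=? AND node=?',
        (b'foo.txt', node),
    ).fetchone()
    # Chase deltabaseid (a fileindex id) until it is NULL to recover the
    # whole delta chain for this revision.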
183 | def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None): |
|
185 | def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None): | |
184 | """Resolve a delta chain for a file node.""" |
|
186 | """Resolve a delta chain for a file node.""" | |
185 |
|
187 | |||
186 | # TODO the "not in ({stops})" here is possibly slowing down the query |
|
188 | # TODO the "not in ({stops})" here is possibly slowing down the query | |
187 | # because it needs to perform the lookup on every recursive invocation. |
|
189 | # because it needs to perform the lookup on every recursive invocation. | |
188 | # This could possibly be faster if we created a temporary query with |
|
190 | # This could possibly be faster if we created a temporary query with | |
189 | # baseid "poisoned" to null and limited the recursive filter to |
|
191 | # baseid "poisoned" to null and limited the recursive filter to | |
190 | # "is not null". |
|
192 | # "is not null". | |
191 | res = db.execute( |
|
193 | res = db.execute( | |
192 | 'WITH RECURSIVE ' |
|
194 | 'WITH RECURSIVE ' | |
193 | ' deltachain(deltaid, baseid) AS (' |
|
195 | ' deltachain(deltaid, baseid) AS (' | |
194 | ' SELECT deltaid, deltabaseid FROM fileindex ' |
|
196 | ' SELECT deltaid, deltabaseid FROM fileindex ' | |
195 | ' WHERE pathid=? AND node=? ' |
|
197 | ' WHERE pathid=? AND node=? ' | |
196 | ' UNION ALL ' |
|
198 | ' UNION ALL ' | |
197 | ' SELECT fileindex.deltaid, deltabaseid ' |
|
199 | ' SELECT fileindex.deltaid, deltabaseid ' | |
198 | ' FROM fileindex, deltachain ' |
|
200 | ' FROM fileindex, deltachain ' | |
199 | ' WHERE ' |
|
201 | ' WHERE ' | |
200 | ' fileindex.id=deltachain.baseid ' |
|
202 | ' fileindex.id=deltachain.baseid ' | |
201 | ' AND deltachain.baseid IS NOT NULL ' |
|
203 | ' AND deltachain.baseid IS NOT NULL ' | |
202 | ' AND fileindex.id NOT IN ({stops}) ' |
|
204 | ' AND fileindex.id NOT IN ({stops}) ' | |
203 | ' ) ' |
|
205 | ' ) ' | |
204 | 'SELECT deltachain.baseid, compression, delta ' |
|
206 | 'SELECT deltachain.baseid, compression, delta ' | |
205 | 'FROM deltachain, delta ' |
|
207 | 'FROM deltachain, delta ' | |
206 | 'WHERE delta.id=deltachain.deltaid'.format( |
|
208 | 'WHERE delta.id=deltachain.deltaid'.format( | |
207 | stops=','.join(['?'] * len(stoprids)) |
|
209 | stops=','.join(['?'] * len(stoprids)) | |
208 | ), |
|
210 | ), | |
209 | tuple([pathid, node] + list(stoprids.keys())), |
|
211 | tuple([pathid, node] + list(stoprids.keys())), | |
210 | ) |
|
212 | ) | |
211 |
|
213 | |||
212 | deltas = [] |
|
214 | deltas = [] | |
213 | lastdeltabaseid = None |
|
215 | lastdeltabaseid = None | |
214 |
|
216 | |||
215 | for deltabaseid, compression, delta in res: |
|
217 | for deltabaseid, compression, delta in res: | |
216 | lastdeltabaseid = deltabaseid |
|
218 | lastdeltabaseid = deltabaseid | |
217 |
|
219 | |||
218 | if compression == COMPRESSION_ZSTD: |
|
220 | if compression == COMPRESSION_ZSTD: | |
219 | delta = zstddctx.decompress(delta) |
|
221 | delta = zstddctx.decompress(delta) | |
220 | elif compression == COMPRESSION_NONE: |
|
222 | elif compression == COMPRESSION_NONE: | |
221 | delta = delta |
|
223 | delta = delta | |
222 | elif compression == COMPRESSION_ZLIB: |
|
224 | elif compression == COMPRESSION_ZLIB: | |
223 | delta = zlib.decompress(delta) |
|
225 | delta = zlib.decompress(delta) | |
224 | else: |
|
226 | else: | |
225 | raise SQLiteStoreError( |
|
227 | raise SQLiteStoreError( | |
226 | b'unhandled compression type: %d' % compression |
|
228 | b'unhandled compression type: %d' % compression | |
227 | ) |
|
229 | ) | |
228 |
|
230 | |||
229 | deltas.append(delta) |
|
231 | deltas.append(delta) | |
230 |
|
232 | |||
231 | if lastdeltabaseid in stoprids: |
|
233 | if lastdeltabaseid in stoprids: | |
232 | basetext = revisioncache[stoprids[lastdeltabaseid]] |
|
234 | basetext = revisioncache[stoprids[lastdeltabaseid]] | |
233 | else: |
|
235 | else: | |
234 | basetext = deltas.pop() |
|
236 | basetext = deltas.pop() | |
235 |
|
237 | |||
236 | deltas.reverse() |
|
238 | deltas.reverse() | |
237 | fulltext = mdiff.patches(basetext, deltas) |
|
239 | fulltext = mdiff.patches(basetext, deltas) | |
238 |
|
240 | |||
239 | # SQLite returns buffer instances for blob columns on Python 2. This |
|
241 | # SQLite returns buffer instances for blob columns on Python 2. This | |
240 | # type can propagate through the delta application layer. Because |
|
242 | # type can propagate through the delta application layer. Because | |
241 | # downstream callers assume revisions are bytes, cast as needed. |
|
243 | # downstream callers assume revisions are bytes, cast as needed. | |
242 | if not isinstance(fulltext, bytes): |
|
244 | if not isinstance(fulltext, bytes): | |
243 | fulltext = bytes(fulltext) |
|
245 | fulltext = bytes(fulltext) | |
244 |
|
246 | |||
245 | return fulltext |
|
247 | return fulltext | |
246 |
|
248 | |||
247 |
|
249 | |||
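A hedged example of calling the resolver above, mirroring how revision() uses it later in this class: with an empty fulltext cache, stoprids is seeded with a sentinel so the SQL IN clause is never empty (db, pathid and node are placeholders):

    fulltext = resolvedeltachain(
        db,                    # sqlite3 connection
        pathid,                # filepath.id of the tracked path
        node,                  # 20-byte file node
        revisioncache={},      # node -> fulltext cache (empty here)
        stoprids={-1: None},   # sentinel; no cached rows to stop at
        zstddctx=None,         # only needed for zstd-compressed deltas
    )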
248 | def insertdelta(db, compression, hash, delta): |
|
250 | def insertdelta(db, compression, hash, delta): | |
249 | try: |
|
251 | try: | |
250 | return db.execute( |
|
252 | return db.execute( | |
251 | 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)', |
|
253 | 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)', | |
252 | (compression, hash, delta), |
|
254 | (compression, hash, delta), | |
253 | ).lastrowid |
|
255 | ).lastrowid | |
254 | except sqlite3.IntegrityError: |
|
256 | except sqlite3.IntegrityError: | |
255 | return db.execute( |
|
257 | return db.execute( | |
256 | 'SELECT id FROM delta WHERE hash=?', (hash,) |
|
258 | 'SELECT id FROM delta WHERE hash=?', (hash,) | |
257 | ).fetchone()[0] |
|
259 | ).fetchone()[0] | |
258 |
|
260 | |||
259 |
|
261 | |||
260 | class SQLiteStoreError(error.StorageError): |
|
262 | class SQLiteStoreError(error.StorageError): | |
261 | pass |
|
263 | pass | |
262 |
|
264 | |||
263 |
|
265 | |||
264 | @attr.s |
|
266 | @attr.s | |
265 | class revisionentry(object): |
|
267 | class revisionentry(object): | |
266 | rid = attr.ib() |
|
268 | rid = attr.ib() | |
267 | rev = attr.ib() |
|
269 | rev = attr.ib() | |
268 | node = attr.ib() |
|
270 | node = attr.ib() | |
269 | p1rev = attr.ib() |
|
271 | p1rev = attr.ib() | |
270 | p2rev = attr.ib() |
|
272 | p2rev = attr.ib() | |
271 | p1node = attr.ib() |
|
273 | p1node = attr.ib() | |
272 | p2node = attr.ib() |
|
274 | p2node = attr.ib() | |
273 | linkrev = attr.ib() |
|
275 | linkrev = attr.ib() | |
274 | flags = attr.ib() |
|
276 | flags = attr.ib() | |
275 |
|
277 | |||
276 |
|
278 | |||
277 | @interfaceutil.implementer(repository.irevisiondelta) |
|
279 | @interfaceutil.implementer(repository.irevisiondelta) | |
278 | @attr.s(slots=True) |
|
280 | @attr.s(slots=True) | |
279 | class sqliterevisiondelta(object): |
|
281 | class sqliterevisiondelta(object): | |
280 | node = attr.ib() |
|
282 | node = attr.ib() | |
281 | p1node = attr.ib() |
|
283 | p1node = attr.ib() | |
282 | p2node = attr.ib() |
|
284 | p2node = attr.ib() | |
283 | basenode = attr.ib() |
|
285 | basenode = attr.ib() | |
284 | flags = attr.ib() |
|
286 | flags = attr.ib() | |
285 | baserevisionsize = attr.ib() |
|
287 | baserevisionsize = attr.ib() | |
286 | revision = attr.ib() |
|
288 | revision = attr.ib() | |
287 | delta = attr.ib() |
|
289 | delta = attr.ib() | |
288 | linknode = attr.ib(default=None) |
|
290 | linknode = attr.ib(default=None) | |
289 |
|
291 | |||
290 |
|
292 | |||
291 | @interfaceutil.implementer(repository.iverifyproblem) |
|
293 | @interfaceutil.implementer(repository.iverifyproblem) | |
292 | @attr.s(frozen=True) |
|
294 | @attr.s(frozen=True) | |
293 | class sqliteproblem(object): |
|
295 | class sqliteproblem(object): | |
294 | warning = attr.ib(default=None) |
|
296 | warning = attr.ib(default=None) | |
295 | error = attr.ib(default=None) |
|
297 | error = attr.ib(default=None) | |
296 | node = attr.ib(default=None) |
|
298 | node = attr.ib(default=None) | |
297 |
|
299 | |||
298 |
|
300 | |||
299 | @interfaceutil.implementer(repository.ifilestorage) |
|
301 | @interfaceutil.implementer(repository.ifilestorage) | |
300 | class sqlitefilestore(object): |
|
302 | class sqlitefilestore(object): | |
301 | """Implements storage for an individual tracked path.""" |
|
303 | """Implements storage for an individual tracked path.""" | |
302 |
|
304 | |||
303 | def __init__(self, db, path, compression): |
|
305 | def __init__(self, db, path, compression): | |
304 | self._db = db |
|
306 | self._db = db | |
305 | self._path = path |
|
307 | self._path = path | |
306 |
|
308 | |||
307 | self._pathid = None |
|
309 | self._pathid = None | |
308 |
|
310 | |||
309 | # revnum -> node |
|
311 | # revnum -> node | |
310 | self._revtonode = {} |
|
312 | self._revtonode = {} | |
311 | # node -> revnum |
|
313 | # node -> revnum | |
312 | self._nodetorev = {} |
|
314 | self._nodetorev = {} | |
313 | # node -> data structure |
|
315 | # node -> data structure | |
314 | self._revisions = {} |
|
316 | self._revisions = {} | |
315 |
|
317 | |||
316 | self._revisioncache = util.lrucachedict(10) |
|
318 | self._revisioncache = util.lrucachedict(10) | |
317 |
|
319 | |||
318 | self._compengine = compression |
|
320 | self._compengine = compression | |
319 |
|
321 | |||
320 | if compression == b'zstd': |
|
322 | if compression == b'zstd': | |
321 | self._cctx = zstd.ZstdCompressor(level=3) |
|
323 | self._cctx = zstd.ZstdCompressor(level=3) | |
322 | self._dctx = zstd.ZstdDecompressor() |
|
324 | self._dctx = zstd.ZstdDecompressor() | |
323 | else: |
|
325 | else: | |
324 | self._cctx = None |
|
326 | self._cctx = None | |
325 | self._dctx = None |
|
327 | self._dctx = None | |
326 |
|
328 | |||
327 | self._refreshindex() |
|
329 | self._refreshindex() | |
328 |
|
330 | |||
329 | def _refreshindex(self): |
|
331 | def _refreshindex(self): | |
330 | self._revtonode = {} |
|
332 | self._revtonode = {} | |
331 | self._nodetorev = {} |
|
333 | self._nodetorev = {} | |
332 | self._revisions = {} |
|
334 | self._revisions = {} | |
333 |
|
335 | |||
334 | res = list( |
|
336 | res = list( | |
335 | self._db.execute( |
|
337 | self._db.execute( | |
336 | 'SELECT id FROM filepath WHERE path=?', (self._path,) |
|
338 | 'SELECT id FROM filepath WHERE path=?', (self._path,) | |
337 | ) |
|
339 | ) | |
338 | ) |
|
340 | ) | |
339 |
|
341 | |||
340 | if not res: |
|
342 | if not res: | |
341 | self._pathid = None |
|
343 | self._pathid = None | |
342 | return |
|
344 | return | |
343 |
|
345 | |||
344 | self._pathid = res[0][0] |
|
346 | self._pathid = res[0][0] | |
345 |
|
347 | |||
346 | res = self._db.execute( |
|
348 | res = self._db.execute( | |
347 | 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags ' |
|
349 | 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags ' | |
348 | 'FROM fileindex ' |
|
350 | 'FROM fileindex ' | |
349 | 'WHERE pathid=? ' |
|
351 | 'WHERE pathid=? ' | |
350 | 'ORDER BY revnum ASC', |
|
352 | 'ORDER BY revnum ASC', | |
351 | (self._pathid,), |
|
353 | (self._pathid,), | |
352 | ) |
|
354 | ) | |
353 |
|
355 | |||
354 | for i, row in enumerate(res): |
|
356 | for i, row in enumerate(res): | |
355 | rid, rev, node, p1rev, p2rev, linkrev, flags = row |
|
357 | rid, rev, node, p1rev, p2rev, linkrev, flags = row | |
356 |
|
358 | |||
357 | if i != rev: |
|
359 | if i != rev: | |
358 | raise SQLiteStoreError( |
|
360 | raise SQLiteStoreError( | |
359 | _(b'sqlite database has inconsistent revision numbers') |
|
361 | _(b'sqlite database has inconsistent revision numbers') | |
360 | ) |
|
362 | ) | |
361 |
|
363 | |||
362 | if p1rev == nullrev: |
|
364 | if p1rev == nullrev: | |
363 | p1node = nullid |
|
365 | p1node = nullid | |
364 | else: |
|
366 | else: | |
365 | p1node = self._revtonode[p1rev] |
|
367 | p1node = self._revtonode[p1rev] | |
366 |
|
368 | |||
367 | if p2rev == nullrev: |
|
369 | if p2rev == nullrev: | |
368 | p2node = nullid |
|
370 | p2node = nullid | |
369 | else: |
|
371 | else: | |
370 | p2node = self._revtonode[p2rev] |
|
372 | p2node = self._revtonode[p2rev] | |
371 |
|
373 | |||
372 | entry = revisionentry( |
|
374 | entry = revisionentry( | |
373 | rid=rid, |
|
375 | rid=rid, | |
374 | rev=rev, |
|
376 | rev=rev, | |
375 | node=node, |
|
377 | node=node, | |
376 | p1rev=p1rev, |
|
378 | p1rev=p1rev, | |
377 | p2rev=p2rev, |
|
379 | p2rev=p2rev, | |
378 | p1node=p1node, |
|
380 | p1node=p1node, | |
379 | p2node=p2node, |
|
381 | p2node=p2node, | |
380 | linkrev=linkrev, |
|
382 | linkrev=linkrev, | |
381 | flags=flags, |
|
383 | flags=flags, | |
382 | ) |
|
384 | ) | |
383 |
|
385 | |||
384 | self._revtonode[rev] = node |
|
386 | self._revtonode[rev] = node | |
385 | self._nodetorev[node] = rev |
|
387 | self._nodetorev[node] = rev | |
386 | self._revisions[node] = entry |
|
388 | self._revisions[node] = entry | |
387 |
|
389 | |||
388 | # Start of ifileindex interface. |
|
390 | # Start of ifileindex interface. | |
389 |
|
391 | |||
390 | def __len__(self): |
|
392 | def __len__(self): | |
391 | return len(self._revisions) |
|
393 | return len(self._revisions) | |
392 |
|
394 | |||
393 | def __iter__(self): |
|
395 | def __iter__(self): | |
394 | return iter(pycompat.xrange(len(self._revisions))) |
|
396 | return iter(pycompat.xrange(len(self._revisions))) | |
395 |
|
397 | |||
396 | def hasnode(self, node): |
|
398 | def hasnode(self, node): | |
397 | if node == nullid: |
|
399 | if node == nullid: | |
398 | return False |
|
400 | return False | |
399 |
|
401 | |||
400 | return node in self._nodetorev |
|
402 | return node in self._nodetorev | |
401 |
|
403 | |||
402 | def revs(self, start=0, stop=None): |
|
404 | def revs(self, start=0, stop=None): | |
403 | return storageutil.iterrevs( |
|
405 | return storageutil.iterrevs( | |
404 | len(self._revisions), start=start, stop=stop |
|
406 | len(self._revisions), start=start, stop=stop | |
405 | ) |
|
407 | ) | |
406 |
|
408 | |||
407 | def parents(self, node): |
|
409 | def parents(self, node): | |
408 | if node == nullid: |
|
410 | if node == nullid: | |
409 | return nullid, nullid |
|
411 | return nullid, nullid | |
410 |
|
412 | |||
411 | if node not in self._revisions: |
|
413 | if node not in self._revisions: | |
412 | raise error.LookupError(node, self._path, _(b'no node')) |
|
414 | raise error.LookupError(node, self._path, _(b'no node')) | |
413 |
|
415 | |||
414 | entry = self._revisions[node] |
|
416 | entry = self._revisions[node] | |
415 | return entry.p1node, entry.p2node |
|
417 | return entry.p1node, entry.p2node | |
416 |
|
418 | |||
417 | def parentrevs(self, rev): |
|
419 | def parentrevs(self, rev): | |
418 | if rev == nullrev: |
|
420 | if rev == nullrev: | |
419 | return nullrev, nullrev |
|
421 | return nullrev, nullrev | |
420 |
|
422 | |||
421 | if rev not in self._revtonode: |
|
423 | if rev not in self._revtonode: | |
422 | raise IndexError(rev) |
|
424 | raise IndexError(rev) | |
423 |
|
425 | |||
424 | entry = self._revisions[self._revtonode[rev]] |
|
426 | entry = self._revisions[self._revtonode[rev]] | |
425 | return entry.p1rev, entry.p2rev |
|
427 | return entry.p1rev, entry.p2rev | |
426 |
|
428 | |||
427 | def rev(self, node): |
|
429 | def rev(self, node): | |
428 | if node == nullid: |
|
430 | if node == nullid: | |
429 | return nullrev |
|
431 | return nullrev | |
430 |
|
432 | |||
431 | if node not in self._nodetorev: |
|
433 | if node not in self._nodetorev: | |
432 | raise error.LookupError(node, self._path, _(b'no node')) |
|
434 | raise error.LookupError(node, self._path, _(b'no node')) | |
433 |
|
435 | |||
434 | return self._nodetorev[node] |
|
436 | return self._nodetorev[node] | |
435 |
|
437 | |||
436 | def node(self, rev): |
|
438 | def node(self, rev): | |
437 | if rev == nullrev: |
|
439 | if rev == nullrev: | |
438 | return nullid |
|
440 | return nullid | |
439 |
|
441 | |||
440 | if rev not in self._revtonode: |
|
442 | if rev not in self._revtonode: | |
441 | raise IndexError(rev) |
|
443 | raise IndexError(rev) | |
442 |
|
444 | |||
443 | return self._revtonode[rev] |
|
445 | return self._revtonode[rev] | |
444 |
|
446 | |||
445 | def lookup(self, node): |
|
447 | def lookup(self, node): | |
446 | return storageutil.fileidlookup(self, node, self._path) |
|
448 | return storageutil.fileidlookup(self, node, self._path) | |
447 |
|
449 | |||
448 | def linkrev(self, rev): |
|
450 | def linkrev(self, rev): | |
449 | if rev == nullrev: |
|
451 | if rev == nullrev: | |
450 | return nullrev |
|
452 | return nullrev | |
451 |
|
453 | |||
452 | if rev not in self._revtonode: |
|
454 | if rev not in self._revtonode: | |
453 | raise IndexError(rev) |
|
455 | raise IndexError(rev) | |
454 |
|
456 | |||
455 | entry = self._revisions[self._revtonode[rev]] |
|
457 | entry = self._revisions[self._revtonode[rev]] | |
456 | return entry.linkrev |
|
458 | return entry.linkrev | |
457 |
|
459 | |||
458 | def iscensored(self, rev): |
|
460 | def iscensored(self, rev): | |
459 | if rev == nullrev: |
|
461 | if rev == nullrev: | |
460 | return False |
|
462 | return False | |
461 |
|
463 | |||
462 | if rev not in self._revtonode: |
|
464 | if rev not in self._revtonode: | |
463 | raise IndexError(rev) |
|
465 | raise IndexError(rev) | |
464 |
|
466 | |||
465 | return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED |
|
467 | return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED | |
466 |
|
468 | |||
467 | def commonancestorsheads(self, node1, node2): |
|
469 | def commonancestorsheads(self, node1, node2): | |
468 | rev1 = self.rev(node1) |
|
470 | rev1 = self.rev(node1) | |
469 | rev2 = self.rev(node2) |
|
471 | rev2 = self.rev(node2) | |
470 |
|
472 | |||
471 | ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2) |
|
473 | ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2) | |
472 | return pycompat.maplist(self.node, ancestors) |
|
474 | return pycompat.maplist(self.node, ancestors) | |
473 |
|
475 | |||
474 | def descendants(self, revs): |
|
476 | def descendants(self, revs): | |
475 | # TODO we could implement this using a recursive SQL query, which |
|
477 | # TODO we could implement this using a recursive SQL query, which | |
476 | # might be faster. |
|
478 | # might be faster. | |
477 | return dagop.descendantrevs(revs, self.revs, self.parentrevs) |
|
479 | return dagop.descendantrevs(revs, self.revs, self.parentrevs) | |
478 |
|
480 | |||
479 | def heads(self, start=None, stop=None): |
|
481 | def heads(self, start=None, stop=None): | |
480 | if start is None and stop is None: |
|
482 | if start is None and stop is None: | |
481 | if not len(self): |
|
483 | if not len(self): | |
482 | return [nullid] |
|
484 | return [nullid] | |
483 |
|
485 | |||
484 | startrev = self.rev(start) if start is not None else nullrev |
|
486 | startrev = self.rev(start) if start is not None else nullrev | |
485 | stoprevs = {self.rev(n) for n in stop or []} |
|
487 | stoprevs = {self.rev(n) for n in stop or []} | |
486 |
|
488 | |||
487 | revs = dagop.headrevssubset( |
|
489 | revs = dagop.headrevssubset( | |
488 | self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs |
|
490 | self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs | |
489 | ) |
|
491 | ) | |
490 |
|
492 | |||
491 | return [self.node(rev) for rev in revs] |
|
493 | return [self.node(rev) for rev in revs] | |
492 |
|
494 | |||
493 | def children(self, node): |
|
495 | def children(self, node): | |
494 | rev = self.rev(node) |
|
496 | rev = self.rev(node) | |
495 |
|
497 | |||
496 | res = self._db.execute( |
|
498 | res = self._db.execute( | |
497 | 'SELECT' |
|
499 | 'SELECT' | |
498 | ' node ' |
|
500 | ' node ' | |
499 | ' FROM filedata ' |
|
501 | ' FROM filedata ' | |
500 | ' WHERE path=? AND (p1rev=? OR p2rev=?) ' |
|
502 | ' WHERE path=? AND (p1rev=? OR p2rev=?) ' | |
501 | ' ORDER BY revnum ASC', |
|
503 | ' ORDER BY revnum ASC', | |
502 | (self._path, rev, rev), |
|
504 | (self._path, rev, rev), | |
503 | ) |
|
505 | ) | |
504 |
|
506 | |||
505 | return [row[0] for row in res] |
|
507 | return [row[0] for row in res] | |
506 |
|
508 | |||
507 | # End of ifileindex interface. |
|
509 | # End of ifileindex interface. | |
508 |
|
510 | |||
509 | # Start of ifiledata interface. |
|
511 | # Start of ifiledata interface. | |
510 |
|
512 | |||
511 | def size(self, rev): |
|
513 | def size(self, rev): | |
512 | if rev == nullrev: |
|
514 | if rev == nullrev: | |
513 | return 0 |
|
515 | return 0 | |
514 |
|
516 | |||
515 | if rev not in self._revtonode: |
|
517 | if rev not in self._revtonode: | |
516 | raise IndexError(rev) |
|
518 | raise IndexError(rev) | |
517 |
|
519 | |||
518 | node = self._revtonode[rev] |
|
520 | node = self._revtonode[rev] | |
519 |
|
521 | |||
520 | if self.renamed(node): |
|
522 | if self.renamed(node): | |
521 | return len(self.read(node)) |
|
523 | return len(self.read(node)) | |
522 |
|
524 | |||
523 | return len(self.revision(node)) |
|
525 | return len(self.revision(node)) | |
524 |
|
526 | |||
525 | def revision(self, node, raw=False, _verifyhash=True): |
|
527 | def revision(self, node, raw=False, _verifyhash=True): | |
526 | if node in (nullid, nullrev): |
|
528 | if node in (nullid, nullrev): | |
527 | return b'' |
|
529 | return b'' | |
528 |
|
530 | |||
529 | if isinstance(node, int): |
|
531 | if isinstance(node, int): | |
530 | node = self.node(node) |
|
532 | node = self.node(node) | |
531 |
|
533 | |||
532 | if node not in self._nodetorev: |
|
534 | if node not in self._nodetorev: | |
533 | raise error.LookupError(node, self._path, _(b'no node')) |
|
535 | raise error.LookupError(node, self._path, _(b'no node')) | |
534 |
|
536 | |||
535 | if node in self._revisioncache: |
|
537 | if node in self._revisioncache: | |
536 | return self._revisioncache[node] |
|
538 | return self._revisioncache[node] | |
537 |
|
539 | |||
538 | # Because we have a fulltext revision cache, we are able to |
|
540 | # Because we have a fulltext revision cache, we are able to | |
539 | # short-circuit delta chain traversal and decompression as soon as |
|
541 | # short-circuit delta chain traversal and decompression as soon as | |
540 | # we encounter a revision in the cache. |
|
542 | # we encounter a revision in the cache. | |
541 |
|
543 | |||
542 | stoprids = {self._revisions[n].rid: n for n in self._revisioncache} |
|
544 | stoprids = {self._revisions[n].rid: n for n in self._revisioncache} | |
543 |
|
545 | |||
544 | if not stoprids: |
|
546 | if not stoprids: | |
545 | stoprids[-1] = None |
|
547 | stoprids[-1] = None | |
546 |
|
548 | |||
547 | fulltext = resolvedeltachain( |
|
549 | fulltext = resolvedeltachain( | |
548 | self._db, |
|
550 | self._db, | |
549 | self._pathid, |
|
551 | self._pathid, | |
550 | node, |
|
552 | node, | |
551 | self._revisioncache, |
|
553 | self._revisioncache, | |
552 | stoprids, |
|
554 | stoprids, | |
553 | zstddctx=self._dctx, |
|
555 | zstddctx=self._dctx, | |
554 | ) |
|
556 | ) | |
555 |
|
557 | |||
556 | # Don't verify hashes if parent nodes were rewritten, as the hash |
|
558 | # Don't verify hashes if parent nodes were rewritten, as the hash | |
557 | # wouldn't verify. |
|
559 | # wouldn't verify. | |
558 | if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2): |
|
560 | if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2): | |
559 | _verifyhash = False |
|
561 | _verifyhash = False | |
560 |
|
562 | |||
561 | if _verifyhash: |
|
563 | if _verifyhash: | |
562 | self._checkhash(fulltext, node) |
|
564 | self._checkhash(fulltext, node) | |
563 | self._revisioncache[node] = fulltext |
|
565 | self._revisioncache[node] = fulltext | |
564 |
|
566 | |||
565 | return fulltext |
|
567 | return fulltext | |
566 |
|
568 | |||
567 | def rawdata(self, *args, **kwargs): |
|
569 | def rawdata(self, *args, **kwargs): | |
568 | return self.revision(*args, **kwargs) |
|
570 | return self.revision(*args, **kwargs) | |
569 |
|
571 | |||
570 | def read(self, node): |
|
572 | def read(self, node): | |
571 | return storageutil.filtermetadata(self.revision(node)) |
|
573 | return storageutil.filtermetadata(self.revision(node)) | |
572 |
|
574 | |||
573 | def renamed(self, node): |
|
575 | def renamed(self, node): | |
574 | return storageutil.filerevisioncopied(self, node) |
|
576 | return storageutil.filerevisioncopied(self, node) | |
575 |
|
577 | |||
576 | def cmp(self, node, fulltext): |
|
578 | def cmp(self, node, fulltext): | |
577 | return not storageutil.filedataequivalent(self, node, fulltext) |
|
579 | return not storageutil.filedataequivalent(self, node, fulltext) | |
578 |
|
580 | |||
579 | def emitrevisions( |
|
581 | def emitrevisions( | |
580 | self, |
|
582 | self, | |
581 | nodes, |
|
583 | nodes, | |
582 | nodesorder=None, |
|
584 | nodesorder=None, | |
583 | revisiondata=False, |
|
585 | revisiondata=False, | |
584 | assumehaveparentrevisions=False, |
|
586 | assumehaveparentrevisions=False, | |
585 | deltamode=repository.CG_DELTAMODE_STD, |
|
587 | deltamode=repository.CG_DELTAMODE_STD, | |
586 | ): |
|
588 | ): | |
587 | if nodesorder not in (b'nodes', b'storage', b'linear', None): |
|
589 | if nodesorder not in (b'nodes', b'storage', b'linear', None): | |
588 | raise error.ProgrammingError( |
|
590 | raise error.ProgrammingError( | |
589 | b'unhandled value for nodesorder: %s' % nodesorder |
|
591 | b'unhandled value for nodesorder: %s' % nodesorder | |
590 | ) |
|
592 | ) | |
591 |
|
593 | |||
592 | nodes = [n for n in nodes if n != nullid] |
|
594 | nodes = [n for n in nodes if n != nullid] | |
593 |
|
595 | |||
594 | if not nodes: |
|
596 | if not nodes: | |
595 | return |
|
597 | return | |
596 |
|
598 | |||
597 | # TODO perform in a single query. |
|
599 | # TODO perform in a single query. | |
598 | res = self._db.execute( |
|
600 | res = self._db.execute( | |
599 | 'SELECT revnum, deltaid FROM fileindex ' |
|
601 | 'SELECT revnum, deltaid FROM fileindex ' | |
600 | 'WHERE pathid=? ' |
|
602 | 'WHERE pathid=? ' | |
601 | ' AND node in (%s)' % (','.join(['?'] * len(nodes))), |
|
603 | ' AND node in (%s)' % (','.join(['?'] * len(nodes))), | |
602 | tuple([self._pathid] + nodes), |
|
604 | tuple([self._pathid] + nodes), | |
603 | ) |
|
605 | ) | |
604 |
|
606 | |||
605 | deltabases = {} |
|
607 | deltabases = {} | |
606 |
|
608 | |||
607 | for rev, deltaid in res: |
|
609 | for rev, deltaid in res: | |
608 | res = self._db.execute( |
|
610 | res = self._db.execute( | |
609 | 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?', |
|
611 | 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?', | |
610 | (self._pathid, deltaid), |
|
612 | (self._pathid, deltaid), | |
611 | ) |
|
613 | ) | |
612 | deltabases[rev] = res.fetchone()[0] |
|
614 | deltabases[rev] = res.fetchone()[0] | |
613 |
|
615 | |||
614 | # TODO define revdifffn so we can use delta from storage. |
|
616 | # TODO define revdifffn so we can use delta from storage. | |
615 | for delta in storageutil.emitrevisions( |
|
617 | for delta in storageutil.emitrevisions( | |
616 | self, |
|
618 | self, | |
617 | nodes, |
|
619 | nodes, | |
618 | nodesorder, |
|
620 | nodesorder, | |
619 | sqliterevisiondelta, |
|
621 | sqliterevisiondelta, | |
620 | deltaparentfn=deltabases.__getitem__, |
|
622 | deltaparentfn=deltabases.__getitem__, | |
621 | revisiondata=revisiondata, |
|
623 | revisiondata=revisiondata, | |
622 | assumehaveparentrevisions=assumehaveparentrevisions, |
|
624 | assumehaveparentrevisions=assumehaveparentrevisions, | |
623 | deltamode=deltamode, |
|
625 | deltamode=deltamode, | |
624 | ): |
|
626 | ): | |
625 |
|
627 | |||
626 | yield delta |
|
628 | yield delta | |
627 |
|
629 | |||
628 | # End of ifiledata interface. |
|
630 | # End of ifiledata interface. | |
629 |
|
631 | |||
630 | # Start of ifilemutation interface. |
|
632 | # Start of ifilemutation interface. | |
631 |
|
633 | |||
632 | def add(self, filedata, meta, transaction, linkrev, p1, p2): |
|
634 | def add(self, filedata, meta, transaction, linkrev, p1, p2): | |
633 | if meta or filedata.startswith(b'\x01\n'): |
|
635 | if meta or filedata.startswith(b'\x01\n'): | |
634 | filedata = storageutil.packmeta(meta, filedata) |
|
636 | filedata = storageutil.packmeta(meta, filedata) | |
635 |
|
637 | |||
636 | return self.addrevision(filedata, transaction, linkrev, p1, p2) |
|
638 | return self.addrevision(filedata, transaction, linkrev, p1, p2) | |
637 |
|
639 | |||
638 | def addrevision( |
|
640 | def addrevision( | |
639 | self, |
|
641 | self, | |
640 | revisiondata, |
|
642 | revisiondata, | |
641 | transaction, |
|
643 | transaction, | |
642 | linkrev, |
|
644 | linkrev, | |
643 | p1, |
|
645 | p1, | |
644 | p2, |
|
646 | p2, | |
645 | node=None, |
|
647 | node=None, | |
646 | flags=0, |
|
648 | flags=0, | |
647 | cachedelta=None, |
|
649 | cachedelta=None, | |
648 | ): |
|
650 | ): | |
649 | if flags: |
|
651 | if flags: | |
650 | raise SQLiteStoreError(_(b'flags not supported on revisions')) |
|
652 | raise SQLiteStoreError(_(b'flags not supported on revisions')) | |
651 |
|
653 | |||
652 | validatehash = node is not None |
|
654 | validatehash = node is not None | |
653 | node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2) |
|
655 | node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2) | |
654 |
|
656 | |||
655 | if validatehash: |
|
657 | if validatehash: | |
656 | self._checkhash(revisiondata, node, p1, p2) |
|
658 | self._checkhash(revisiondata, node, p1, p2) | |
657 |
|
659 | |||
658 | if node in self._nodetorev: |
|
660 | if node in self._nodetorev: | |
659 | return node |
|
661 | return node | |
660 |
|
662 | |||
661 | node = self._addrawrevision( |
|
663 | node = self._addrawrevision( | |
662 | node, revisiondata, transaction, linkrev, p1, p2 |
|
664 | node, revisiondata, transaction, linkrev, p1, p2 | |
663 | ) |
|
665 | ) | |
664 |
|
666 | |||
665 | self._revisioncache[node] = revisiondata |
|
667 | self._revisioncache[node] = revisiondata | |
666 | return node |
|
668 | return node | |
667 |
|
669 | |||
668 | def addgroup( |
|
670 | def addgroup( | |
669 | self, |
|
671 | self, | |
670 | deltas, |
|
672 | deltas, | |
671 | linkmapper, |
|
673 | linkmapper, | |
672 | transaction, |
|
674 | transaction, | |
673 | addrevisioncb=None, |
|
675 | addrevisioncb=None, | |
674 | maybemissingparents=False, |
|
676 | maybemissingparents=False, | |
675 | ): |
|
677 | ): | |
676 | nodes = [] |
|
678 | nodes = [] | |
677 |
|
679 | |||
678 | for node, p1, p2, linknode, deltabase, delta, wireflags in deltas: |
|
680 | for node, p1, p2, linknode, deltabase, delta, wireflags in deltas: | |
679 | storeflags = 0 |
|
681 | storeflags = 0 | |
680 |
|
682 | |||
681 | if wireflags & repository.REVISION_FLAG_CENSORED: |
|
683 | if wireflags & repository.REVISION_FLAG_CENSORED: | |
682 | storeflags |= FLAG_CENSORED |
|
684 | storeflags |= FLAG_CENSORED | |
683 |
|
685 | |||
684 | if wireflags & ~repository.REVISION_FLAG_CENSORED: |
|
686 | if wireflags & ~repository.REVISION_FLAG_CENSORED: | |
685 | raise SQLiteStoreError(b'unhandled revision flag') |
|
687 | raise SQLiteStoreError(b'unhandled revision flag') | |
686 |
|
688 | |||
687 | if maybemissingparents: |
|
689 | if maybemissingparents: | |
688 | if p1 != nullid and not self.hasnode(p1): |
|
690 | if p1 != nullid and not self.hasnode(p1): | |
689 | p1 = nullid |
|
691 | p1 = nullid | |
690 | storeflags |= FLAG_MISSING_P1 |
|
692 | storeflags |= FLAG_MISSING_P1 | |
691 |
|
693 | |||
692 | if p2 != nullid and not self.hasnode(p2): |
|
694 | if p2 != nullid and not self.hasnode(p2): | |
693 | p2 = nullid |
|
695 | p2 = nullid | |
694 | storeflags |= FLAG_MISSING_P2 |
|
696 | storeflags |= FLAG_MISSING_P2 | |
695 |
|
697 | |||
696 | baserev = self.rev(deltabase) |
|
698 | baserev = self.rev(deltabase) | |
697 |
|
699 | |||
698 | # If base is censored, delta must be full replacement in a single |
|
700 | # If base is censored, delta must be full replacement in a single | |
699 | # patch operation. |
|
701 | # patch operation. | |
700 | if baserev != nullrev and self.iscensored(baserev): |
|
702 | if baserev != nullrev and self.iscensored(baserev): | |
701 | hlen = struct.calcsize(b'>lll') |
|
703 | hlen = struct.calcsize(b'>lll') | |
702 | oldlen = len(self.rawdata(deltabase, _verifyhash=False)) |
|
704 | oldlen = len(self.rawdata(deltabase, _verifyhash=False)) | |
703 | newlen = len(delta) - hlen |
|
705 | newlen = len(delta) - hlen | |
704 |
|
706 | |||
705 | if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): |
|
707 | if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen): | |
706 | raise error.CensoredBaseError(self._path, deltabase) |
|
708 | raise error.CensoredBaseError(self._path, deltabase) | |
707 |
|
709 | |||
708 | if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored( |
|
710 | if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored( | |
709 | delta, baserev, lambda x: len(self.rawdata(x)) |
|
711 | delta, baserev, lambda x: len(self.rawdata(x)) | |
710 | ): |
|
712 | ): | |
711 | storeflags |= FLAG_CENSORED |
|
713 | storeflags |= FLAG_CENSORED | |
712 |
|
714 | |||
713 | linkrev = linkmapper(linknode) |
|
715 | linkrev = linkmapper(linknode) | |
714 |
|
716 | |||
715 | nodes.append(node) |
|
717 | nodes.append(node) | |
716 |
|
718 | |||
717 | if node in self._revisions: |
|
719 | if node in self._revisions: | |
718 | # Possibly reset parents to make them proper. |
|
720 | # Possibly reset parents to make them proper. | |
719 | entry = self._revisions[node] |
|
721 | entry = self._revisions[node] | |
720 |
|
722 | |||
721 | if entry.flags & FLAG_MISSING_P1 and p1 != nullid: |
|
723 | if entry.flags & FLAG_MISSING_P1 and p1 != nullid: | |
722 | entry.p1node = p1 |
|
724 | entry.p1node = p1 | |
723 | entry.p1rev = self._nodetorev[p1] |
|
725 | entry.p1rev = self._nodetorev[p1] | |
724 | entry.flags &= ~FLAG_MISSING_P1 |
|
726 | entry.flags &= ~FLAG_MISSING_P1 | |
725 |
|
727 | |||
726 | self._db.execute( |
|
728 | self._db.execute( | |
727 | 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?', |
|
729 | 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?', | |
728 | (self._nodetorev[p1], entry.flags, entry.rid), |
|
730 | (self._nodetorev[p1], entry.flags, entry.rid), | |
729 | ) |
|
731 | ) | |
730 |
|
732 | |||
731 | if entry.flags & FLAG_MISSING_P2 and p2 != nullid: |
|
733 | if entry.flags & FLAG_MISSING_P2 and p2 != nullid: | |
732 | entry.p2node = p2 |
|
734 | entry.p2node = p2 | |
733 | entry.p2rev = self._nodetorev[p2] |
|
735 | entry.p2rev = self._nodetorev[p2] | |
734 | entry.flags &= ~FLAG_MISSING_P2 |
|
736 | entry.flags &= ~FLAG_MISSING_P2 | |
735 |
|
737 | |||
736 | self._db.execute( |
|
738 | self._db.execute( | |
737 | 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?', |
|
739 | 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?', | |
738 | (self._nodetorev[p1], entry.flags, entry.rid), |
|
740 | (self._nodetorev[p1], entry.flags, entry.rid), | |
739 | ) |
|
741 | ) | |
740 |
|
742 | |||
741 | continue |
|
743 | continue | |
742 |
|
744 | |||
743 | if deltabase == nullid: |
|
745 | if deltabase == nullid: | |
744 | text = mdiff.patch(b'', delta) |
|
746 | text = mdiff.patch(b'', delta) | |
745 | storedelta = None |
|
747 | storedelta = None | |
746 | else: |
|
748 | else: | |
747 | text = None |
|
749 | text = None | |
748 | storedelta = (deltabase, delta) |
|
750 | storedelta = (deltabase, delta) | |
749 |
|
751 | |||
750 | self._addrawrevision( |
|
752 | self._addrawrevision( | |
751 | node, |
|
753 | node, | |
752 | text, |
|
754 | text, | |
753 | transaction, |
|
755 | transaction, | |
754 | linkrev, |
|
756 | linkrev, | |
755 | p1, |
|
757 | p1, | |
756 | p2, |
|
758 | p2, | |
757 | storedelta=storedelta, |
|
759 | storedelta=storedelta, | |
758 | flags=storeflags, |
|
760 | flags=storeflags, | |
759 | ) |
|
761 | ) | |
760 |
|
762 | |||
761 | if addrevisioncb: |
|
763 | if addrevisioncb: | |
762 | addrevisioncb(self, node) |
|
764 | addrevisioncb(self, node) | |
763 |
|
765 | |||
764 | return nodes |
|
766 | return nodes | |
765 |
|
767 | |||
766 | def censorrevision(self, tr, censornode, tombstone=b''): |
|
768 | def censorrevision(self, tr, censornode, tombstone=b''): | |
767 | tombstone = storageutil.packmeta({b'censored': tombstone}, b'') |
|
769 | tombstone = storageutil.packmeta({b'censored': tombstone}, b'') | |
768 |
|
770 | |||
769 | # This restriction is cargo culted from revlogs and makes no sense for |
|
771 | # This restriction is cargo culted from revlogs and makes no sense for | |
770 | # SQLite, since columns can be resized at will. |
|
772 | # SQLite, since columns can be resized at will. | |
771 | if len(tombstone) > len(self.rawdata(censornode)): |
|
773 | if len(tombstone) > len(self.rawdata(censornode)): | |
772 | raise error.Abort( |
|
774 | raise error.Abort( | |
773 | _(b'censor tombstone must be no longer than censored data') |
|
775 | _(b'censor tombstone must be no longer than censored data') | |
774 | ) |
|
776 | ) | |
775 |
|
777 | |||
776 | # We need to replace the censored revision's data with the tombstone. |
|
778 | # We need to replace the censored revision's data with the tombstone. | |
777 | # But replacing that data will have implications for delta chains that |
|
779 | # But replacing that data will have implications for delta chains that | |
778 | # reference it. |
|
780 | # reference it. | |
779 | # |
|
781 | # | |
780 | # While "better," more complex strategies are possible, we do something |
|
782 | # While "better," more complex strategies are possible, we do something | |
781 | # simple: we find delta chain children of the censored revision and we |
|
783 | # simple: we find delta chain children of the censored revision and we | |
782 | # replace those incremental deltas with fulltexts of their corresponding |
|
784 | # replace those incremental deltas with fulltexts of their corresponding | |
783 | # revision. Then we delete the now-unreferenced delta and original |
|
785 | # revision. Then we delete the now-unreferenced delta and original | |
784 | # revision and insert a replacement. |
|
786 | # revision and insert a replacement. | |
785 |
|
787 | |||
786 | # Find the delta to be censored. |
|
788 | # Find the delta to be censored. | |
787 | censoreddeltaid = self._db.execute( |
|
789 | censoreddeltaid = self._db.execute( | |
788 | 'SELECT deltaid FROM fileindex WHERE id=?', |
|
790 | 'SELECT deltaid FROM fileindex WHERE id=?', | |
789 | (self._revisions[censornode].rid,), |
|
791 | (self._revisions[censornode].rid,), | |
790 | ).fetchone()[0] |
|
792 | ).fetchone()[0] | |
791 |
|
793 | |||
792 | # Find all its delta chain children. |
|
794 | # Find all its delta chain children. | |
793 | # TODO once we support storing deltas for !files, we'll need to look |
|
795 | # TODO once we support storing deltas for !files, we'll need to look | |
794 | # for those delta chains too. |
|
796 | # for those delta chains too. | |
795 | rows = list( |
|
797 | rows = list( | |
796 | self._db.execute( |
|
798 | self._db.execute( | |
797 | 'SELECT id, pathid, node FROM fileindex ' |
|
799 | 'SELECT id, pathid, node FROM fileindex ' | |
798 | 'WHERE deltabaseid=? OR deltaid=?', |
|
800 | 'WHERE deltabaseid=? OR deltaid=?', | |
799 | (censoreddeltaid, censoreddeltaid), |
|
801 | (censoreddeltaid, censoreddeltaid), | |
800 | ) |
|
802 | ) | |
801 | ) |
|
803 | ) | |
802 |
|
804 | |||
803 | for row in rows: |
|
805 | for row in rows: | |
804 | rid, pathid, node = row |
|
806 | rid, pathid, node = row | |
805 |
|
807 | |||
806 | fulltext = resolvedeltachain( |
|
808 | fulltext = resolvedeltachain( | |
807 | self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx |
|
809 | self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx | |
808 | ) |
|
810 | ) | |
809 |
|
811 | |||
810 | deltahash = hashlib.sha1(fulltext).digest() |
|
812 | deltahash = hashutil.sha1(fulltext).digest() | |
811 |
|
813 | |||
812 | if self._compengine == b'zstd': |
|
814 | if self._compengine == b'zstd': | |
813 | deltablob = self._cctx.compress(fulltext) |
|
815 | deltablob = self._cctx.compress(fulltext) | |
814 | compression = COMPRESSION_ZSTD |
|
816 | compression = COMPRESSION_ZSTD | |
815 | elif self._compengine == b'zlib': |
|
817 | elif self._compengine == b'zlib': | |
816 | deltablob = zlib.compress(fulltext) |
|
818 | deltablob = zlib.compress(fulltext) | |
817 | compression = COMPRESSION_ZLIB |
|
819 | compression = COMPRESSION_ZLIB | |
818 | elif self._compengine == b'none': |
|
820 | elif self._compengine == b'none': | |
819 | deltablob = fulltext |
|
821 | deltablob = fulltext | |
820 | compression = COMPRESSION_NONE |
|
822 | compression = COMPRESSION_NONE | |
821 | else: |
|
823 | else: | |
822 | raise error.ProgrammingError( |
|
824 | raise error.ProgrammingError( | |
823 | b'unhandled compression engine: %s' % self._compengine |
|
825 | b'unhandled compression engine: %s' % self._compengine | |
824 | ) |
|
826 | ) | |
825 |
|
827 | |||
826 | if len(deltablob) >= len(fulltext): |
|
828 | if len(deltablob) >= len(fulltext): | |
827 | deltablob = fulltext |
|
829 | deltablob = fulltext | |
828 | compression = COMPRESSION_NONE |
|
830 | compression = COMPRESSION_NONE | |
829 |
|
831 | |||
830 | deltaid = insertdelta(self._db, compression, deltahash, deltablob) |
|
832 | deltaid = insertdelta(self._db, compression, deltahash, deltablob) | |
831 |
|
833 | |||
832 | self._db.execute( |
|
834 | self._db.execute( | |
833 | 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL ' |
|
835 | 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL ' | |
834 | 'WHERE id=?', |
|
836 | 'WHERE id=?', | |
835 | (deltaid, rid), |
|
837 | (deltaid, rid), | |
836 | ) |
|
838 | ) | |
837 |
|
839 | |||
838 | # Now create the tombstone delta and replace the delta on the censored |
|
840 | # Now create the tombstone delta and replace the delta on the censored | |
839 | # node. |
|
841 | # node. | |
840 | deltahash = hashlib.sha1(tombstone).digest() |
|
842 | deltahash = hashutil.sha1(tombstone).digest() | |
841 | tombstonedeltaid = insertdelta( |
|
843 | tombstonedeltaid = insertdelta( | |
842 | self._db, COMPRESSION_NONE, deltahash, tombstone |
|
844 | self._db, COMPRESSION_NONE, deltahash, tombstone | |
843 | ) |
|
845 | ) | |
844 |
|
846 | |||
845 | flags = self._revisions[censornode].flags |
|
847 | flags = self._revisions[censornode].flags | |
846 | flags |= FLAG_CENSORED |
|
848 | flags |= FLAG_CENSORED | |
847 |
|
849 | |||
848 | self._db.execute( |
|
850 | self._db.execute( | |
849 | 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL ' |
|
851 | 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL ' | |
850 | 'WHERE pathid=? AND node=?', |
|
852 | 'WHERE pathid=? AND node=?', | |
851 | (flags, tombstonedeltaid, self._pathid, censornode), |
|
853 | (flags, tombstonedeltaid, self._pathid, censornode), | |
852 | ) |
|
854 | ) | |
853 |
|
855 | |||
854 | self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,)) |
|
856 | self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,)) | |
855 |
|
857 | |||
856 | self._refreshindex() |
|
858 | self._refreshindex() | |
857 | self._revisioncache.clear() |
|
859 | self._revisioncache.clear() | |
858 |
|
860 | |||
859 | def getstrippoint(self, minlink): |
|
861 | def getstrippoint(self, minlink): | |
860 | return storageutil.resolvestripinfo( |
|
862 | return storageutil.resolvestripinfo( | |
861 | minlink, |
|
863 | minlink, | |
862 | len(self) - 1, |
|
864 | len(self) - 1, | |
863 | [self.rev(n) for n in self.heads()], |
|
865 | [self.rev(n) for n in self.heads()], | |
864 | self.linkrev, |
|
866 | self.linkrev, | |
865 | self.parentrevs, |
|
867 | self.parentrevs, | |
866 | ) |
|
868 | ) | |
867 |
|
869 | |||
868 | def strip(self, minlink, transaction): |
|
870 | def strip(self, minlink, transaction): | |
869 | if not len(self): |
|
871 | if not len(self): | |
870 | return |
|
872 | return | |
871 |
|
873 | |||
872 | rev, _ignored = self.getstrippoint(minlink) |
|
874 | rev, _ignored = self.getstrippoint(minlink) | |
873 |
|
875 | |||
874 | if rev == len(self): |
|
876 | if rev == len(self): | |
875 | return |
|
877 | return | |
876 |
|
878 | |||
877 | for rev in self.revs(rev): |
|
879 | for rev in self.revs(rev): | |
878 | self._db.execute( |
|
880 | self._db.execute( | |
879 | 'DELETE FROM fileindex WHERE pathid=? AND node=?', |
|
881 | 'DELETE FROM fileindex WHERE pathid=? AND node=?', | |
880 | (self._pathid, self.node(rev)), |
|
882 | (self._pathid, self.node(rev)), | |
881 | ) |
|
883 | ) | |
882 |
|
884 | |||
883 | # TODO how should we garbage collect data in delta table? |
|
885 | # TODO how should we garbage collect data in delta table? | |
884 |
|
886 | |||
885 | self._refreshindex() |
|
887 | self._refreshindex() | |
886 |
|
888 | |||
887 | # End of ifilemutation interface. |
|
889 | # End of ifilemutation interface. | |
888 |
|
890 | |||
889 | # Start of ifilestorage interface. |
|
891 | # Start of ifilestorage interface. | |
890 |
|
892 | |||
891 | def files(self): |
|
893 | def files(self): | |
892 | return [] |
|
894 | return [] | |
893 |
|
895 | |||
894 | def storageinfo( |
|
896 | def storageinfo( | |
895 | self, |
|
897 | self, | |
896 | exclusivefiles=False, |
|
898 | exclusivefiles=False, | |
897 | sharedfiles=False, |
|
899 | sharedfiles=False, | |
898 | revisionscount=False, |
|
900 | revisionscount=False, | |
899 | trackedsize=False, |
|
901 | trackedsize=False, | |
900 | storedsize=False, |
|
902 | storedsize=False, | |
901 | ): |
|
903 | ): | |
902 | d = {} |
|
904 | d = {} | |
903 |
|
905 | |||
904 | if exclusivefiles: |
|
906 | if exclusivefiles: | |
905 | d[b'exclusivefiles'] = [] |
|
907 | d[b'exclusivefiles'] = [] | |
906 |
|
908 | |||
907 | if sharedfiles: |
|
909 | if sharedfiles: | |
908 | # TODO list sqlite file(s) here. |
|
910 | # TODO list sqlite file(s) here. | |
909 | d[b'sharedfiles'] = [] |
|
911 | d[b'sharedfiles'] = [] | |
910 |
|
912 | |||
911 | if revisionscount: |
|
913 | if revisionscount: | |
912 | d[b'revisionscount'] = len(self) |
|
914 | d[b'revisionscount'] = len(self) | |
913 |
|
915 | |||
914 | if trackedsize: |
|
916 | if trackedsize: | |
915 | d[b'trackedsize'] = sum( |
|
917 | d[b'trackedsize'] = sum( | |
916 | len(self.revision(node)) for node in self._nodetorev |
|
918 | len(self.revision(node)) for node in self._nodetorev | |
917 | ) |
|
919 | ) | |
918 |
|
920 | |||
919 | if storedsize: |
|
921 | if storedsize: | |
920 | # TODO implement this? |
|
922 | # TODO implement this? | |
921 | d[b'storedsize'] = None |
|
923 | d[b'storedsize'] = None | |
922 |
|
924 | |||
923 | return d |
|
925 | return d | |
924 |
|
926 | |||
925 | def verifyintegrity(self, state): |
|
927 | def verifyintegrity(self, state): | |
926 | state[b'skipread'] = set() |
|
928 | state[b'skipread'] = set() | |
927 |
|
929 | |||
928 | for rev in self: |
|
930 | for rev in self: | |
929 | node = self.node(rev) |
|
931 | node = self.node(rev) | |
930 |
|
932 | |||
931 | try: |
|
933 | try: | |
932 | self.revision(node) |
|
934 | self.revision(node) | |
933 | except Exception as e: |
|
935 | except Exception as e: | |
934 | yield sqliteproblem( |
|
936 | yield sqliteproblem( | |
935 | error=_(b'unpacking %s: %s') % (short(node), e), node=node |
|
937 | error=_(b'unpacking %s: %s') % (short(node), e), node=node | |
936 | ) |
|
938 | ) | |
937 |
|
939 | |||
938 | state[b'skipread'].add(node) |
|
940 | state[b'skipread'].add(node) | |
939 |
|
941 | |||
940 | # End of ifilestorage interface. |
|
942 | # End of ifilestorage interface. | |
941 |
|
943 | |||
942 | def _checkhash(self, fulltext, node, p1=None, p2=None): |
|
944 | def _checkhash(self, fulltext, node, p1=None, p2=None): | |
943 | if p1 is None and p2 is None: |
|
945 | if p1 is None and p2 is None: | |
944 | p1, p2 = self.parents(node) |
|
946 | p1, p2 = self.parents(node) | |
945 |
|
947 | |||
946 | if node == storageutil.hashrevisionsha1(fulltext, p1, p2): |
|
948 | if node == storageutil.hashrevisionsha1(fulltext, p1, p2): | |
947 | return |
|
949 | return | |
948 |
|
950 | |||
949 | try: |
|
951 | try: | |
950 | del self._revisioncache[node] |
|
952 | del self._revisioncache[node] | |
951 | except KeyError: |
|
953 | except KeyError: | |
952 | pass |
|
954 | pass | |
953 |
|
955 | |||
954 | if storageutil.iscensoredtext(fulltext): |
|
956 | if storageutil.iscensoredtext(fulltext): | |
955 | raise error.CensoredNodeError(self._path, node, fulltext) |
|
957 | raise error.CensoredNodeError(self._path, node, fulltext) | |
956 |
|
958 | |||
957 | raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path) |
|
959 | raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path) | |
958 |
|
960 | |||
959 | def _addrawrevision( |
|
961 | def _addrawrevision( | |
960 | self, |
|
962 | self, | |
961 | node, |
|
963 | node, | |
962 | revisiondata, |
|
964 | revisiondata, | |
963 | transaction, |
|
965 | transaction, | |
964 | linkrev, |
|
966 | linkrev, | |
965 | p1, |
|
967 | p1, | |
966 | p2, |
|
968 | p2, | |
967 | storedelta=None, |
|
969 | storedelta=None, | |
968 | flags=0, |
|
970 | flags=0, | |
969 | ): |
|
971 | ): | |
970 | if self._pathid is None: |
|
972 | if self._pathid is None: | |
971 | res = self._db.execute( |
|
973 | res = self._db.execute( | |
972 | 'INSERT INTO filepath (path) VALUES (?)', (self._path,) |
|
974 | 'INSERT INTO filepath (path) VALUES (?)', (self._path,) | |
973 | ) |
|
975 | ) | |
974 | self._pathid = res.lastrowid |
|
976 | self._pathid = res.lastrowid | |
975 |
|
977 | |||
976 | # For simplicity, always store a delta against p1. |
|
978 | # For simplicity, always store a delta against p1. | |
977 | # TODO we need a lot more logic here to make behavior reasonable. |
|
979 | # TODO we need a lot more logic here to make behavior reasonable. | |
978 |
|
980 | |||
979 | if storedelta: |
|
981 | if storedelta: | |
980 | deltabase, delta = storedelta |
|
982 | deltabase, delta = storedelta | |
981 |
|
983 | |||
982 | if isinstance(deltabase, int): |
|
984 | if isinstance(deltabase, int): | |
983 | deltabase = self.node(deltabase) |
|
985 | deltabase = self.node(deltabase) | |
984 |
|
986 | |||
985 | else: |
|
987 | else: | |
986 | assert revisiondata is not None |
|
988 | assert revisiondata is not None | |
987 | deltabase = p1 |
|
989 | deltabase = p1 | |
988 |
|
990 | |||
989 | if deltabase == nullid: |
|
991 | if deltabase == nullid: | |
990 | delta = revisiondata |
|
992 | delta = revisiondata | |
991 | else: |
|
993 | else: | |
992 | delta = mdiff.textdiff( |
|
994 | delta = mdiff.textdiff( | |
993 | self.revision(self.rev(deltabase)), revisiondata |
|
995 | self.revision(self.rev(deltabase)), revisiondata | |
994 | ) |
|
996 | ) | |
995 |
|
997 | |||
996 | # File index stores a pointer to its delta and the parent delta. |
|
998 | # File index stores a pointer to its delta and the parent delta. | |
997 | # The parent delta is stored via a pointer to the fileindex PK. |
|
999 | # The parent delta is stored via a pointer to the fileindex PK. | |
998 | if deltabase == nullid: |
|
1000 | if deltabase == nullid: | |
999 | baseid = None |
|
1001 | baseid = None | |
1000 | else: |
|
1002 | else: | |
1001 | baseid = self._revisions[deltabase].rid |
|
1003 | baseid = self._revisions[deltabase].rid | |
1002 |
|
1004 | |||
1003 | # Deltas are stored with a hash of their content. This allows |
|
1005 | # Deltas are stored with a hash of their content. This allows | |
1004 | # us to de-duplicate. The table is configured to ignore conflicts |
|
1006 | # us to de-duplicate. The table is configured to ignore conflicts | |
1005 | # and it is faster to just insert and silently noop than to look |
|
1007 | # and it is faster to just insert and silently noop than to look | |
1006 | # first. |
|
1008 | # first. | |
1007 | deltahash = hashlib.sha1(delta).digest() |
|
1009 | deltahash = hashutil.sha1(delta).digest() | |
1008 |
|
1010 | |||
1009 | if self._compengine == b'zstd': |
|
1011 | if self._compengine == b'zstd': | |
1010 | deltablob = self._cctx.compress(delta) |
|
1012 | deltablob = self._cctx.compress(delta) | |
1011 | compression = COMPRESSION_ZSTD |
|
1013 | compression = COMPRESSION_ZSTD | |
1012 | elif self._compengine == b'zlib': |
|
1014 | elif self._compengine == b'zlib': | |
1013 | deltablob = zlib.compress(delta) |
|
1015 | deltablob = zlib.compress(delta) | |
1014 | compression = COMPRESSION_ZLIB |
|
1016 | compression = COMPRESSION_ZLIB | |
1015 | elif self._compengine == b'none': |
|
1017 | elif self._compengine == b'none': | |
1016 | deltablob = delta |
|
1018 | deltablob = delta | |
1017 | compression = COMPRESSION_NONE |
|
1019 | compression = COMPRESSION_NONE | |
1018 | else: |
|
1020 | else: | |
1019 | raise error.ProgrammingError( |
|
1021 | raise error.ProgrammingError( | |
1020 | b'unhandled compression engine: %s' % self._compengine |
|
1022 | b'unhandled compression engine: %s' % self._compengine | |
1021 | ) |
|
1023 | ) | |
1022 |
|
1024 | |||
1023 | # Don't store compressed data if it isn't practical. |
|
1025 | # Don't store compressed data if it isn't practical. | |
1024 | if len(deltablob) >= len(delta): |
|
1026 | if len(deltablob) >= len(delta): | |
1025 | deltablob = delta |
|
1027 | deltablob = delta | |
1026 | compression = COMPRESSION_NONE |
|
1028 | compression = COMPRESSION_NONE | |
1027 |
|
1029 | |||
1028 | deltaid = insertdelta(self._db, compression, deltahash, deltablob) |
|
1030 | deltaid = insertdelta(self._db, compression, deltahash, deltablob) | |
1029 |
|
1031 | |||
1030 | rev = len(self) |
|
1032 | rev = len(self) | |
1031 |
|
1033 | |||
1032 | if p1 == nullid: |
|
1034 | if p1 == nullid: | |
1033 | p1rev = nullrev |
|
1035 | p1rev = nullrev | |
1034 | else: |
|
1036 | else: | |
1035 | p1rev = self._nodetorev[p1] |
|
1037 | p1rev = self._nodetorev[p1] | |
1036 |
|
1038 | |||
1037 | if p2 == nullid: |
|
1039 | if p2 == nullid: | |
1038 | p2rev = nullrev |
|
1040 | p2rev = nullrev | |
1039 | else: |
|
1041 | else: | |
1040 | p2rev = self._nodetorev[p2] |
|
1042 | p2rev = self._nodetorev[p2] | |
1041 |
|
1043 | |||
1042 | rid = self._db.execute( |
|
1044 | rid = self._db.execute( | |
1043 | 'INSERT INTO fileindex (' |
|
1045 | 'INSERT INTO fileindex (' | |
1044 | ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, ' |
|
1046 | ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, ' | |
1045 | ' deltaid, deltabaseid) ' |
|
1047 | ' deltaid, deltabaseid) ' | |
1046 | ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', |
|
1048 | ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', | |
1047 | ( |
|
1049 | ( | |
1048 | self._pathid, |
|
1050 | self._pathid, | |
1049 | rev, |
|
1051 | rev, | |
1050 | node, |
|
1052 | node, | |
1051 | p1rev, |
|
1053 | p1rev, | |
1052 | p2rev, |
|
1054 | p2rev, | |
1053 | linkrev, |
|
1055 | linkrev, | |
1054 | flags, |
|
1056 | flags, | |
1055 | deltaid, |
|
1057 | deltaid, | |
1056 | baseid, |
|
1058 | baseid, | |
1057 | ), |
|
1059 | ), | |
1058 | ).lastrowid |
|
1060 | ).lastrowid | |
1059 |
|
1061 | |||
1060 | entry = revisionentry( |
|
1062 | entry = revisionentry( | |
1061 | rid=rid, |
|
1063 | rid=rid, | |
1062 | rev=rev, |
|
1064 | rev=rev, | |
1063 | node=node, |
|
1065 | node=node, | |
1064 | p1rev=p1rev, |
|
1066 | p1rev=p1rev, | |
1065 | p2rev=p2rev, |
|
1067 | p2rev=p2rev, | |
1066 | p1node=p1, |
|
1068 | p1node=p1, | |
1067 | p2node=p2, |
|
1069 | p2node=p2, | |
1068 | linkrev=linkrev, |
|
1070 | linkrev=linkrev, | |
1069 | flags=flags, |
|
1071 | flags=flags, | |
1070 | ) |
|
1072 | ) | |
1071 |
|
1073 | |||
1072 | self._nodetorev[node] = rev |
|
1074 | self._nodetorev[node] = rev | |
1073 | self._revtonode[rev] = node |
|
1075 | self._revtonode[rev] = node | |
1074 | self._revisions[node] = entry |
|
1076 | self._revisions[node] = entry | |
1075 |
|
1077 | |||
1076 | return node |
|
1078 | return node | |
1077 |
|
1079 | |||
1078 |
|
1080 | |||
1079 | class sqliterepository(localrepo.localrepository): |
|
1081 | class sqliterepository(localrepo.localrepository): | |
1080 | def cancopy(self): |
|
1082 | def cancopy(self): | |
1081 | return False |
|
1083 | return False | |
1082 |
|
1084 | |||
1083 | def transaction(self, *args, **kwargs): |
|
1085 | def transaction(self, *args, **kwargs): | |
1084 | current = self.currenttransaction() |
|
1086 | current = self.currenttransaction() | |
1085 |
|
1087 | |||
1086 | tr = super(sqliterepository, self).transaction(*args, **kwargs) |
|
1088 | tr = super(sqliterepository, self).transaction(*args, **kwargs) | |
1087 |
|
1089 | |||
1088 | if current: |
|
1090 | if current: | |
1089 | return tr |
|
1091 | return tr | |
1090 |
|
1092 | |||
1091 | self._dbconn.execute('BEGIN TRANSACTION') |
|
1093 | self._dbconn.execute('BEGIN TRANSACTION') | |
1092 |
|
1094 | |||
1093 | def committransaction(_): |
|
1095 | def committransaction(_): | |
1094 | self._dbconn.commit() |
|
1096 | self._dbconn.commit() | |
1095 |
|
1097 | |||
1096 | tr.addfinalize(b'sqlitestore', committransaction) |
|
1098 | tr.addfinalize(b'sqlitestore', committransaction) | |
1097 |
|
1099 | |||
1098 | return tr |
|
1100 | return tr | |
1099 |
|
1101 | |||
1100 | @property |
|
1102 | @property | |
1101 | def _dbconn(self): |
|
1103 | def _dbconn(self): | |
1102 | # SQLite connections can only be used on the thread that created |
|
1104 | # SQLite connections can only be used on the thread that created | |
1103 | # them. In most cases, this "just works." However, hgweb uses |
|
1105 | # them. In most cases, this "just works." However, hgweb uses | |
1104 | # multiple threads. |
|
1106 | # multiple threads. | |
1105 | tid = threading.current_thread().ident |
|
1107 | tid = threading.current_thread().ident | |
1106 |
|
1108 | |||
1107 | if self._db: |
|
1109 | if self._db: | |
1108 | if self._db[0] == tid: |
|
1110 | if self._db[0] == tid: | |
1109 | return self._db[1] |
|
1111 | return self._db[1] | |
1110 |
|
1112 | |||
1111 | db = makedb(self.svfs.join(b'db.sqlite')) |
|
1113 | db = makedb(self.svfs.join(b'db.sqlite')) | |
1112 | self._db = (tid, db) |
|
1114 | self._db = (tid, db) | |
1113 |
|
1115 | |||
1114 | return db |
|
1116 | return db | |
1115 |
|
1117 | |||
1116 |
|
1118 | |||
1117 | def makedb(path): |
|
1119 | def makedb(path): | |
1118 | """Construct a database handle for a database at path.""" |
|
1120 | """Construct a database handle for a database at path.""" | |
1119 |
|
1121 | |||
1120 | db = sqlite3.connect(encoding.strfromlocal(path)) |
|
1122 | db = sqlite3.connect(encoding.strfromlocal(path)) | |
1121 | db.text_factory = bytes |
|
1123 | db.text_factory = bytes | |
1122 |
|
1124 | |||
1123 | res = db.execute('PRAGMA user_version').fetchone()[0] |
|
1125 | res = db.execute('PRAGMA user_version').fetchone()[0] | |
1124 |
|
1126 | |||
1125 | # New database. |
|
1127 | # New database. | |
1126 | if res == 0: |
|
1128 | if res == 0: | |
1127 | for statement in CREATE_SCHEMA: |
|
1129 | for statement in CREATE_SCHEMA: | |
1128 | db.execute(statement) |
|
1130 | db.execute(statement) | |
1129 |
|
1131 | |||
1130 | db.commit() |
|
1132 | db.commit() | |
1131 |
|
1133 | |||
1132 | elif res == CURRENT_SCHEMA_VERSION: |
|
1134 | elif res == CURRENT_SCHEMA_VERSION: | |
1133 | pass |
|
1135 | pass | |
1134 |
|
1136 | |||
1135 | else: |
|
1137 | else: | |
1136 | raise error.Abort(_(b'sqlite database has unrecognized version')) |
|
1138 | raise error.Abort(_(b'sqlite database has unrecognized version')) | |
1137 |
|
1139 | |||
1138 | db.execute('PRAGMA journal_mode=WAL') |
|
1140 | db.execute('PRAGMA journal_mode=WAL') | |
1139 |
|
1141 | |||
1140 | return db |
|
1142 | return db | |
1141 |
|
1143 | |||
1142 |
|
1144 | |||
1143 | def featuresetup(ui, supported): |
|
1145 | def featuresetup(ui, supported): | |
1144 | supported.add(REQUIREMENT) |
|
1146 | supported.add(REQUIREMENT) | |
1145 |
|
1147 | |||
1146 | if zstd: |
|
1148 | if zstd: | |
1147 | supported.add(REQUIREMENT_ZSTD) |
|
1149 | supported.add(REQUIREMENT_ZSTD) | |
1148 |
|
1150 | |||
1149 | supported.add(REQUIREMENT_ZLIB) |
|
1151 | supported.add(REQUIREMENT_ZLIB) | |
1150 | supported.add(REQUIREMENT_NONE) |
|
1152 | supported.add(REQUIREMENT_NONE) | |
1151 | supported.add(REQUIREMENT_SHALLOW_FILES) |
|
1153 | supported.add(REQUIREMENT_SHALLOW_FILES) | |
1152 | supported.add(repository.NARROW_REQUIREMENT) |
|
1154 | supported.add(repository.NARROW_REQUIREMENT) | |
1153 |
|
1155 | |||
1154 |
|
1156 | |||
1155 | def newreporequirements(orig, ui, createopts): |
|
1157 | def newreporequirements(orig, ui, createopts): | |
1156 | if createopts[b'backend'] != b'sqlite': |
|
1158 | if createopts[b'backend'] != b'sqlite': | |
1157 | return orig(ui, createopts) |
|
1159 | return orig(ui, createopts) | |
1158 |
|
1160 | |||
1159 | # This restriction can be lifted once we have more confidence. |
|
1161 | # This restriction can be lifted once we have more confidence. | |
1160 | if b'sharedrepo' in createopts: |
|
1162 | if b'sharedrepo' in createopts: | |
1161 | raise error.Abort( |
|
1163 | raise error.Abort( | |
1162 | _(b'shared repositories not supported with SQLite store') |
|
1164 | _(b'shared repositories not supported with SQLite store') | |
1163 | ) |
|
1165 | ) | |
1164 |
|
1166 | |||
1165 | # This filtering is out of an abundance of caution: we want to ensure |
|
1167 | # This filtering is out of an abundance of caution: we want to ensure | |
1166 | # we honor creation options and we do that by annotating exactly the |
|
1168 | # we honor creation options and we do that by annotating exactly the | |
1167 | # creation options we recognize. |
|
1169 | # creation options we recognize. | |
1168 | known = { |
|
1170 | known = { | |
1169 | b'narrowfiles', |
|
1171 | b'narrowfiles', | |
1170 | b'backend', |
|
1172 | b'backend', | |
1171 | b'shallowfilestore', |
|
1173 | b'shallowfilestore', | |
1172 | } |
|
1174 | } | |
1173 |
|
1175 | |||
1174 | unsupported = set(createopts) - known |
|
1176 | unsupported = set(createopts) - known | |
1175 | if unsupported: |
|
1177 | if unsupported: | |
1176 | raise error.Abort( |
|
1178 | raise error.Abort( | |
1177 | _(b'SQLite store does not support repo creation option: %s') |
|
1179 | _(b'SQLite store does not support repo creation option: %s') | |
1178 | % b', '.join(sorted(unsupported)) |
|
1180 | % b', '.join(sorted(unsupported)) | |
1179 | ) |
|
1181 | ) | |
1180 |
|
1182 | |||
1181 | # Since we're a hybrid store that still relies on revlogs, we fall back |
|
1183 | # Since we're a hybrid store that still relies on revlogs, we fall back | |
1182 | # to using the revlogv1 backend's storage requirements then adding our |
|
1184 | # to using the revlogv1 backend's storage requirements then adding our | |
1183 | # own requirement. |
|
1185 | # own requirement. | |
1184 | createopts[b'backend'] = b'revlogv1' |
|
1186 | createopts[b'backend'] = b'revlogv1' | |
1185 | requirements = orig(ui, createopts) |
|
1187 | requirements = orig(ui, createopts) | |
1186 | requirements.add(REQUIREMENT) |
|
1188 | requirements.add(REQUIREMENT) | |
1187 |
|
1189 | |||
1188 | compression = ui.config(b'storage', b'sqlite.compression') |
|
1190 | compression = ui.config(b'storage', b'sqlite.compression') | |
1189 |
|
1191 | |||
1190 | if compression == b'zstd' and not zstd: |
|
1192 | if compression == b'zstd' and not zstd: | |
1191 | raise error.Abort( |
|
1193 | raise error.Abort( | |
1192 | _( |
|
1194 | _( | |
1193 | b'storage.sqlite.compression set to "zstd" but ' |
|
1195 | b'storage.sqlite.compression set to "zstd" but ' | |
1194 | b'zstandard compression not available to this ' |
|
1196 | b'zstandard compression not available to this ' | |
1195 | b'Mercurial install' |
|
1197 | b'Mercurial install' | |
1196 | ) |
|
1198 | ) | |
1197 | ) |
|
1199 | ) | |
1198 |
|
1200 | |||
1199 | if compression == b'zstd': |
|
1201 | if compression == b'zstd': | |
1200 | requirements.add(REQUIREMENT_ZSTD) |
|
1202 | requirements.add(REQUIREMENT_ZSTD) | |
1201 | elif compression == b'zlib': |
|
1203 | elif compression == b'zlib': | |
1202 | requirements.add(REQUIREMENT_ZLIB) |
|
1204 | requirements.add(REQUIREMENT_ZLIB) | |
1203 | elif compression == b'none': |
|
1205 | elif compression == b'none': | |
1204 | requirements.add(REQUIREMENT_NONE) |
|
1206 | requirements.add(REQUIREMENT_NONE) | |
1205 | else: |
|
1207 | else: | |
1206 | raise error.Abort( |
|
1208 | raise error.Abort( | |
1207 | _( |
|
1209 | _( | |
1208 | b'unknown compression engine defined in ' |
|
1210 | b'unknown compression engine defined in ' | |
1209 | b'storage.sqlite.compression: %s' |
|
1211 | b'storage.sqlite.compression: %s' | |
1210 | ) |
|
1212 | ) | |
1211 | % compression |
|
1213 | % compression | |
1212 | ) |
|
1214 | ) | |
1213 |
|
1215 | |||
1214 | if createopts.get(b'shallowfilestore'): |
|
1216 | if createopts.get(b'shallowfilestore'): | |
1215 | requirements.add(REQUIREMENT_SHALLOW_FILES) |
|
1217 | requirements.add(REQUIREMENT_SHALLOW_FILES) | |
1216 |
|
1218 | |||
1217 | return requirements |
|
1219 | return requirements | |
1218 |
|
1220 | |||
1219 |
|
1221 | |||
1220 | @interfaceutil.implementer(repository.ilocalrepositoryfilestorage) |
|
1222 | @interfaceutil.implementer(repository.ilocalrepositoryfilestorage) | |
1221 | class sqlitefilestorage(object): |
|
1223 | class sqlitefilestorage(object): | |
1222 | """Repository file storage backed by SQLite.""" |
|
1224 | """Repository file storage backed by SQLite.""" | |
1223 |
|
1225 | |||
1224 | def file(self, path): |
|
1226 | def file(self, path): | |
1225 | if path[0] == b'/': |
|
1227 | if path[0] == b'/': | |
1226 | path = path[1:] |
|
1228 | path = path[1:] | |
1227 |
|
1229 | |||
1228 | if REQUIREMENT_ZSTD in self.requirements: |
|
1230 | if REQUIREMENT_ZSTD in self.requirements: | |
1229 | compression = b'zstd' |
|
1231 | compression = b'zstd' | |
1230 | elif REQUIREMENT_ZLIB in self.requirements: |
|
1232 | elif REQUIREMENT_ZLIB in self.requirements: | |
1231 | compression = b'zlib' |
|
1233 | compression = b'zlib' | |
1232 | elif REQUIREMENT_NONE in self.requirements: |
|
1234 | elif REQUIREMENT_NONE in self.requirements: | |
1233 | compression = b'none' |
|
1235 | compression = b'none' | |
1234 | else: |
|
1236 | else: | |
1235 | raise error.Abort( |
|
1237 | raise error.Abort( | |
1236 | _( |
|
1238 | _( | |
1237 | b'unable to determine what compression engine ' |
|
1239 | b'unable to determine what compression engine ' | |
1238 | b'to use for SQLite storage' |
|
1240 | b'to use for SQLite storage' | |
1239 | ) |
|
1241 | ) | |
1240 | ) |
|
1242 | ) | |
1241 |
|
1243 | |||
1242 | return sqlitefilestore(self._dbconn, path, compression) |
|
1244 | return sqlitefilestore(self._dbconn, path, compression) | |
1243 |
|
1245 | |||
1244 |
|
1246 | |||
1245 | def makefilestorage(orig, requirements, features, **kwargs): |
|
1247 | def makefilestorage(orig, requirements, features, **kwargs): | |
1246 | """Produce a type conforming to ``ilocalrepositoryfilestorage``.""" |
|
1248 | """Produce a type conforming to ``ilocalrepositoryfilestorage``.""" | |
1247 | if REQUIREMENT in requirements: |
|
1249 | if REQUIREMENT in requirements: | |
1248 | if REQUIREMENT_SHALLOW_FILES in requirements: |
|
1250 | if REQUIREMENT_SHALLOW_FILES in requirements: | |
1249 | features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE) |
|
1251 | features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE) | |
1250 |
|
1252 | |||
1251 | return sqlitefilestorage |
|
1253 | return sqlitefilestorage | |
1252 | else: |
|
1254 | else: | |
1253 | return orig(requirements=requirements, features=features, **kwargs) |
|
1255 | return orig(requirements=requirements, features=features, **kwargs) | |
1254 |
|
1256 | |||
1255 |
|
1257 | |||
1256 | def makemain(orig, ui, requirements, **kwargs): |
|
1258 | def makemain(orig, ui, requirements, **kwargs): | |
1257 | if REQUIREMENT in requirements: |
|
1259 | if REQUIREMENT in requirements: | |
1258 | if REQUIREMENT_ZSTD in requirements and not zstd: |
|
1260 | if REQUIREMENT_ZSTD in requirements and not zstd: | |
1259 | raise error.Abort( |
|
1261 | raise error.Abort( | |
1260 | _( |
|
1262 | _( | |
1261 | b'repository uses zstandard compression, which ' |
|
1263 | b'repository uses zstandard compression, which ' | |
1262 | b'is not available to this Mercurial install' |
|
1264 | b'is not available to this Mercurial install' | |
1263 | ) |
|
1265 | ) | |
1264 | ) |
|
1266 | ) | |
1265 |
|
1267 | |||
1266 | return sqliterepository |
|
1268 | return sqliterepository | |
1267 |
|
1269 | |||
1268 | return orig(requirements=requirements, **kwargs) |
|
1270 | return orig(requirements=requirements, **kwargs) | |
1269 |
|
1271 | |||
1270 |
|
1272 | |||
1271 | def verifierinit(orig, self, *args, **kwargs): |
|
1273 | def verifierinit(orig, self, *args, **kwargs): | |
1272 | orig(self, *args, **kwargs) |
|
1274 | orig(self, *args, **kwargs) | |
1273 |
|
1275 | |||
1274 | # We don't care that files in the store don't align with what is |
|
1276 | # We don't care that files in the store don't align with what is | |
1275 | # advertised. So suppress these warnings. |
|
1277 | # advertised. So suppress these warnings. | |
1276 | self.warnorphanstorefiles = False |
|
1278 | self.warnorphanstorefiles = False | |
1277 |
|
1279 | |||
1278 |
|
1280 | |||
1279 | def extsetup(ui): |
|
1281 | def extsetup(ui): | |
1280 | localrepo.featuresetupfuncs.add(featuresetup) |
|
1282 | localrepo.featuresetupfuncs.add(featuresetup) | |
1281 | extensions.wrapfunction( |
|
1283 | extensions.wrapfunction( | |
1282 | localrepo, b'newreporequirements', newreporequirements |
|
1284 | localrepo, b'newreporequirements', newreporequirements | |
1283 | ) |
|
1285 | ) | |
1284 | extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage) |
|
1286 | extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage) | |
1285 | extensions.wrapfunction(localrepo, b'makemain', makemain) |
|
1287 | extensions.wrapfunction(localrepo, b'makemain', makemain) | |
1286 | extensions.wrapfunction(verify.verifier, b'__init__', verifierinit) |
|
1288 | extensions.wrapfunction(verify.verifier, b'__init__', verifierinit) | |
1287 |
|
1289 | |||
1288 |
|
1290 | |||
1289 | def reposetup(ui, repo): |
|
1291 | def reposetup(ui, repo): | |
1290 | if isinstance(repo, sqliterepository): |
|
1292 | if isinstance(repo, sqliterepository): | |
1291 | repo._db = None |
|
1293 | repo._db = None | |
1292 |
|
1294 | |||
1293 | # TODO check for bundlerepository? |
|
1295 | # TODO check for bundlerepository? |
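
The hunks above swap hashlib.sha1 for hashutil.sha1 at every point where a delta's content is hashed, and the surrounding (unchanged) code keeps the same storage pattern: each delta blob is keyed by the SHA-1 of its content so identical blobs de-duplicate, and the delta table is set up to ignore conflicting inserts rather than being queried first. Below is a minimal, self-contained sketch of that de-duplication pattern only; the one-column schema, the simplified insertdelta() helper, and the use of plain hashlib.sha1 are illustrative stand-ins, not the extension's actual schema or API.

    import hashlib
    import sqlite3


    def makedb():
        # Hypothetical, stripped-down stand-in for the extension's delta table.
        db = sqlite3.connect(':memory:')
        db.execute(
            'CREATE TABLE delta ('
            '  id INTEGER PRIMARY KEY, '
            '  hash BLOB UNIQUE, '
            '  delta BLOB)'
        )
        return db


    def insertdelta(db, delta):
        # Hash the uncompressed delta so identical content collapses to one row.
        deltahash = hashlib.sha1(delta).digest()

        # Insert and silently no-op on a duplicate hash instead of SELECTing first.
        db.execute(
            'INSERT OR IGNORE INTO delta (hash, delta) VALUES (?, ?)',
            (deltahash, delta),
        )

        # Return either the fresh row id or the pre-existing one for this hash.
        return db.execute(
            'SELECT id FROM delta WHERE hash=?', (deltahash,)
        ).fetchone()[0]


    db = makedb()
    first = insertdelta(db, b'some delta bytes')
    second = insertdelta(db, b'some delta bytes')
    assert first == second  # duplicate content maps to the same delta row

Inserting the same content twice yields the same row id, which is what lets multiple fileindex rows reference a single stored delta; the patch only changes which sha1() implementation produces the key, not this layout.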