py3: use node.hex(m.digest()) instead of m.hexdigest()...
Pulkit Goyal
r40711:9fcf8084 default
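
Why this change: on Python 3, hashlib's hexdigest() returns a unicode str, while Mercurial works with bytes throughout. node.hex() (a thin wrapper around binascii.hexlify) applied to the raw digest() yields the same hexadecimal text as bytes on both Python 2 and 3. A minimal standalone sketch of the difference, using binascii.hexlify in place of mercurial.node.hex:

    import binascii
    import hashlib

    m = hashlib.sha1(b'some data')

    text = m.hexdigest()    # str on Python 3; bytes-like str on Python 2
    raw = m.digest()        # bytes on both versions

    # node.hex is essentially binascii.hexlify, so it stays in bytes land:
    hexbytes = binascii.hexlify(raw)  # bytes on both versions

    try:
        b'i' + text         # fine on Python 2, TypeError on Python 3
    except TypeError:
        print('mixing a str hexdigest with bytes breaks on Python 3')

    print(b'i' + hexbytes)  # works on both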
@@ -1,829 +1,829 @@
# Copyright 2016-present Facebook. All Rights Reserved.
#
# context: context needed to annotate a file
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import contextlib
import hashlib
import os

from mercurial.i18n import _
from mercurial import (
    error,
    linelog as linelogmod,
    lock as lockmod,
    mdiff,
    node,
    pycompat,
    scmutil,
    util,
)
from mercurial.utils import (
    stringutil,
)

from . import (
    error as faerror,
    revmap as revmapmod,
)

# given path, get filelog, cached
@util.lrucachefunc
def _getflog(repo, path):
    return repo.file(path)

# extracted from mercurial.context.basefilectx.annotate
def _parents(f, follow=True):
    # Cut _descendantrev here to mitigate the penalty of lazy linkrev
    # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
    # from the topmost introrev (= srcrev) down to p.linkrev() if it
    # isn't an ancestor of the srcrev.
    f._changeid
    pl = f.parents()

    # Don't return renamed parents if we aren't following.
    if not follow:
        pl = [p for p in pl if p.path() == f.path()]

    # renamed filectx won't have a filelog yet, so set it
    # from the cache to save time
    for p in pl:
        if not '_filelog' in p.__dict__:
            p._filelog = _getflog(f._repo, p.path())

    return pl

# extracted from mercurial.context.basefilectx.annotate. slightly modified
# so it takes a fctx instead of a pair of text and fctx.
def _decorate(fctx):
    text = fctx.data()
    linecount = text.count('\n')
    if text and not text.endswith('\n'):
        linecount += 1
    return ([(fctx, i) for i in pycompat.xrange(linecount)], text)

# extracted from mercurial.context.basefilectx.annotate. slightly modified
# so it takes an extra "blocks" parameter calculated elsewhere, instead of
# calculating diff here.
def _pair(parent, child, blocks):
    for (a1, a2, b1, b2), t in blocks:
        # Changed blocks ('!') or blocks made only of blank lines ('~')
        # belong to the child.
        if t == '=':
            child[0][b1:b2] = parent[0][a1:a2]
    return child

# like scmutil.revsingle, but with lru cache, so their states (like manifests)
# could be reused
_revsingle = util.lrucachefunc(scmutil.revsingle)

def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
    """(repo, str, str) -> fctx

    get the filectx object from repo, rev, path, in an efficient way.

    if resolverev is True, "rev" is a revision specified by the revset
    language, otherwise "rev" is a nodeid, or a revision number that can
    be consumed by repo.__getitem__.

    if adjustctx is not None, the returned fctx will point to a changeset
    that introduces the change (last modified the file). if adjustctx
    is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
    faster for big repos but is incorrect for some cases.
    """
    if resolverev and not isinstance(rev, int) and rev is not None:
        ctx = _revsingle(repo, rev)
    else:
        ctx = repo[rev]

    # If we don't need to adjust the linkrev, create the filectx using the
    # changectx instead of using ctx[path]. This means it already has the
    # changectx information, so blame -u will be able to look directly at the
    # commitctx object instead of having to resolve it by going through the
    # manifest. In a lazy-manifest world this can prevent us from downloading a
    # lot of data.
    if adjustctx is None:
        # ctx.rev() is None means it's the working copy, which is a special
        # case.
        if ctx.rev() is None:
            fctx = ctx[path]
        else:
            fctx = repo.filectx(path, changeid=ctx.rev())
    else:
        fctx = ctx[path]
        if adjustctx == 'linkrev':
            introrev = fctx.linkrev()
        else:
            introrev = fctx.introrev()
        if introrev != ctx.rev():
            fctx._changeid = introrev
            fctx._changectx = repo[introrev]
    return fctx

# like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
def encodedir(path):
    return (path
            .replace('.hg/', '.hg.hg/')
            .replace('.l/', '.l.hg/')
            .replace('.m/', '.m.hg/')
            .replace('.lock/', '.lock.hg/'))

def hashdiffopts(diffopts):
    diffoptstr = stringutil.pprint(sorted(
        (k, getattr(diffopts, k))
        for k in mdiff.diffopts.defaults
    ))
-    return hashlib.sha1(diffoptstr).hexdigest()[:6]
+    return node.hex(hashlib.sha1(diffoptstr).digest())[:6]

_defaultdiffopthash = hashdiffopts(mdiff.defaultopts)

class annotateopts(object):
    """like mercurial.mdiff.diffopts, but is for annotate

    followrename: follow renames, like "hg annotate -f"
    followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
    """

    defaults = {
        'diffopts': None,
        'followrename': True,
        'followmerge': True,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        for k, v in self.defaults.iteritems():
            setattr(self, k, opts.get(k, v))

    @util.propertycache
    def shortstr(self):
        """represent opts in a short string, suitable for a directory name"""
        result = ''
        if not self.followrename:
            result += 'r0'
        if not self.followmerge:
            result += 'm0'
        if self.diffopts is not None:
            assert isinstance(self.diffopts, mdiff.diffopts)
            diffopthash = hashdiffopts(self.diffopts)
            if diffopthash != _defaultdiffopthash:
                result += 'i' + diffopthash
        return result or 'default'

defaultopts = annotateopts()

class _annotatecontext(object):
    """do not use this class directly as it does not use lock to protect
    writes. use "with annotatecontext(...)" instead.
    """

    def __init__(self, repo, path, linelogpath, revmappath, opts):
        self.repo = repo
        self.ui = repo.ui
        self.path = path
        self.opts = opts
        self.linelogpath = linelogpath
        self.revmappath = revmappath
        self._linelog = None
        self._revmap = None
        self._node2path = {} # {str: str}

    @property
    def linelog(self):
        if self._linelog is None:
            if os.path.exists(self.linelogpath):
                with open(self.linelogpath, 'rb') as f:
                    try:
                        self._linelog = linelogmod.linelog.fromdata(f.read())
                    except linelogmod.LineLogError:
                        self._linelog = linelogmod.linelog()
            else:
                self._linelog = linelogmod.linelog()
        return self._linelog

    @property
    def revmap(self):
        if self._revmap is None:
            self._revmap = revmapmod.revmap(self.revmappath)
        return self._revmap

    def close(self):
        if self._revmap is not None:
            self._revmap.flush()
            self._revmap = None
        if self._linelog is not None:
            with open(self.linelogpath, 'wb') as f:
                f.write(self._linelog.encode())
            self._linelog = None

    __del__ = close

    def rebuild(self):
        """delete linelog and revmap, useful for rebuilding"""
        self.close()
        self._node2path.clear()
        _unlinkpaths([self.revmappath, self.linelogpath])

    @property
    def lastnode(self):
        """return last node in revmap, or None if revmap is empty"""
        if self._revmap is None:
            # fast path, read revmap without loading its full content
            return revmapmod.getlastnode(self.revmappath)
        else:
            return self._revmap.rev2hsh(self._revmap.maxrev)

    def isuptodate(self, master, strict=True):
        """return True if the revmap / linelog is up-to-date, or the file
        does not exist in the master revision. False otherwise.

        it tries to be fast and could return false negatives, because of the
        use of linkrev instead of introrev.

        useful for both server and client to decide whether to update
        fastannotate cache or not.

        if strict is True, even if fctx exists in the revmap, but is not the
        last node, isuptodate will return False. it's good for performance - no
        expensive check was done.

        if strict is False, if fctx exists in the revmap, this function may
        return True. this is useful for the client to skip downloading the
        cache if the client's master is behind the server's.
        """
        lastnode = self.lastnode
        try:
            f = self._resolvefctx(master, resolverev=True)
            # choose linkrev instead of introrev as the check is meant to be
            # *fast*.
            linknode = self.repo.changelog.node(f.linkrev())
            if not strict and lastnode and linknode != lastnode:
                # check if f.node() is in the revmap. note: this loads the
                # revmap and can be slow.
                return self.revmap.hsh2rev(linknode) is not None
            # avoid resolving old manifest, or slow adjustlinkrev to be fast,
            # false negatives are acceptable in this case.
            return linknode == lastnode
        except LookupError:
            # master does not have the file, or the revmap is ahead
            return True

    def annotate(self, rev, master=None, showpath=False, showlines=False):
        """incrementally update the cache so it includes revisions in the main
        branch till 'master'. and run annotate on 'rev', which may or may not be
        included in the main branch.

        if master is None, do not update linelog.

        the first value returned is the annotate result, it is [(node, linenum)]
        by default. [(node, linenum, path)] if showpath is True.

        if showlines is True, a second value will be returned, it is a list of
        corresponding line contents.
        """

        # the fast path test requires commit hash, convert rev number to hash,
        # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
        # command could give us a revision number even if the user passes a
        # commit hash.
        if isinstance(rev, int):
            rev = node.hex(self.repo.changelog.node(rev))

        # fast path: if rev is in the main branch already
        directly, revfctx = self.canannotatedirectly(rev)
        if directly:
            if self.ui.debugflag:
                self.ui.debug('fastannotate: %s: using fast path '
                              '(resolved fctx: %s)\n'
                              % (self.path,
                                 stringutil.pprint(util.safehasattr(revfctx,
                                                                    'node'))))
            return self.annotatedirectly(revfctx, showpath, showlines)

        # resolve master
        masterfctx = None
        if master:
            try:
                masterfctx = self._resolvefctx(master, resolverev=True,
                                               adjustctx=True)
            except LookupError: # master does not have the file
                pass
            else:
                if masterfctx in self.revmap: # no need to update linelog
                    masterfctx = None

        #  ... - @ <- rev (can be an arbitrary changeset,
        #       /         not necessarily a descendant
        # master -> o     of master)
        #           |
        # a merge -> o    'o': new changesets in the main branch
        #            |\   '#': revisions in the main branch that
        #            o *       exist in linelog / revmap
        #            | .  '*': changesets in side branches, or
        # last master -> # .   descendants of master
        #                | .
        #                # *   joint: '#', and is a parent of a '*'
        #                |/
        #     a joint -> # ^^^^ --- side branches
        #                |
        #                ^ --- main branch (in linelog)

        # these DFSes are similar to the traditional annotate algorithm.
        # we cannot really reuse the code for perf reason.

        # 1st DFS calculates merges, joint points, and needed.
        # "needed" is a simple reference counting dict to free items in
        # "hist", reducing its memory usage otherwise could be huge.
        initvisit = [revfctx]
        if masterfctx:
            if masterfctx.rev() is None:
                raise error.Abort(_('cannot update linelog to wdir()'),
                                  hint=_('set fastannotate.mainbranch'))
            initvisit.append(masterfctx)
        visit = initvisit[:]
        pcache = {}
        needed = {revfctx: 1}
        hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
        while visit:
            f = visit.pop()
            if f in pcache or f in hist:
                continue
            if f in self.revmap: # in the old main branch, it's a joint
                llrev = self.revmap.hsh2rev(f.node())
                self.linelog.annotate(llrev)
                result = self.linelog.annotateresult
                hist[f] = (result, f.data())
                continue
            pl = self._parentfunc(f)
            pcache[f] = pl
            for p in pl:
                needed[p] = needed.get(p, 0) + 1
                if p not in pcache:
                    visit.append(p)

        # 2nd (simple) DFS calculates new changesets in the main branch
        # ('o' nodes in # the above graph), so we know when to update linelog.
        newmainbranch = set()
        f = masterfctx
        while f and f not in self.revmap:
            newmainbranch.add(f)
            pl = pcache[f]
            if pl:
                f = pl[0]
            else:
                f = None
                break

        # f, if present, is the position where the last build stopped at, and
        # should be the "master" last time. check to see if we can continue
        # building the linelog incrementally. (we cannot if diverged)
        if masterfctx is not None:
            self._checklastmasterhead(f)

        if self.ui.debugflag:
            if newmainbranch:
                self.ui.debug('fastannotate: %s: %d new changesets in the main'
                              ' branch\n' % (self.path, len(newmainbranch)))
            elif not hist: # no joints, no updates
                self.ui.debug('fastannotate: %s: linelog cannot help in '
                              'annotating this revision\n' % self.path)

        # prepare annotateresult so we can update linelog incrementally
        self.linelog.annotate(self.linelog.maxrev)

        # 3rd DFS does the actual annotate
        visit = initvisit[:]
        progress = 0
        while visit:
            f = visit[-1]
            if f in hist:
                visit.pop()
                continue

            ready = True
            pl = pcache[f]
            for p in pl:
                if p not in hist:
                    ready = False
                    visit.append(p)
            if not ready:
                continue

            visit.pop()
            blocks = None # mdiff blocks, used for appending linelog
            ismainbranch = (f in newmainbranch)
            # curr is the same as the traditional annotate algorithm,
            # if we only care about linear history (do not follow merge),
            # then curr is not actually used.
            assert f not in hist
            curr = _decorate(f)
            for i, p in enumerate(pl):
                bs = list(self._diffblocks(hist[p][1], curr[1]))
                if i == 0 and ismainbranch:
                    blocks = bs
                curr = _pair(hist[p], curr, bs)
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

            if ismainbranch: # need to write to linelog
                if not self.ui.quiet:
                    progress += 1
                    self.ui.progress(_('building cache'), progress,
                                     total=len(newmainbranch))
                bannotated = None
                if len(pl) == 2 and self.opts.followmerge: # merge
                    bannotated = curr[0]
                if blocks is None: # no parents, add an empty one
                    blocks = list(self._diffblocks('', curr[1]))
                self._appendrev(f, blocks, bannotated)
            elif showpath: # not append linelog, but we need to record path
                self._node2path[f.node()] = f.path()

        if progress: # clean progress bar
            self.ui.write()

        result = [
            ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
            for fr, l in hist[revfctx][0]] # [(node, linenumber)]
        return self._refineannotateresult(result, revfctx, showpath, showlines)

    def canannotatedirectly(self, rev):
        """(str) -> bool, fctx or node.
        return (True, f) if we can annotate without updating the linelog, pass
        f to annotatedirectly.
        return (False, f) if we need extra calculation. f is the fctx resolved
        from rev.
        """
        result = True
        f = None
        if not isinstance(rev, int) and rev is not None:
            hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
            if hsh is not None and (hsh, self.path) in self.revmap:
                f = hsh
        if f is None:
            adjustctx = 'linkrev' if self._perfhack else True
            f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
            result = f in self.revmap
            if not result and self._perfhack:
                # redo the resolution without perfhack - as we are going to
                # do write operations, we need a correct fctx.
                f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
        return result, f

    def annotatealllines(self, rev, showpath=False, showlines=False):
        """(rev : str) -> [(node : str, linenum : int, path : str)]

        the result has the same format with annotate, but include all (including
        deleted) lines up to rev. call this after calling annotate(rev, ...) for
        better performance and accuracy.
        """
        revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)

        # find a chain from rev to anything in the mainbranch
        if revfctx not in self.revmap:
            chain = [revfctx]
            a = ''
            while True:
                f = chain[-1]
                pl = self._parentfunc(f)
                if not pl:
                    break
                if pl[0] in self.revmap:
                    a = pl[0].data()
                    break
                chain.append(pl[0])

            # both self.linelog and self.revmap is backed by filesystem. now
            # we want to modify them but do not want to write changes back to
            # files. so we create in-memory objects and copy them. it's like
            # a "fork".
            linelog = linelogmod.linelog()
            linelog.copyfrom(self.linelog)
            linelog.annotate(linelog.maxrev)
            revmap = revmapmod.revmap()
            revmap.copyfrom(self.revmap)

            for f in reversed(chain):
                b = f.data()
                blocks = list(self._diffblocks(a, b))
                self._doappendrev(linelog, revmap, f, blocks)
                a = b
        else:
            # fastpath: use existing linelog, revmap as we don't write to them
            linelog = self.linelog
            revmap = self.revmap

        lines = linelog.getalllines()
        hsh = revfctx.node()
        llrev = revmap.hsh2rev(hsh)
        result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
        # cannot use _refineannotateresult since we need custom logic for
        # resolving line contents
        if showpath:
            result = self._addpathtoresult(result, revmap)
        if showlines:
            linecontents = self._resolvelines(result, revmap, linelog)
            result = (result, linecontents)
        return result

    def _resolvelines(self, annotateresult, revmap, linelog):
        """(annotateresult) -> [line]. designed for annotatealllines.
        this is probably the most inefficient code in the whole fastannotate
        directory. but we have made a decision that the linelog does not
        store line contents. so getting them requires random accesses to
        the revlog data, since they can be many, it can be very slow.
        """
        # [llrev]
        revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
        result = [None] * len(annotateresult)
        # {(rev, linenum): [lineindex]}
        key2idxs = collections.defaultdict(list)
        for i in pycompat.xrange(len(result)):
            key2idxs[(revs[i], annotateresult[i][1])].append(i)
        while key2idxs:
            # find an unresolved line and its linelog rev to annotate
            hsh = None
            try:
                for (rev, _linenum), idxs in key2idxs.iteritems():
                    if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                        continue
                    hsh = annotateresult[idxs[0]][0]
                    break
            except StopIteration: # no more unresolved lines
                return result
            if hsh is None:
                # the remaining key2idxs are not in main branch, resolving them
                # using the hard way...
                revlines = {}
                for (rev, linenum), idxs in key2idxs.iteritems():
                    if rev not in revlines:
                        hsh = annotateresult[idxs[0]][0]
                        if self.ui.debugflag:
                            self.ui.debug('fastannotate: reading %s line #%d '
                                          'to resolve lines %r\n'
                                          % (node.short(hsh), linenum, idxs))
                        fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
                        lines = mdiff.splitnewlines(fctx.data())
                        revlines[rev] = lines
                    for idx in idxs:
                        result[idx] = revlines[rev][linenum]
                assert all(x is not None for x in result)
                return result

            # run the annotate and the lines should match to the file content
            self.ui.debug('fastannotate: annotate %s to resolve lines\n'
                          % node.short(hsh))
            linelog.annotate(rev)
            fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
            annotated = linelog.annotateresult
            lines = mdiff.splitnewlines(fctx.data())
            if len(lines) != len(annotated):
                raise faerror.CorruptedFileError('unexpected annotated lines')
            # resolve lines from the annotate result
            for i, line in enumerate(lines):
                k = annotated[i]
                if k in key2idxs:
                    for idx in key2idxs[k]:
                        result[idx] = line
                    del key2idxs[k]
        return result

    def annotatedirectly(self, f, showpath, showlines):
        """like annotate, but when we know that f is in linelog.
        f can be either a 20-char str (node) or a fctx. this is for perf - in
        the best case, the user provides a node and we don't need to read the
        filelog or construct any filecontext.
        """
        if isinstance(f, str):
            hsh = f
        else:
            hsh = f.node()
        llrev = self.revmap.hsh2rev(hsh)
        if not llrev:
            raise faerror.CorruptedFileError('%s is not in revmap'
                                             % node.hex(hsh))
        if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
            raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
                                             % node.hex(hsh))
        self.linelog.annotate(llrev)
        result = [(self.revmap.rev2hsh(r), l)
                  for r, l in self.linelog.annotateresult]
        return self._refineannotateresult(result, f, showpath, showlines)

    def _refineannotateresult(self, result, f, showpath, showlines):
        """add the missing path or line contents, they can be expensive.
        f could be either node or fctx.
        """
        if showpath:
            result = self._addpathtoresult(result)
        if showlines:
            if isinstance(f, str): # f: node or fctx
                llrev = self.revmap.hsh2rev(f)
                fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
            else:
                fctx = f
            lines = mdiff.splitnewlines(fctx.data())
            if len(lines) != len(result): # linelog is probably corrupted
                raise faerror.CorruptedFileError()
            result = (result, lines)
        return result

    def _appendrev(self, fctx, blocks, bannotated=None):
        self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)

    def _diffblocks(self, a, b):
        return mdiff.allblocks(a, b, self.opts.diffopts)

    @staticmethod
    def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
        """append a revision to linelog and revmap"""

        def getllrev(f):
            """(fctx) -> int"""
            # f should not be a linelog revision
            if isinstance(f, int):
                raise error.ProgrammingError('f should not be an int')
            # f is a fctx, allocate linelog rev on demand
            hsh = f.node()
            rev = revmap.hsh2rev(hsh)
            if rev is None:
                rev = revmap.append(hsh, sidebranch=True, path=f.path())
            return rev

        # append sidebranch revisions to revmap
        siderevs = []
        siderevmap = {} # node: int
        if bannotated is not None:
            for (a1, a2, b1, b2), op in blocks:
                if op != '=':
                    # f could be either linelong rev, or fctx.
                    siderevs += [f for f, l in bannotated[b1:b2]
                                 if not isinstance(f, int)]
            siderevs = set(siderevs)
            if fctx in siderevs: # mainnode must be appended seperately
                siderevs.remove(fctx)
            for f in siderevs:
                siderevmap[f] = getllrev(f)

        # the changeset in the main branch, could be a merge
        llrev = revmap.append(fctx.node(), path=fctx.path())
        siderevmap[fctx] = llrev

        for (a1, a2, b1, b2), op in reversed(blocks):
            if op == '=':
                continue
            if bannotated is None:
                linelog.replacelines(llrev, a1, a2, b1, b2)
            else:
                blines = [((r if isinstance(r, int) else siderevmap[r]), l)
                          for r, l in bannotated[b1:b2]]
                linelog.replacelines_vec(llrev, a1, a2, blines)

    def _addpathtoresult(self, annotateresult, revmap=None):
        """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
        if revmap is None:
            revmap = self.revmap

        def _getpath(nodeid):
            path = self._node2path.get(nodeid)
            if path is None:
                path = revmap.rev2path(revmap.hsh2rev(nodeid))
                self._node2path[nodeid] = path
            return path

        return [(n, l, _getpath(n)) for n, l in annotateresult]

    def _checklastmasterhead(self, fctx):
        """check if fctx is the master's head last time, raise if not"""
        if fctx is None:
            llrev = 0
        else:
            llrev = self.revmap.hsh2rev(fctx.node())
            if not llrev:
                raise faerror.CannotReuseError()
        if self.linelog.maxrev != llrev:
            raise faerror.CannotReuseError()

    @util.propertycache
    def _parentfunc(self):
        """-> (fctx) -> [fctx]"""
        followrename = self.opts.followrename
        followmerge = self.opts.followmerge
        def parents(f):
            pl = _parents(f, follow=followrename)
            if not followmerge:
                pl = pl[:1]
            return pl
        return parents

    @util.propertycache
    def _perfhack(self):
        return self.ui.configbool('fastannotate', 'perfhack')

    def _resolvefctx(self, rev, path=None, **kwds):
        return resolvefctx(self.repo, rev, (path or self.path), **kwds)

def _unlinkpaths(paths):
    """silent, best-effort unlink"""
    for path in paths:
        try:
            util.unlink(path)
        except OSError:
            pass

class pathhelper(object):
    """helper for getting paths for lockfile, linelog and revmap"""

    def __init__(self, repo, path, opts=defaultopts):
        # different options use different directories
        self._vfspath = os.path.join('fastannotate',
                                     opts.shortstr, encodedir(path))
        self._repo = repo

    @property
    def dirname(self):
        return os.path.dirname(self._repo.vfs.join(self._vfspath))

    @property
    def linelogpath(self):
        return self._repo.vfs.join(self._vfspath + '.l')

    def lock(self):
        return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')

    @contextlib.contextmanager
    def _lockflock(self):
        """the same as 'lock' but use flock instead of lockmod.lock, to avoid
        creating temporary symlinks."""
        import fcntl
        lockpath = self.linelogpath
        util.makedirs(os.path.dirname(lockpath))
        lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
        fcntl.flock(lockfd, fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(lockfd, fcntl.LOCK_UN)
            os.close(lockfd)

    @property
    def revmappath(self):
        return self._repo.vfs.join(self._vfspath + '.m')

@contextlib.contextmanager
def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
    """context needed to perform (fast) annotate on a file

    an annotatecontext of a single file consists of two structures: the
    linelog and the revmap. this function takes care of locking. only 1
    process is allowed to write that file's linelog and revmap at a time.

    when something goes wrong, this function will assume the linelog and the
    revmap are in a bad state, and remove them from disk.

    use this function in the following way:

        with annotatecontext(...) as actx:
            actx. ....
    """
    helper = pathhelper(repo, path, opts)
    util.makedirs(helper.dirname)
    revmappath = helper.revmappath
    linelogpath = helper.linelogpath
    actx = None
    try:
        with helper.lock():
            actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
            if rebuild:
                actx.rebuild()
            yield actx
    except Exception:
        if actx is not None:
            actx.rebuild()
        repo.ui.debug('fastannotate: %s: cache broken and deleted\n' % path)
        raise
    finally:
        if actx is not None:
            actx.close()

def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
    """like annotatecontext but get the context from a fctx. convenient when
    used in fctx.annotate
    """
    repo = fctx._repo
    path = fctx._path
    if repo.ui.configbool('fastannotate', 'forcefollow', True):
        follow = True
    aopts = annotateopts(diffopts=diffopts, followrename=follow)
    return annotatecontext(repo, path, aopts, rebuild)
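
Why the return type matters in this file: hashdiffopts() feeds annotateopts.shortstr, which splices the hash into a byte string ('i' + diffopthash; under Mercurial's Python 3 source transformer, plain string literals in this code are bytes), and pathhelper then uses shortstr as part of an on-disk cache directory name. A str hexdigest would raise TypeError at that concatenation on Python 3. A minimal sketch of that flow, with hypothetical values and binascii.hexlify standing in for node.hex:

    import binascii
    import hashlib
    import os

    def hashdiffopts_sketch(diffoptstr):
        # bytes in, bytes out - safe to splice into other byte strings
        return binascii.hexlify(hashlib.sha1(diffoptstr).digest())[:6]

    # hypothetical serialized diffopts; the real input comes from stringutil.pprint
    shortstr = b'i' + hashdiffopts_sketch(b"[('context', 3)]")
    vfspath = os.path.join(b'fastannotate', shortstr, b'some/file.py')
    print(vfspath)  # directory name varies with the diffopts hash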
@@ -1,609 +1,609 @@
# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''High-level command function for lfconvert, plus the cmdtable.'''
from __future__ import absolute_import

import errno
import hashlib
import os
import shutil

from mercurial.i18n import _

from mercurial import (
    cmdutil,
    context,
    error,
    hg,
    lock,
    match as matchmod,
    node,
    pycompat,
    registrar,
    scmutil,
    util,
)

from ..convert import (
    convcmd,
    filemap,
)

from . import (
    lfutil,
    storefactory
)

release = lock.release

# -- Commands ----------------------------------------------------------

cmdtable = {}
command = registrar.command(cmdtable)

@command('lfconvert',
    [('s', 'size', '',
      _('minimum size (MB) for files to be converted as largefiles'), 'SIZE'),
    ('', 'to-normal', False,
     _('convert from a largefiles repo to a normal repo')),
    ],
    _('hg lfconvert SOURCE DEST [FILE ...]'),
    norepo=True,
    inferrepo=True)
def lfconvert(ui, src, dest, *pats, **opts):
    '''convert a normal repository to a largefiles repository

    Convert repository SOURCE to a new repository DEST, identical to
    SOURCE except that certain files will be converted as largefiles:
    specifically, any file that matches any PATTERN *or* whose size is
    above the minimum size threshold is converted as a largefile. The
    size used to determine whether or not to track a file as a
    largefile is the size of the first version of the file. The
    minimum size can be specified either with --size or in
    configuration as ``largefiles.size``.

    After running this command you will need to make sure that
    largefiles is enabled anywhere you intend to push the new
    repository.

    Use --to-normal to convert largefiles back to normal files; after
    this, the DEST repository can be used without largefiles at all.'''

    opts = pycompat.byteskwargs(opts)
    if opts['to_normal']:
        tolfile = False
    else:
        tolfile = True
        size = lfutil.getminsize(ui, True, opts.get('size'), default=None)

    if not hg.islocal(src):
        raise error.Abort(_('%s is not a local Mercurial repo') % src)
    if not hg.islocal(dest):
        raise error.Abort(_('%s is not a local Mercurial repo') % dest)

    rsrc = hg.repository(ui, src)
    ui.status(_('initializing destination %s\n') % dest)
    rdst = hg.repository(ui, dest, create=True)

    success = False
    dstwlock = dstlock = None
    try:
        # Get a list of all changesets in the source. The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Take a look at mercurial/revlog.py:639 for more details.
        # Use a generator instead of a list to decrease memory usage
        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
            rsrc.heads())[0])
        revmap = {node.nullid: node.nullid}
        if tolfile:
            # Lock destination to prevent modification while it is converted to.
            # Don't need to lock src because we are just reading from its
            # history which can't change.
            dstwlock = rdst.wlock()
            dstlock = rdst.lock()

            lfiles = set()
            normalfiles = set()
            if not pats:
                pats = ui.configlist(lfutil.longname, 'patterns')
            if pats:
                matcher = matchmod.match(rsrc.root, '', list(pats))
            else:
                matcher = None

            lfiletohash = {}
            with ui.makeprogress(_('converting revisions'),
                                 unit=_('revisions'),
                                 total=rsrc['tip'].rev()) as progress:
                for ctx in ctxs:
                    progress.update(ctx.rev())
                    _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
                        lfiles, normalfiles, matcher, size, lfiletohash)

            if rdst.wvfs.exists(lfutil.shortname):
                rdst.wvfs.rmtree(lfutil.shortname)

            for f in lfiletohash.keys():
                if rdst.wvfs.isfile(f):
                    rdst.wvfs.unlink(f)
                try:
                    rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
                except OSError:
                    pass

            # If there were any files converted to largefiles, add largefiles
            # to the destination repository's requirements.
            if lfiles:
                rdst.requirements.add('largefiles')
                rdst._writerequirements()
        else:
            class lfsource(filemap.filemap_source):
                def __init__(self, ui, source):
                    super(lfsource, self).__init__(ui, source, None)
                    self.filemapper.rename[lfutil.shortname] = '.'

                def getfile(self, name, rev):
                    realname, realrev = rev
                    f = super(lfsource, self).getfile(name, rev)

                    if (not realname.startswith(lfutil.shortnameslash)
                            or f[0] is None):
                        return f

                    # Substitute in the largefile data for the hash
                    hash = f[0].strip()
                    path = lfutil.findfile(rsrc, hash)

                    if path is None:
                        raise error.Abort(_("missing largefile for '%s' in %s")
                                          % (realname, realrev))
                    return util.readfile(path), f[1]

            class converter(convcmd.converter):
                def __init__(self, ui, source, dest, revmapfile, opts):
                    src = lfsource(ui, source)

                    super(converter, self).__init__(ui, src, dest, revmapfile,
                                                    opts)

            found, missing = downloadlfiles(ui, rsrc)
            if missing != 0:
                raise error.Abort(_("all largefiles must be present locally"))

            orig = convcmd.converter
            convcmd.converter = converter

            try:
                convcmd.convert(ui, src, dest, source_type='hg', dest_type='hg')
            finally:
                convcmd.converter = orig
        success = True
    finally:
        if tolfile:
            rdst.dirstate.clear()
            release(dstlock, dstwlock)
        if not success:
            # we failed, remove the new directory
            shutil.rmtree(rdst.root)

def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    # Convert src parents to dst parents
    parents = _convertparents(ctx, revmap)

    # Generate list of changed files
    files = _getchangedfiles(ctx, parents)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the largefile-ness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                if renamed is None:
                    # the code below assumes renamed to be a boolean or a list
                    # and won't quite work with the value None
                    renamed = False
                renamedlfile = renamed and renamed[0] in lfiles
                islfile |= renamedlfile
                if 'l' in fctx.flags():
                    if renamedlfile:
                        raise error.Abort(
                            _('renamed/copied largefile %s becomes symlink')
                            % f)
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            fstandin = lfutil.standin(f)
            dstfiles.append(fstandin)
            # largefile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise error.Abort(_('largefile %s becomes symlink') % f)

                # largefile was modified, update standins
                m = hashlib.sha1('')
                m.update(ctx[f].data())
-               hash = m.hexdigest()
+               hash = node.hex(m.digest())
                if f not in lfiletohash or lfiletohash[f] != hash:
                    rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
                    executable = 'x' in ctx[f].flags()
                    lfutil.writestandin(rdst, fstandin, hash,
                        executable)
                    lfiletohash[f] = hash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        srcfname = lfutil.splitstandin(f)
        if srcfname is not None:
            # if the file isn't in the manifest then it was removed
            # or renamed, return None to indicate this
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                return None
            renamed = fctx.renamed()
            if renamed:
                # standin is always a largefile because largefile-ness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            return context.memfilectx(repo, memctx, f,
                                      lfiletohash[srcfname] + '\n',
                                      'l' in fctx.flags(), 'x' in fctx.flags(),
                                      renamed)
        else:
            return _getnormalcontext(repo, ctx, f, revmap)

    # Commit
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)

def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
    ret = rdst.commitctx(mctx)
    lfutil.copyalltostore(rdst, ret)
    rdst.setparents(ret)
    revmap[ctx.node()] = rdst.changelog.tip()

# Generate list of changed files
def _getchangedfiles(ctx, parents):
    files = set(ctx.files())
    if node.nullid not in parents:
        mc = ctx.manifest()
        mp1 = ctx.parents()[0].manifest()
        mp2 = ctx.parents()[1].manifest()
        files |= (set(mp1) | set(mp2)) - set(mc)
        for f in mc:
            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                files.add(f)
    return files

# Convert src parents to dst parents
def _convertparents(ctx, revmap):
    parents = []
    for p in ctx.parents():
        parents.append(revmap[p.node()])
    while len(parents) < 2:
        parents.append(node.nullid)
    return parents

# Get memfilectx for a normal file
def _getnormalcontext(repo, ctx, f, revmap):
    try:
        fctx = ctx.filectx(f)
    except error.LookupError:
        return None
    renamed = fctx.renamed()
    if renamed:
        renamed = renamed[0]

    data = fctx.data()
    if f == '.hgtags':
        data = _converttags (repo.ui, revmap, data)
    return context.memfilectx(repo, ctx, f, data, 'l' in fctx.flags(),
                              'x' in fctx.flags(), renamed)

# Remap tag data using a revision map
def _converttags(ui, revmap, data):
    newdata = []
    for line in data.splitlines():
        try:
            id, name = line.split(' ', 1)
        except ValueError:
            ui.warn(_('skipping incorrectly formatted tag %s\n')
                    % line)
            continue
        try:
            newid = node.bin(id)
        except TypeError:
            ui.warn(_('skipping incorrectly formatted id %s\n')
                    % id)
            continue
        try:
            newdata.append('%s %s\n' % (node.hex(revmap[newid]),
                                        name))
        except KeyError:
            ui.warn(_('no mapping for id %s\n') % id)
            continue
    return ''.join(newdata)

def _islfile(file, ctx, matcher, size):
    '''Return true if file should be considered a largefile, i.e.
    matcher matches it or it is larger than size.'''
    # never store special .hg* files as largefiles
    if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
        return False
    if matcher and matcher(file):
        return True
    try:
        return ctx.filectx(file).size() >= size * 1024 * 1024
    except error.LookupError:
        return False

def uploadlfiles(ui, rsrc, rdst, files):
    '''upload largefiles to the central store'''

    if not files:
        return

    store = storefactory.openstore(rsrc, rdst, put=True)

    at = 0
    ui.debug("sending statlfile command for %d largefiles\n" % len(files))
    retval = store.exists(files)
    files = [h for h in files if not retval[h]]
    ui.debug("%d largefiles need to be uploaded\n" % len(files))

    with ui.makeprogress(_('uploading largefiles'), unit=_('files'),
                         total=len(files)) as progress:
        for hash in files:
            progress.update(at)
            source = lfutil.findfile(rsrc, hash)
            if not source:
                raise error.Abort(_('largefile %s missing from store'
                                    ' (needs to be uploaded)') % hash)
            # XXX check for errors here
            store.put(source, hash)
            at += 1

def verifylfiles(ui, repo, all=False, contents=False):
    '''Verify that every largefile revision in the current changeset
    exists in the central store. With --contents, also verify that
    the contents of each local largefile file revision are correct (SHA-1 hash
    matches the revision ID). With --all, check every changeset in
    this repository.'''
    if all:
        revs = repo.revs('all()')
    else:
        revs = ['.']

    store = storefactory.openstore(repo)
    return store.verify(revs, contents=contents)

def cachelfiles(ui, repo, node, filelist=None):
    '''cachelfiles ensures that all largefiles needed by the specified revision
    are present in the repository's largefile cache.

    returns a tuple (cached, missing). cached is the list of files downloaded
    by this operation; missing is the list of files that were needed but could
    not be found.'''
    lfiles = lfutil.listlfiles(repo, node)
    if filelist:
        lfiles = set(lfiles) & set(filelist)
    toget = []

    ctx = repo[node]
    for lfile in lfiles:
        try:
            expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
        except IOError as err:
            if err.errno == errno.ENOENT:
                continue # node must be None and standin wasn't found in wctx
            raise
        if not lfutil.findfile(repo, expectedhash):
            toget.append((lfile, expectedhash))

    if toget:
        store = storefactory.openstore(repo)
        ret = store.get(toget)
        return ret

    return ([], [])

def downloadlfiles(ui, repo, rev=None):
    match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
    def prepare(ctx, fns):
        pass
    totalsuccess = 0
    totalmissing = 0
    if rev != []: # walkchangerevs on empty list would return all revs
        for ctx in cmdutil.walkchangerevs(repo, match, {'rev' : rev},
                                          prepare):
            success, missing = cachelfiles(ui, repo, ctx.node())
            totalsuccess += len(success)
            totalmissing += len(missing)
    ui.status(_("%d additional largefiles cached\n") % totalsuccess)
    if totalmissing > 0:
        ui.status(_("%d largefiles failed to download\n") % totalmissing)
    return totalsuccess, totalmissing

def updatelfiles(ui, repo, filelist=None, printmessage=None,
                 normallookup=False):
    '''Update largefiles according to standins in the working directory

    If ``printmessage`` is other than ``None``, it means "print (or
    ignore, for false) message forcibly".
    '''
    statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
    with repo.wlock():
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            filelist = set(filelist)
            lfiles = [f for f in lfiles if f in filelist]

        update = {}
        dropped = set()
        updated, removed = 0, 0
        wvfs = repo.wvfs
        wctx = repo[None]
        for lfile in lfiles:
            rellfile = lfile
            rellfileorig = os.path.relpath(
                scmutil.origpath(ui, repo, wvfs.join(rellfile)),
                start=repo.root)
            relstandin = lfutil.standin(lfile)
            relstandinorig = os.path.relpath(
                scmutil.origpath(ui, repo, wvfs.join(relstandin)),
                start=repo.root)
            if wvfs.exists(relstandin):
                if (wvfs.exists(relstandinorig) and
                        wvfs.exists(rellfile)):
                    shutil.copyfile(wvfs.join(rellfile),
                                    wvfs.join(rellfileorig))
                    wvfs.unlinkpath(relstandinorig)
                expecthash = lfutil.readasstandin(wctx[relstandin])
                if expecthash != '':
                    if lfile not in wctx: # not switched to normal file
                        if repo.dirstate[relstandin] != '?':
                            wvfs.unlinkpath(rellfile, ignoremissing=True)
                        else:
                            dropped.add(rellfile)

                    # use normallookup() to allocate an entry in largefiles
                    # dirstate to prevent lfilesrepo.status() from reporting
                    # missing files as removed.
                    lfdirstate.normallookup(lfile)
                    update[lfile] = expecthash
            else:
                # Remove lfiles for which the standin is deleted, unless the
                # lfile is added to the repository again. This happens when a
                # largefile is converted back to a normal file: the standin
                # disappears, but a new (normal) file appears as the lfile.
                if (wvfs.exists(rellfile) and
                        repo.dirstate.normalize(lfile) not in wctx):
                    wvfs.unlinkpath(rellfile)
                    removed += 1

        # largefile processing might be slow and be interrupted - be prepared
        lfdirstate.write()

        if lfiles:
            lfiles = [f for f in lfiles if f not in dropped]

            for f in dropped:
                repo.wvfs.unlinkpath(lfutil.standin(f))

                # This needs to happen for dropped files, otherwise they stay in
                # the M state.
                lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)

            statuswriter(_('getting changed largefiles\n'))
            cachelfiles(ui, repo, None, lfiles)

        for lfile in lfiles:
            update1 = 0

            expecthash = update.get(lfile)
            if expecthash:
                if not lfutil.copyfromcache(repo, expecthash, lfile):
                    # failed ... but already removed and set to normallookup
                    continue
                # Synchronize largefile dirstate to the last modified
                # time of the file
                lfdirstate.normal(lfile)
                update1 = 1

            # copy the exec mode of largefile standin from the repository's
            # dirstate to its state in the lfdirstate.
            rellfile = lfile
            relstandin = lfutil.standin(lfile)
            if wvfs.exists(relstandin):
                # exec is decided by the users permissions using mask 0o100
                standinexec = wvfs.stat(relstandin).st_mode & 0o100
                st = wvfs.stat(rellfile)
                mode = st.st_mode
                if standinexec != mode & 0o100:
                    # first remove all X bits, then shift all R bits to X
                    mode &= ~0o111
                    if standinexec:
                        mode |= (mode >> 2) & 0o111 & ~util.umask
                    wvfs.chmod(rellfile, mode)
                    update1 = 1

            updated += update1

            lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)

        lfdirstate.write()
        if lfiles:
            statuswriter(_('%d largefiles updated, %d removed\n') % (updated,
                                                                     removed))

@command('lfpull',
    [('r', 'rev', [], _('pull largefiles for these revisions'))
    ] + cmdutil.remoteopts,
    _('-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'))
def lfpull(ui, repo, source="default", **opts):
    """pull largefiles for the specified revisions from the specified source

    Pull largefiles that are referenced from local changesets but missing
    locally, pulling from a remote repository to the local cache.

    If SOURCE is omitted, the 'default' path will be used.
    See :hg:`help urls` for more information.

    .. container:: verbose

      Some examples:

      - pull largefiles for all branch heads::

          hg lfpull -r "head() and not closed()"

      - pull largefiles on the default branch::

          hg lfpull -r "branch(default)"
    """
    repo.lfpullsource = source

    revs = opts.get(r'rev', [])
    if not revs:
        raise error.Abort(_('no revisions specified'))
    revs = scmutil.revrange(repo, revs)

    numcached = 0
    for rev in revs:
        ui.note(_('pulling largefiles for revision %d\n') % rev)
        (cached, missing) = cachelfiles(ui, repo, rev)
        numcached += len(cached)
    ui.status(_("%d largefiles cached\n") % numcached)

@command('debuglfput',
    [] + cmdutil.remoteopts,
    _('FILE'))
def debuglfput(ui, repo, filepath, **kwargs):
    hash = lfutil.hashfile(filepath)
    storefactory.openstore(repo).put(filepath, hash)
    ui.write('%s\n' % hash)
    return 0
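
The only functional change in the file above is the standin hashing: m.hexdigest() becomes node.hex(m.digest()). The reason is Python 3 compatibility: hexdigest() returns a unicode str on Python 3, while the surrounding code (standin contents, the lfiletohash map) works in bytes. node.hex is an alias for binascii.hexlify, which returns bytes on both Python 2 and 3. A small standalone demonstration of the equivalence:

    import binascii
    import hashlib

    m = hashlib.sha1(b'')
    m.update(b'some largefile payload')

    hexstr = m.hexdigest()                   # py2: str, py3: unicode str
    hexbytes = binascii.hexlify(m.digest())  # bytes on py2 and py3 alike

    # Same 40 hex characters either way; only the py3 type differs.
    assert hexbytes == hexstr.encode('ascii')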
@@ -1,643 +1,644 b''
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 #
2 #
3 # Copyright 2017 Facebook, Inc.
3 # Copyright 2017 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import contextlib
10 import contextlib
11 import errno
11 import errno
12 import hashlib
12 import hashlib
13 import json
13 import json
14 import os
14 import os
15 import re
15 import re
16 import socket
16 import socket
17
17
18 from mercurial.i18n import _
18 from mercurial.i18n import _
19
19
20 from mercurial import (
20 from mercurial import (
21 encoding,
21 encoding,
22 error,
22 error,
23 node,
23 pathutil,
24 pathutil,
24 pycompat,
25 pycompat,
25 url as urlmod,
26 url as urlmod,
26 util,
27 util,
27 vfs as vfsmod,
28 vfs as vfsmod,
28 worker,
29 worker,
29 )
30 )
30
31
31 from mercurial.utils import (
32 from mercurial.utils import (
32 stringutil,
33 stringutil,
33 )
34 )
34
35
35 from ..largefiles import lfutil
36 from ..largefiles import lfutil
36
37
37 # 64 bytes for SHA256
38 # 64 bytes for SHA256
38 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39
40
40 class lfsvfs(vfsmod.vfs):
41 class lfsvfs(vfsmod.vfs):
41 def join(self, path):
42 def join(self, path):
42 """split the path at first two characters, like: XX/XXXXX..."""
43 """split the path at first two characters, like: XX/XXXXX..."""
43 if not _lfsre.match(path):
44 if not _lfsre.match(path):
44 raise error.ProgrammingError('unexpected lfs path: %s' % path)
45 raise error.ProgrammingError('unexpected lfs path: %s' % path)
45 return super(lfsvfs, self).join(path[0:2], path[2:])
46 return super(lfsvfs, self).join(path[0:2], path[2:])
46
47
47 def walk(self, path=None, onerror=None):
48 def walk(self, path=None, onerror=None):
48 """Yield (dirpath, [], oids) tuple for blobs under path
49 """Yield (dirpath, [], oids) tuple for blobs under path
49
50
50 Oids only exist in the root of this vfs, so dirpath is always ''.
51 Oids only exist in the root of this vfs, so dirpath is always ''.
51 """
52 """
52 root = os.path.normpath(self.base)
53 root = os.path.normpath(self.base)
53 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 # because len(dirpath) < prefixlen.
55 # because len(dirpath) < prefixlen.
55 prefixlen = len(pathutil.normasprefix(root))
56 prefixlen = len(pathutil.normasprefix(root))
56 oids = []
57 oids = []
57
58
58 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
59 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
59 onerror=onerror):
60 onerror=onerror):
60 dirpath = dirpath[prefixlen:]
61 dirpath = dirpath[prefixlen:]
61
62
62 # Silently skip unexpected files and directories
63 # Silently skip unexpected files and directories
63 if len(dirpath) == 2:
64 if len(dirpath) == 2:
64 oids.extend([dirpath + f for f in files
65 oids.extend([dirpath + f for f in files
65 if _lfsre.match(dirpath + f)])
66 if _lfsre.match(dirpath + f)])
66
67
67 yield ('', [], oids)
68 yield ('', [], oids)
68
69
69 class nullvfs(lfsvfs):
70 class nullvfs(lfsvfs):
70 def __init__(self):
71 def __init__(self):
71 pass
72 pass
72
73
73 def exists(self, oid):
74 def exists(self, oid):
74 return False
75 return False
75
76
76 def read(self, oid):
77 def read(self, oid):
77 # store.read() calls into here if the blob doesn't exist in its
78 # store.read() calls into here if the blob doesn't exist in its
78 # self.vfs. Raise the same error as a normal vfs when asked to read a
79 # self.vfs. Raise the same error as a normal vfs when asked to read a
79 # file that doesn't exist. The only difference is the full file path
80 # file that doesn't exist. The only difference is the full file path
80 # isn't available in the error.
81 # isn't available in the error.
81 raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)
82 raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)
82
83
83 def walk(self, path=None, onerror=None):
84 def walk(self, path=None, onerror=None):
84 return ('', [], [])
85 return ('', [], [])
85
86
86 def write(self, oid, data):
87 def write(self, oid, data):
87 pass
88 pass
88
89
89 class filewithprogress(object):
90 class filewithprogress(object):
90 """a file-like object that supports __len__ and read.
91 """a file-like object that supports __len__ and read.
91
92
92 Useful to provide progress information for how many bytes are read.
93 Useful to provide progress information for how many bytes are read.
93 """
94 """
94
95
95 def __init__(self, fp, callback):
96 def __init__(self, fp, callback):
96 self._fp = fp
97 self._fp = fp
97 self._callback = callback # func(readsize)
98 self._callback = callback # func(readsize)
98 fp.seek(0, os.SEEK_END)
99 fp.seek(0, os.SEEK_END)
99 self._len = fp.tell()
100 self._len = fp.tell()
100 fp.seek(0)
101 fp.seek(0)
101
102
102 def __len__(self):
103 def __len__(self):
103 return self._len
104 return self._len
104
105
105 def read(self, size):
106 def read(self, size):
106 if self._fp is None:
107 if self._fp is None:
107 return b''
108 return b''
108 data = self._fp.read(size)
109 data = self._fp.read(size)
109 if data:
110 if data:
110 if self._callback:
111 if self._callback:
111 self._callback(len(data))
112 self._callback(len(data))
112 else:
113 else:
113 self._fp.close()
114 self._fp.close()
114 self._fp = None
115 self._fp = None
115 return data
116 return data
116
117
117 class local(object):
118 class local(object):
118 """Local blobstore for large file contents.
119 """Local blobstore for large file contents.
119
120
120 This blobstore is used both as a cache and as a staging area for large blobs
121 This blobstore is used both as a cache and as a staging area for large blobs
121 to be uploaded to the remote blobstore.
122 to be uploaded to the remote blobstore.
122 """
123 """
123
124
124 def __init__(self, repo):
125 def __init__(self, repo):
125 fullpath = repo.svfs.join('lfs/objects')
126 fullpath = repo.svfs.join('lfs/objects')
126 self.vfs = lfsvfs(fullpath)
127 self.vfs = lfsvfs(fullpath)
127
128
128 if repo.ui.configbool('experimental', 'lfs.disableusercache'):
129 if repo.ui.configbool('experimental', 'lfs.disableusercache'):
129 self.cachevfs = nullvfs()
130 self.cachevfs = nullvfs()
130 else:
131 else:
131 usercache = lfutil._usercachedir(repo.ui, 'lfs')
132 usercache = lfutil._usercachedir(repo.ui, 'lfs')
132 self.cachevfs = lfsvfs(usercache)
133 self.cachevfs = lfsvfs(usercache)
133 self.ui = repo.ui
134 self.ui = repo.ui
134
135
135 def open(self, oid):
136 def open(self, oid):
136 """Open a read-only file descriptor to the named blob, in either the
137 """Open a read-only file descriptor to the named blob, in either the
137 usercache or the local store."""
138 usercache or the local store."""
138 # The usercache is the most likely place to hold the file. Commit will
139 # The usercache is the most likely place to hold the file. Commit will
139 # write to both it and the local store, as will anything that downloads
140 # write to both it and the local store, as will anything that downloads
140 # the blobs. However, things like clone without an update won't
141 # the blobs. However, things like clone without an update won't
141 # populate the local store. For an init + push of a local clone,
142 # populate the local store. For an init + push of a local clone,
142 # the usercache is the only place it _could_ be. If not present, the
143 # the usercache is the only place it _could_ be. If not present, the
143 # missing file msg here will indicate the local repo, not the usercache.
144 # missing file msg here will indicate the local repo, not the usercache.
144 if self.cachevfs.exists(oid):
145 if self.cachevfs.exists(oid):
145 return self.cachevfs(oid, 'rb')
146 return self.cachevfs(oid, 'rb')
146
147
147 return self.vfs(oid, 'rb')
148 return self.vfs(oid, 'rb')
148
149
149 def download(self, oid, src):
150 def download(self, oid, src):
150 """Read the blob from the remote source in chunks, verify the content,
151 """Read the blob from the remote source in chunks, verify the content,
151 and write to this local blobstore."""
152 and write to this local blobstore."""
152 sha256 = hashlib.sha256()
153 sha256 = hashlib.sha256()
153
154
154 with self.vfs(oid, 'wb', atomictemp=True) as fp:
155 with self.vfs(oid, 'wb', atomictemp=True) as fp:
155 for chunk in util.filechunkiter(src, size=1048576):
156 for chunk in util.filechunkiter(src, size=1048576):
156 fp.write(chunk)
157 fp.write(chunk)
157 sha256.update(chunk)
158 sha256.update(chunk)
158
159
159 realoid = sha256.hexdigest()
160 realoid = node.hex(sha256.digest())
160 if realoid != oid:
161 if realoid != oid:
161 raise LfsCorruptionError(_('corrupt remote lfs object: %s')
162 raise LfsCorruptionError(_('corrupt remote lfs object: %s')
162 % oid)
163 % oid)
163
164
164 self._linktousercache(oid)
165 self._linktousercache(oid)
165
166
166 def write(self, oid, data):
167 def write(self, oid, data):
167 """Write blob to local blobstore.
168 """Write blob to local blobstore.
168
169
169 This should only be called from the filelog during a commit or similar.
170 This should only be called from the filelog during a commit or similar.
170 As such, there is no need to verify the data. Imports from a remote
171 As such, there is no need to verify the data. Imports from a remote
171 store must use ``download()`` instead."""
172 store must use ``download()`` instead."""
172 with self.vfs(oid, 'wb', atomictemp=True) as fp:
173 with self.vfs(oid, 'wb', atomictemp=True) as fp:
173 fp.write(data)
174 fp.write(data)
174
175
175 self._linktousercache(oid)
176 self._linktousercache(oid)
176
177
177 def linkfromusercache(self, oid):
178 def linkfromusercache(self, oid):
178 """Link blobs found in the user cache into this store.
179 """Link blobs found in the user cache into this store.
179
180
180 The server module needs to do this when it lets the client know not to
181 The server module needs to do this when it lets the client know not to
181 upload the blob, to ensure it is always available in this store.
182 upload the blob, to ensure it is always available in this store.
182 Normally this is done implicitly when the client reads or writes the
183 Normally this is done implicitly when the client reads or writes the
183 blob, but that doesn't happen when the server tells the client that it
184 blob, but that doesn't happen when the server tells the client that it
184 already has the blob.
185 already has the blob.
185 """
186 """
186 if (not isinstance(self.cachevfs, nullvfs)
187 if (not isinstance(self.cachevfs, nullvfs)
187 and not self.vfs.exists(oid)):
188 and not self.vfs.exists(oid)):
188 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
189 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
189 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
190 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
190
191
191 def _linktousercache(self, oid):
192 def _linktousercache(self, oid):
192 # XXX: should we verify the content of the cache, and hardlink back to
193 # XXX: should we verify the content of the cache, and hardlink back to
193 # the local store on success, but truncate, write and link on failure?
194 # the local store on success, but truncate, write and link on failure?
194 if (not self.cachevfs.exists(oid)
195 if (not self.cachevfs.exists(oid)
195 and not isinstance(self.cachevfs, nullvfs)):
196 and not isinstance(self.cachevfs, nullvfs)):
196 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
197 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
197 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
198 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
198
199
199 def read(self, oid, verify=True):
200 def read(self, oid, verify=True):
200 """Read blob from local blobstore."""
201 """Read blob from local blobstore."""
201 if not self.vfs.exists(oid):
202 if not self.vfs.exists(oid):
202 blob = self._read(self.cachevfs, oid, verify)
203 blob = self._read(self.cachevfs, oid, verify)
203
204
204 # Even if revlog will verify the content, it needs to be verified
205 # Even if revlog will verify the content, it needs to be verified
205 # now before making the hardlink to avoid propagating corrupt blobs.
206 # now before making the hardlink to avoid propagating corrupt blobs.
206 # Don't abort if corruption is detected, because `hg verify` will
207 # Don't abort if corruption is detected, because `hg verify` will
207 # give more useful info about the corruption- simply don't add the
208 # give more useful info about the corruption- simply don't add the
208 # hardlink.
209 # hardlink.
209 if verify or hashlib.sha256(blob).hexdigest() == oid:
210 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
210 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
211 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
211 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
212 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
212 else:
213 else:
213 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
214 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
214 blob = self._read(self.vfs, oid, verify)
215 blob = self._read(self.vfs, oid, verify)
215 return blob
216 return blob
216
217
217 def _read(self, vfs, oid, verify):
218 def _read(self, vfs, oid, verify):
218 """Read blob (after verifying) from the given store"""
219 """Read blob (after verifying) from the given store"""
219 blob = vfs.read(oid)
220 blob = vfs.read(oid)
220 if verify:
221 if verify:
221 _verify(oid, blob)
222 _verify(oid, blob)
222 return blob
223 return blob
223
224
224 def verify(self, oid):
225 def verify(self, oid):
225 """Indicate whether or not the hash of the underlying file matches its
226 """Indicate whether or not the hash of the underlying file matches its
226 name."""
227 name."""
227 sha256 = hashlib.sha256()
228 sha256 = hashlib.sha256()
228
229
229 with self.open(oid) as fp:
230 with self.open(oid) as fp:
230 for chunk in util.filechunkiter(fp, size=1048576):
231 for chunk in util.filechunkiter(fp, size=1048576):
231 sha256.update(chunk)
232 sha256.update(chunk)
232
233
233 return oid == sha256.hexdigest()
234 return oid == node.hex(sha256.digest())
234
235
235 def has(self, oid):
236 def has(self, oid):
236 """Returns True if the local blobstore contains the requested blob,
237 """Returns True if the local blobstore contains the requested blob,
237 False otherwise."""
238 False otherwise."""
238 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
239 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
239
240
240 def _urlerrorreason(urlerror):
241 def _urlerrorreason(urlerror):
241 '''Create a friendly message for the given URLError to be used in an
242 '''Create a friendly message for the given URLError to be used in an
242 LfsRemoteError message.
243 LfsRemoteError message.
243 '''
244 '''
244 inst = urlerror
245 inst = urlerror
245
246
246 if isinstance(urlerror.reason, Exception):
247 if isinstance(urlerror.reason, Exception):
247 inst = urlerror.reason
248 inst = urlerror.reason
248
249
249 if util.safehasattr(inst, 'reason'):
250 if util.safehasattr(inst, 'reason'):
250 try: # usually it is in the form (errno, strerror)
251 try: # usually it is in the form (errno, strerror)
251 reason = inst.reason.args[1]
252 reason = inst.reason.args[1]
252 except (AttributeError, IndexError):
253 except (AttributeError, IndexError):
253 # it might be anything, for example a string
254 # it might be anything, for example a string
254 reason = inst.reason
255 reason = inst.reason
255 if isinstance(reason, pycompat.unicode):
256 if isinstance(reason, pycompat.unicode):
256 # SSLError of Python 2.7.9 contains a unicode
257 # SSLError of Python 2.7.9 contains a unicode
257 reason = encoding.unitolocal(reason)
258 reason = encoding.unitolocal(reason)
258 return reason
259 return reason
259 elif getattr(inst, "strerror", None):
260 elif getattr(inst, "strerror", None):
260 return encoding.strtolocal(inst.strerror)
261 return encoding.strtolocal(inst.strerror)
261 else:
262 else:
262 return stringutil.forcebytestr(urlerror)
263 return stringutil.forcebytestr(urlerror)
263
264
264 class _gitlfsremote(object):
265 class _gitlfsremote(object):
265
266
266 def __init__(self, repo, url):
267 def __init__(self, repo, url):
267 ui = repo.ui
268 ui = repo.ui
268 self.ui = ui
269 self.ui = ui
269 baseurl, authinfo = url.authinfo()
270 baseurl, authinfo = url.authinfo()
270 self.baseurl = baseurl.rstrip('/')
271 self.baseurl = baseurl.rstrip('/')
271 useragent = repo.ui.config('experimental', 'lfs.user-agent')
272 useragent = repo.ui.config('experimental', 'lfs.user-agent')
272 if not useragent:
273 if not useragent:
273 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
274 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
274 self.urlopener = urlmod.opener(ui, authinfo, useragent)
275 self.urlopener = urlmod.opener(ui, authinfo, useragent)
275 self.retry = ui.configint('lfs', 'retry')
276 self.retry = ui.configint('lfs', 'retry')
276
277
277 def writebatch(self, pointers, fromstore):
278 def writebatch(self, pointers, fromstore):
278 """Batch upload from local to remote blobstore."""
279 """Batch upload from local to remote blobstore."""
279 self._batch(_deduplicate(pointers), fromstore, 'upload')
280 self._batch(_deduplicate(pointers), fromstore, 'upload')
280
281
281 def readbatch(self, pointers, tostore):
282 def readbatch(self, pointers, tostore):
282 """Batch download from remote to local blostore."""
283 """Batch download from remote to local blostore."""
283 self._batch(_deduplicate(pointers), tostore, 'download')
284 self._batch(_deduplicate(pointers), tostore, 'download')
284
285
    def _batchrequest(self, pointers, action):
        """Get metadata about objects pointed by pointers for given action

        Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
        """
        objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
        requestdata = json.dumps({
            'objects': objects,
            'operation': action,
        })
        url = '%s/objects/batch' % self.baseurl
        batchreq = util.urlreq.request(url, data=requestdata)
        batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
        batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
        try:
            with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
                rawjson = rsp.read()
        except util.urlerr.httperror as ex:
            hints = {
                400: _('check that lfs serving is enabled on %s and "%s" is '
                       'supported') % (self.baseurl, action),
                404: _('the "lfs.url" config may be used to override %s')
                     % self.baseurl,
            }
            hint = hints.get(ex.code, _('api=%s, action=%s') % (url, action))
            raise LfsRemoteError(_('LFS HTTP error: %s') % ex, hint=hint)
        except util.urlerr.urlerror as ex:
            hint = (_('the "lfs.url" config may be used to override %s')
                    % self.baseurl)
            raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
                                 hint=hint)
        try:
            response = json.loads(rawjson)
        except ValueError:
            raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
                                 % rawjson)

        if self.ui.debugflag:
            self.ui.debug('Status: %d\n' % rsp.status)
            # lfs-test-server and hg serve return headers in different order
            self.ui.debug('%s\n'
                          % '\n'.join(sorted(str(rsp.info()).splitlines())))

            if 'objects' in response:
                response['objects'] = sorted(response['objects'],
                                             key=lambda p: p['oid'])
            self.ui.debug('%s\n'
                          % json.dumps(response, indent=2,
                                       separators=('', ': '), sort_keys=True))

        return response

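    # An illustrative batch API exchange (the shape follows the git-lfs batch
    # spec linked above; the oid, size, and href values here are made up):
    #   request:  {"operation": "download",
    #              "objects": [{"oid": "31cf...", "size": 12}]}
    #   response: {"objects": [{"oid": "31cf...", "size": 12,
    #              "actions": {"download": {
    #                  "href": "https://server/objects/31cf...",
    #                  "header": {"Authorization": "Basic ..."}}}}]}
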
    def _checkforservererror(self, pointers, responses, action):
        """Scans errors from objects

        Raises LfsRemoteError if any objects have an error"""
        # Map oid -> pointer once, rather than rebuilding it per response.
        ptrmap = {p.oid(): p for p in pointers}
        for response in responses:
            # The server should return 404 when objects cannot be found. Some
            # server implementations (e.g. lfs-test-server) do not set "error"
            # but just remove "download" from "actions". Treat that case the
            # same as a 404 error.
            if 'error' not in response:
                if (action == 'download'
                    and action not in response.get('actions', [])):
                    code = 404
                else:
                    continue
            else:
                # An error dict without a code doesn't make much sense, so
                # treat it as a server error.
                code = response.get('error').get('code', 500)

            p = ptrmap.get(response['oid'], None)
            if p:
                filename = getattr(p, 'filename', 'unknown')
                errors = {
                    404: 'The object does not exist',
                    410: 'The object was removed by the owner',
                    422: 'Validation error',
                    500: 'Internal server error',
                }
                msg = errors.get(code, 'status code %d' % code)
                raise LfsRemoteError(_('LFS server error for "%s": %s')
                                     % (filename, msg))
            else:
                raise LfsRemoteError(
                    _('LFS server error. Unsolicited response for oid %s')
                    % response['oid'])

    def _extractobjects(self, response, pointers, action):
        """extract objects from response of the batch API

        response: parsed JSON object returned by batch API
        return response['objects'] filtered by action
        raise if any object has an error
        """
        # Scan errors from objects - fail early
        objects = response.get('objects', [])
        self._checkforservererror(pointers, objects, action)

        # Filter objects with given action. Practically, this skips uploading
        # objects which exist in the server.
        filteredobjects = [o for o in objects if action in o.get('actions', [])]

        return filteredobjects

    def _basictransfer(self, obj, action, localstore):
        """Download or upload a single object using basic transfer protocol

        obj: dict, an object description returned by batch API
        action: string, one of ['upload', 'download']
        localstore: blobstore.local

        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
        basic-transfers.md
        """
        oid = pycompat.bytestr(obj['oid'])

        href = pycompat.bytestr(obj['actions'][action].get('href'))
        headers = obj['actions'][action].get('header', {}).items()

        request = util.urlreq.request(href)
        if action == 'upload':
            # If uploading blobs, read data from local blobstore.
            if not localstore.verify(oid):
                raise error.Abort(_('detected corrupt lfs object: %s') % oid,
                                  hint=_('run hg verify'))
            request.data = filewithprogress(localstore.open(oid), None)
            request.get_method = lambda: 'PUT'
            request.add_header('Content-Type', 'application/octet-stream')

        for k, v in headers:
            request.add_header(k, v)

        response = b''
        try:
            with contextlib.closing(self.urlopener.open(request)) as req:
                ui = self.ui  # Shorten debug lines
                if self.ui.debugflag:
                    ui.debug('Status: %d\n' % req.status)
                    # lfs-test-server and hg serve return headers in different
                    # order
                    ui.debug('%s\n'
                             % '\n'.join(sorted(str(req.info()).splitlines())))

                if action == 'download':
                    # If downloading blobs, store downloaded data to local
                    # blobstore
                    localstore.download(oid, req)
                else:
                    while True:
                        data = req.read(1048576)
                        if not data:
                            break
                        response += data
                    if response:
                        ui.debug('lfs %s response: %s' % (action, response))
        except util.urlerr.httperror as ex:
            if self.ui.debugflag:
                self.ui.debug('%s: %s\n' % (oid, ex.read()))
            raise LfsRemoteError(_('LFS HTTP error: %s (oid=%s, action=%s)')
                                 % (ex, oid, action))
        except util.urlerr.urlerror as ex:
            hint = (_('attempted connection to %s')
                    % util.urllibcompat.getfullurl(request))
            raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
                                 hint=hint)

    def _batch(self, pointers, localstore, action):
        if action not in ['upload', 'download']:
            raise error.ProgrammingError('invalid Git-LFS action: %s' % action)

        response = self._batchrequest(pointers, action)
        objects = self._extractobjects(response, pointers, action)
        total = sum(x.get('size', 0) for x in objects)
        sizes = {}
        for obj in objects:
            sizes[obj.get('oid')] = obj.get('size', 0)
        topic = {'upload': _('lfs uploading'),
                 'download': _('lfs downloading')}[action]
        if len(objects) > 1:
            self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
                         % (len(objects), util.bytecount(total)))

        def transfer(chunk):
            for obj in chunk:
                objsize = obj.get('size', 0)
                if self.ui.verbose:
                    if action == 'download':
                        msg = _('lfs: downloading %s (%s)\n')
                    elif action == 'upload':
                        msg = _('lfs: uploading %s (%s)\n')
                    self.ui.note(msg % (obj.get('oid'),
                                        util.bytecount(objsize)))
                retry = self.retry
                while True:
                    try:
                        self._basictransfer(obj, action, localstore)
                        yield 1, obj.get('oid')
                        break
                    except socket.error as ex:
                        if retry > 0:
                            self.ui.note(
                                _('lfs: failed: %r (remaining retry %d)\n')
                                % (ex, retry))
                            retry -= 1
                            continue
                        raise

        # Until https multiplexing gets sorted out
        if self.ui.configbool('experimental', 'lfs.worker-enable'):
            oids = worker.worker(self.ui, 0.1, transfer, (),
                                 sorted(objects, key=lambda o: o.get('oid')))
        else:
            oids = transfer(sorted(objects, key=lambda o: o.get('oid')))

        with self.ui.makeprogress(topic, total=total) as progress:
            progress.update(0)
            processed = 0
            blobs = 0
            for _one, oid in oids:
                processed += sizes[oid]
                blobs += 1
                progress.update(processed)
                self.ui.note(_('lfs: processed: %s\n') % oid)

        if blobs > 0:
            if action == 'upload':
                self.ui.status(_('lfs: uploaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))
            elif action == 'download':
                self.ui.status(_('lfs: downloaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))

    def __del__(self):
        # copied from mercurial/httppeer.py
        urlopener = getattr(self, 'urlopener', None)
        if urlopener:
            for h in urlopener.handlers:
                h.close()
                getattr(h, "close_all", lambda: None)()

class _dummyremote(object):
    """Dummy store storing blobs to temp directory."""

    def __init__(self, repo, url):
        fullpath = repo.vfs.join('lfs', url.path)
        self.vfs = lfsvfs(fullpath)

    def writebatch(self, pointers, fromstore):
        for p in _deduplicate(pointers):
            content = fromstore.read(p.oid(), verify=True)
            with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
                fp.write(content)

    def readbatch(self, pointers, tostore):
        for p in _deduplicate(pointers):
            with self.vfs(p.oid(), 'rb') as fp:
                tostore.download(p.oid(), fp)

class _nullremote(object):
    """Null store storing blobs to /dev/null."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore):
        pass

    def readbatch(self, pointers, tostore):
        pass

class _promptremote(object):
    """Prompt user to set lfs.url when accessed."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore, ui=None):
        self._prompt()

    def readbatch(self, pointers, tostore, ui=None):
        self._prompt()

    def _prompt(self):
        raise error.Abort(_('lfs.url needs to be configured'))

_storemap = {
    'https': _gitlfsremote,
    'http': _gitlfsremote,
    'file': _dummyremote,
    'null': _nullremote,
    None: _promptremote,
}

def _deduplicate(pointers):
    """Remove any duplicate oids that exist in the list"""
    reduced = util.sortdict()
    for p in pointers:
        reduced[p.oid()] = p
    return reduced.values()

def _verify(oid, content):
-    realoid = hashlib.sha256(content).hexdigest()
+    realoid = node.hex(hashlib.sha256(content).digest())
    if realoid != oid:
        raise LfsCorruptionError(_('detected corrupt lfs object: %s') % oid,
                                 hint=_('run hg verify'))

def remote(repo, remote=None):
    """remotestore factory. return a store in _storemap depending on config

    If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
    infer the endpoint, based on the remote repository using the same path
    adjustments as git. As an extension, 'http' is supported as well so that
    ``hg serve`` works out of the box.

    https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
    """
    lfsurl = repo.ui.config('lfs', 'url')
    url = util.url(lfsurl or '')
    if lfsurl is None:
        if remote:
            path = remote
        elif util.safehasattr(repo, '_subtoppath'):
            # The pull command sets this during the optional update phase,
            # which tells exactly where the pull originated, whether
            # 'paths.default' or explicit.
            path = repo._subtoppath
        else:
            # TODO: investigate 'paths.remote:lfsurl' style path customization,
            # and fall back to inferring from 'paths.remote' if unspecified.
            path = repo.ui.config('paths', 'default') or ''

        defaulturl = util.url(path)

        # TODO: support local paths as well.
        # TODO: consider the ssh -> https transformation that git applies
        if defaulturl.scheme in (b'http', b'https'):
            # Append '/' only when the path doesn't already end with one.
            # (The original check read '[:-1]', which tested everything but
            # the final byte.)
            if defaulturl.path and defaulturl.path[-1:] != b'/':
                defaulturl.path += b'/'
            defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'

            url = util.url(bytes(defaulturl))
            repo.ui.note(_('lfs: assuming remote store: %s\n') % url)

    scheme = url.scheme
    if scheme not in _storemap:
        raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
    return _storemap[scheme](repo, url)

class LfsRemoteError(error.StorageError):
    pass

class LfsCorruptionError(error.Abort):
    """Raised when a corrupt blob is detected, aborting an operation

    It exists to allow specialized handling on the server side."""
@@ -1,539 +1,540 @@
from __future__ import absolute_import

import collections
import errno
import hashlib
import mmap
import os
import struct
import time

from mercurial.i18n import _
from mercurial import (
+    node as nodemod,
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from . import shallowutil

osutil = policy.importmod(r'osutil')

# The pack version supported by this implementation. This will need to be
# rev'd whenever the byte format changes. Ex: changing the fanout prefix,
# changing any of the int sizes, changing the delta algorithm, etc.
PACKVERSIONSIZE = 1
INDEXVERSIONSIZE = 2

FANOUTSTART = INDEXVERSIONSIZE

# Constant that indicates a fanout table entry hasn't been filled in. (This
# does not get serialized)
EMPTYFANOUT = -1

# The fanout prefix is the number of bytes that can be addressed by the fanout
# table. Example: a fanout prefix of 1 means we use the first byte of a hash to
# look in the fanout table (which will be 2^8 entries long).
SMALLFANOUTPREFIX = 1
LARGEFANOUTPREFIX = 2

# The number of entries in the index at which point we switch to a large
# fanout. It is chosen to balance the linear scan through a sparse fanout
# with the size of the bisect in the actual index.
# 2^16 // 8 was chosen because it trades off (1 step fanout scan + 5 step
# bisect) with (8 step fanout scan + 1 step bisect)
# 5 step bisect = log(2^16 / 8 / 255)  # fanout
# 8 step fanout scan = 2^16 / (2^16 / 8)  # fanout space divided by entries
SMALLFANOUTCUTOFF = 2**16 // 8  # integer division, so it stays an int on py3

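# Concretely: 2**16 // 8 = 8192 entries, so packs with more than 8192 nodes
# get the two-byte (65536-entry) fanout table.
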
# The amount of time to wait between checking for new packs. This prevents an
# exception when data is moved to a new pack after the process has already
# loaded the pack list.
REFRESHRATE = 0.1

if pycompat.isposix:
    # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
    # The 'e' flag will be ignored on older versions of glibc.
    PACKOPENMODE = 'rbe'
else:
    PACKOPENMODE = 'rb'

class _cachebackedpacks(object):
    def __init__(self, packs, cachesize):
        self._packs = set(packs)
        self._lrucache = util.lrucachedict(cachesize)
        self._lastpack = None

        # Avoid cold start of the cache by populating the most recent packs
        # in the cache.
        for i in reversed(range(min(cachesize, len(packs)))):
            self._movetofront(packs[i])

    def _movetofront(self, pack):
        # This effectively makes pack the first entry in the cache.
        self._lrucache[pack] = True

    def _registerlastpackusage(self):
        if self._lastpack is not None:
            self._movetofront(self._lastpack)
            self._lastpack = None

    def add(self, pack):
        self._registerlastpackusage()

        # This method will mostly be called when packs are not in the cache.
        # Therefore, add the pack to the cache.
        self._movetofront(pack)
        self._packs.add(pack)

    def __iter__(self):
        self._registerlastpackusage()

        # Cache iteration is based on LRU.
        for pack in self._lrucache:
            self._lastpack = pack
            yield pack

        cachedpacks = set(pack for pack in self._lrucache)
        # Yield for paths not in the cache.
        for pack in self._packs - cachedpacks:
            self._lastpack = pack
            yield pack

        # Data not found in any pack.
        self._lastpack = None

class basepackstore(object):
    # Default cache size limit for the pack files.
    DEFAULTCACHESIZE = 100

    def __init__(self, ui, path):
        self.ui = ui
        self.path = path

        # lastrefresh is 0 so we'll immediately check for new packs on the
        # first failure.
        self.lastrefresh = 0

        packs = []
        for filepath, __, __ in self._getavailablepackfilessorted():
            try:
                pack = self.getpack(filepath)
            except Exception as ex:
                # An exception may be thrown if the pack file is corrupted
                # somehow. Log a warning but keep going in this case, just
                # skipping this pack file.
                #
                # If this is an ENOENT error then don't even bother logging.
                # Someone could have removed the file since we retrieved the
                # list of paths.
                if getattr(ex, 'errno', None) != errno.ENOENT:
                    ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                continue
            packs.append(pack)

        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

    def _getavailablepackfiles(self):
        """For each pack file (an index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = collections.defaultdict(lambda: 0)
        mtimes = collections.defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size  # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (os.path.join(self.path, id), max(mtimes[id]),
                               sizes[id])
                    else:
                        ids.add(id)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise

    def _getavailablepackfilessorted(self):
        """Like `_getavailablepackfiles`, but also sorts the files by mtime,
        yielding newest files first.

        This is desirable, since it is more likely newer packfiles have more
        desirable data.
        """
        files = []
        for path, mtime, size in self._getavailablepackfiles():
            files.append((mtime, size, path))
        files = sorted(files, reverse=True)
        for mtime, size, path in files:
            yield path, mtime, size

    def gettotalsizeandcount(self):
        """Returns the total disk size (in bytes) of all the pack files in
        this store, and the count of pack files.

        (This might be smaller than the total size of the ``self.path``
        directory, since this only considers fully-written pack files, and
        not temporary files or other detritus in the directory.)
        """
        totalsize = 0
        count = 0
        for __, __, size in self._getavailablepackfiles():
            totalsize += size
            count += 1
        return totalsize, count

    def getmetrics(self):
        """Returns metrics on the state of this store."""
        size, count = self.gettotalsizeandcount()
        return {
            'numpacks': count,
            'totalpacksize': size,
        }

    def getpack(self, path):
        raise NotImplementedError()

    def getmissing(self, keys):
        missing = keys
        for pack in self.packs:
            missing = pack.getmissing(missing)

            # Ensures better performance of the cache by keeping the most
            # recently accessed pack at the beginning in subsequent iterations.
            if not missing:
                return missing

        if missing:
            for pack in self.refresh():
                missing = pack.getmissing(missing)

        return missing

    def markledger(self, ledger, options=None):
        for pack in self.packs:
            pack.markledger(ledger)

    def markforrefresh(self):
        """Tells the store that there may be new pack files, so the next time
        it has a lookup miss it should check for new files."""
        self.lastrefresh = 0

    def refresh(self):
        """Checks for any new packs on disk, adds them to the main pack list,
        and returns a list of just the new packs."""
        now = time.time()

        # If we experience a lot of misses (like in the case of getmissing()
        # on new objects), let's only actually check disk for new stuff every
        # once in a while. Generally this code path should only ever matter
        # when a repack is going on in the background, and it should be pretty
        # rare for that to happen twice in quick succession.
        newpacks = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            previous = set(p.path for p in self.packs)
            for filepath, __, __ in self._getavailablepackfilessorted():
                if filepath not in previous:
                    newpack = self.getpack(filepath)
                    newpacks.append(newpack)
                    self.packs.add(newpack)

        return newpacks

class versionmixin(object):
    # Mix-in for classes with multiple supported versions
    VERSION = None
    SUPPORTED_VERSIONS = [2]

    def _checkversion(self, version):
        if version in self.SUPPORTED_VERSIONS:
            if self.VERSION is None:
                # only affect this instance
                self.VERSION = version
            elif self.VERSION != version:
                raise RuntimeError('inconsistent version: %s' % version)
        else:
            raise RuntimeError('unsupported version: %s' % version)

class basepack(versionmixin):
    # The maximum amount we should read via mmap before remmaping so the old
    # pages can be released (100MB)
    MAXPAGEDIN = 100 * 1024**2

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        self.path = path
        self.packpath = path + self.PACKSUFFIX
        self.indexpath = path + self.INDEXSUFFIX

        self.indexsize = os.stat(self.indexpath).st_size
        self.datasize = os.stat(self.packpath).st_size

        self._index = None
        self._data = None
        self.freememory()  # initialize the mmap

        version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
        self._checkversion(version)

        version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
        self._checkversion(version)

        if 0b10000000 & config:
            self.params = indexparams(LARGEFANOUTPREFIX, version)
        else:
            self.params = indexparams(SMALLFANOUTPREFIX, version)

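    # A sketch of the on-disk layout implied by the reads above (the index
    # entry format itself is defined by subclasses via INDEXENTRYLENGTH, and
    # the file suffixes via PACKSUFFIX/INDEXSUFFIX):
    #   pack file:  [1-byte version][entries...]
    #   index file: [1-byte version][1-byte config][fanout table]
    #               [8-byte entry count][index entries]
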
    @util.propertycache
    def _fanouttable(self):
        params = self.params
        rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
        fanouttable = []
        for i in pycompat.xrange(0, params.fanoutcount):
            loc = i * 4
            fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
            fanouttable.append(fanoutentry)
        return fanouttable

    @util.propertycache
    def _indexend(self):
        nodecount = struct.unpack_from('!Q', self._index,
                                       self.params.indexstart - 8)[0]
        return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

    def freememory(self):
        """Unmap and remap the memory to free it up after known expensive
        operations. Return True if self._data and self._index were reloaded.
        """
        if self._index:
            if self._pagedin < self.MAXPAGEDIN:
                return False

            self._index.close()
            self._data.close()

        # TODO: use an opener/vfs to access these paths
        with open(self.indexpath, PACKOPENMODE) as indexfp:
            # memory-map the file, size 0 means whole file
            self._index = mmap.mmap(indexfp.fileno(), 0,
                                    access=mmap.ACCESS_READ)
        with open(self.packpath, PACKOPENMODE) as datafp:
            self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

        self._pagedin = 0
        return True

    def getmissing(self, keys):
        raise NotImplementedError()

    def markledger(self, ledger, options=None):
        raise NotImplementedError()

    def cleanup(self, ledger):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def iterentries(self):
        raise NotImplementedError()

class mutablebasepack(versionmixin):

    def __init__(self, ui, packdir, version=2):
        self._checkversion(version)
        # TODO(augie): make this configurable
        self._compressor = 'GZ'
        opener = vfsmod.vfs(packdir)
        opener.createmode = 0o444
        self.opener = opener

        self.entries = {}

        shallowutil.mkstickygroupdir(ui, packdir)
        self.packfp, self.packpath = opener.mkstemp(
            suffix=self.PACKSUFFIX + '-tmp')
        self.idxfp, self.idxpath = opener.mkstemp(
            suffix=self.INDEXSUFFIX + '-tmp')
        self.packfp = os.fdopen(self.packfp, r'wb+')
        self.idxfp = os.fdopen(self.idxfp, r'wb+')
        self.sha = hashlib.sha1()
        self._closed = False

        # The opener provides no way of doing permission fixup on files
        # created via mkstemp, so we must fix it ourselves. We can probably
        # fix this upstream in vfs.mkstemp so we don't need to use the
        # private method.
        opener._fixfilemode(opener.join(self.packpath))
        opener._fixfilemode(opener.join(self.idxpath))

        # Write header
        # TODO: make it extensible (ex: allow specifying compression algorithm,
        # a flexible key/value header, delta algorithm, fanout size, etc)
        versionbuf = struct.pack('!B', self.VERSION)  # unsigned 1 byte int
        self.writeraw(versionbuf)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            self.close()
        else:
            self.abort()

    def abort(self):
        # Unclean exit
        self._cleantemppacks()

    def writeraw(self, data):
        self.packfp.write(data)
        self.sha.update(data)

    def close(self, ledger=None):
        if self._closed:
            return

        try:
-            sha = self.sha.hexdigest()
+            sha = nodemod.hex(self.sha.digest())
            self.packfp.close()
            self.writeindex()

            if len(self.entries) == 0:
                # Empty pack
                self._cleantemppacks()
                self._closed = True
                return None

            self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
            try:
                self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
            except Exception as ex:
                try:
                    self.opener.unlink(sha + self.PACKSUFFIX)
                except Exception:
                    pass
                # Throw exception 'ex' explicitly since a normal 'raise' would
                # potentially throw an exception from the unlink cleanup.
                raise ex
        except Exception:
            # Clean up temp packs in all exception cases
            self._cleantemppacks()
            raise

        self._closed = True
        result = self.opener.join(sha)
        if ledger:
            ledger.addcreated(result)
        return result

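    # Note the renames above: finished packs are content-addressed by the
    # sha1 of everything written through writeraw(), and (as with _verify()
    # in the first file) nodemod.hex(...digest()) replaces hexdigest() so
    # the resulting name stays bytes on Python 3.
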
    def _cleantemppacks(self):
        try:
            self.opener.unlink(self.packpath)
        except Exception:
            pass
        try:
            self.opener.unlink(self.idxpath)
        except Exception:
            pass

    def writeindex(self):
        rawindex = ''

        largefanout = len(self.entries) > SMALLFANOUTCUTOFF
        if largefanout:
            params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
        else:
            params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

        fanouttable = [EMPTYFANOUT] * params.fanoutcount

        # Precompute the location of each entry
        locations = {}
        count = 0
        for node in sorted(self.entries):
            location = count * self.INDEXENTRYLENGTH
            locations[node] = location
            count += 1

            # Must use [0] on the unpack result since it's always a tuple.
            fanoutkey = struct.unpack(params.fanoutstruct,
                                      node[:params.fanoutprefix])[0]
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location

        rawfanouttable = ''
        last = 0
        for offset in fanouttable:
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            rawfanouttable += struct.pack('!I', offset)

        rawentrieslength = struct.pack('!Q', len(self.entries))

        # The index offset is its location in the file, i.e. right after the
        # 2-byte header and the fanout table.
        rawindex = self.createindex(locations, 2 + len(rawfanouttable))

        self._writeheader(params)
        self.idxfp.write(rawfanouttable)
        self.idxfp.write(rawentrieslength)
        self.idxfp.write(rawindex)
        self.idxfp.close()

    def createindex(self, nodelocations, indexoffset):
        raise NotImplementedError()

    def _writeheader(self, indexparams):
        # Index header
        #   <version: 1 byte>
        #   <large fanout: 1 bit>  # 1 means 2^16, 0 means 2^8
        #   <unused: 7 bit>  # future use (compression, delta format, etc)
        config = 0
        if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
            config = 0b10000000
        self.idxfp.write(struct.pack('!BB', self.VERSION, config))

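    # For instance, version 2 with a large fanout writes
    # struct.pack('!BB', 2, 0b10000000) == b'\x02\x80' as the index header.
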
class indexparams(object):
    __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
                 r'fanoutsize', r'indexstart')

    def __init__(self, prefixsize, version):
        self.fanoutprefix = prefixsize

        # The struct pack format for fanout table location (i.e. the format
        # that converts the node prefix into an integer location in the
        # fanout table).
        if prefixsize == SMALLFANOUTPREFIX:
            self.fanoutstruct = '!B'
        elif prefixsize == LARGEFANOUTPREFIX:
            self.fanoutstruct = '!H'
        else:
            raise ValueError("invalid fanout prefix size: %s" % prefixsize)

        # The number of fanout table entries
        self.fanoutcount = 2**(prefixsize * 8)

        # The total bytes used by the fanout table
        self.fanoutsize = self.fanoutcount * 4

        self.indexstart = FANOUTSTART + self.fanoutsize
        # Skip the index length
        self.indexstart += 8
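
# Worked example (the values follow directly from the fields above): for
# prefixsize=1, fanoutstruct='!B', fanoutcount=256, fanoutsize=1024 bytes,
# and indexstart = 2 + 1024 + 8 = 1034. For prefixsize=2 the fanout grows to
# 65536 entries (256KB) and indexstart = 2 + 262144 + 8 = 262154.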