py3: use node.hex(m.digest()) instead of m.hexdigest()...
Pulkit Goyal
r40711:9fcf8084 default
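
For context on the hunks below: on Python 3, hashlib's hexdigest() returns a unicode str while digest() returns bytes, and Mercurial handles hashes as bytes throughout, so the patch routes the raw digest through node.hex() instead. A minimal standalone sketch (standard library only, with binascii.hexlify standing in for mercurial.node.hex, which is assumed to behave the same way: bytes in, lowercase hex bytes out) illustrates the difference:

    import binascii
    import hashlib

    m = hashlib.sha1(b'some file content')

    text_hex = m.hexdigest()                 # str on Python 3, bytes-like str on Python 2
    byte_hex = binascii.hexlify(m.digest())  # bytes on both Python 2 and Python 3

    # Both spellings encode the same value; only the type differs.
    assert byte_hex.decode('ascii') == text_hex

Keeping the hash as bytes avoids str/bytes mismatches (b'...' == '...' is simply False on Python 3) in code that slices, compares, or stores the hex value alongside other bytes, which is what the changed lines below rely on.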
@@ -1,829 +1,829 b''
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # context: context needed to annotate a file
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 12 import hashlib
13 13 import os
14 14
15 15 from mercurial.i18n import _
16 16 from mercurial import (
17 17 error,
18 18 linelog as linelogmod,
19 19 lock as lockmod,
20 20 mdiff,
21 21 node,
22 22 pycompat,
23 23 scmutil,
24 24 util,
25 25 )
26 26 from mercurial.utils import (
27 27 stringutil,
28 28 )
29 29
30 30 from . import (
31 31 error as faerror,
32 32 revmap as revmapmod,
33 33 )
34 34
35 35 # given path, get filelog, cached
36 36 @util.lrucachefunc
37 37 def _getflog(repo, path):
38 38 return repo.file(path)
39 39
40 40 # extracted from mercurial.context.basefilectx.annotate
41 41 def _parents(f, follow=True):
42 42 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
43 43 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
44 44 # from the topmost introrev (= srcrev) down to p.linkrev() if it
45 45 # isn't an ancestor of the srcrev.
46 46 f._changeid
47 47 pl = f.parents()
48 48
49 49 # Don't return renamed parents if we aren't following.
50 50 if not follow:
51 51 pl = [p for p in pl if p.path() == f.path()]
52 52
53 53 # renamed filectx won't have a filelog yet, so set it
54 54 # from the cache to save time
55 55 for p in pl:
56 56 if not '_filelog' in p.__dict__:
57 57 p._filelog = _getflog(f._repo, p.path())
58 58
59 59 return pl
60 60
61 61 # extracted from mercurial.context.basefilectx.annotate. slightly modified
62 62 # so it takes a fctx instead of a pair of text and fctx.
63 63 def _decorate(fctx):
64 64 text = fctx.data()
65 65 linecount = text.count('\n')
66 66 if text and not text.endswith('\n'):
67 67 linecount += 1
68 68 return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
69 69
70 70 # extracted from mercurial.context.basefilectx.annotate. slightly modified
71 71 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
72 72 # calculating diff here.
73 73 def _pair(parent, child, blocks):
74 74 for (a1, a2, b1, b2), t in blocks:
75 75 # Changed blocks ('!') or blocks made only of blank lines ('~')
76 76 # belong to the child.
77 77 if t == '=':
78 78 child[0][b1:b2] = parent[0][a1:a2]
79 79 return child
80 80
81 81 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
82 82 # could be reused
83 83 _revsingle = util.lrucachefunc(scmutil.revsingle)
84 84
85 85 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
86 86 """(repo, str, str) -> fctx
87 87
88 88 get the filectx object from repo, rev, path, in an efficient way.
89 89
90 90 if resolverev is True, "rev" is a revision specified by the revset
91 91 language, otherwise "rev" is a nodeid, or a revision number that can
92 92 be consumed by repo.__getitem__.
93 93
94 94 if adjustctx is not None, the returned fctx will point to a changeset
95 95 that introduces the change (last modified the file). if adjustctx
96 96 is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
97 97 faster for big repos but is incorrect for some cases.
98 98 """
99 99 if resolverev and not isinstance(rev, int) and rev is not None:
100 100 ctx = _revsingle(repo, rev)
101 101 else:
102 102 ctx = repo[rev]
103 103
104 104 # If we don't need to adjust the linkrev, create the filectx using the
105 105 # changectx instead of using ctx[path]. This means it already has the
106 106 # changectx information, so blame -u will be able to look directly at the
107 107 # commitctx object instead of having to resolve it by going through the
108 108 # manifest. In a lazy-manifest world this can prevent us from downloading a
109 109 # lot of data.
110 110 if adjustctx is None:
111 111 # ctx.rev() is None means it's the working copy, which is a special
112 112 # case.
113 113 if ctx.rev() is None:
114 114 fctx = ctx[path]
115 115 else:
116 116 fctx = repo.filectx(path, changeid=ctx.rev())
117 117 else:
118 118 fctx = ctx[path]
119 119 if adjustctx == 'linkrev':
120 120 introrev = fctx.linkrev()
121 121 else:
122 122 introrev = fctx.introrev()
123 123 if introrev != ctx.rev():
124 124 fctx._changeid = introrev
125 125 fctx._changectx = repo[introrev]
126 126 return fctx
127 127
128 128 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
129 129 def encodedir(path):
130 130 return (path
131 131 .replace('.hg/', '.hg.hg/')
132 132 .replace('.l/', '.l.hg/')
133 133 .replace('.m/', '.m.hg/')
134 134 .replace('.lock/', '.lock.hg/'))
135 135
136 136 def hashdiffopts(diffopts):
137 137 diffoptstr = stringutil.pprint(sorted(
138 138 (k, getattr(diffopts, k))
139 139 for k in mdiff.diffopts.defaults
140 140 ))
141 return hashlib.sha1(diffoptstr).hexdigest()[:6]
141 return node.hex(hashlib.sha1(diffoptstr).digest())[:6]
142 142
143 143 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
144 144
145 145 class annotateopts(object):
146 146 """like mercurial.mdiff.diffopts, but is for annotate
147 147
148 148 followrename: follow renames, like "hg annotate -f"
149 149 followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
150 150 """
151 151
152 152 defaults = {
153 153 'diffopts': None,
154 154 'followrename': True,
155 155 'followmerge': True,
156 156 }
157 157
158 158 def __init__(self, **opts):
159 159 opts = pycompat.byteskwargs(opts)
160 160 for k, v in self.defaults.iteritems():
161 161 setattr(self, k, opts.get(k, v))
162 162
163 163 @util.propertycache
164 164 def shortstr(self):
165 165 """represent opts in a short string, suitable for a directory name"""
166 166 result = ''
167 167 if not self.followrename:
168 168 result += 'r0'
169 169 if not self.followmerge:
170 170 result += 'm0'
171 171 if self.diffopts is not None:
172 172 assert isinstance(self.diffopts, mdiff.diffopts)
173 173 diffopthash = hashdiffopts(self.diffopts)
174 174 if diffopthash != _defaultdiffopthash:
175 175 result += 'i' + diffopthash
176 176 return result or 'default'
177 177
178 178 defaultopts = annotateopts()
179 179
180 180 class _annotatecontext(object):
181 181 """do not use this class directly as it does not use lock to protect
182 182 writes. use "with annotatecontext(...)" instead.
183 183 """
184 184
185 185 def __init__(self, repo, path, linelogpath, revmappath, opts):
186 186 self.repo = repo
187 187 self.ui = repo.ui
188 188 self.path = path
189 189 self.opts = opts
190 190 self.linelogpath = linelogpath
191 191 self.revmappath = revmappath
192 192 self._linelog = None
193 193 self._revmap = None
194 194 self._node2path = {} # {str: str}
195 195
196 196 @property
197 197 def linelog(self):
198 198 if self._linelog is None:
199 199 if os.path.exists(self.linelogpath):
200 200 with open(self.linelogpath, 'rb') as f:
201 201 try:
202 202 self._linelog = linelogmod.linelog.fromdata(f.read())
203 203 except linelogmod.LineLogError:
204 204 self._linelog = linelogmod.linelog()
205 205 else:
206 206 self._linelog = linelogmod.linelog()
207 207 return self._linelog
208 208
209 209 @property
210 210 def revmap(self):
211 211 if self._revmap is None:
212 212 self._revmap = revmapmod.revmap(self.revmappath)
213 213 return self._revmap
214 214
215 215 def close(self):
216 216 if self._revmap is not None:
217 217 self._revmap.flush()
218 218 self._revmap = None
219 219 if self._linelog is not None:
220 220 with open(self.linelogpath, 'wb') as f:
221 221 f.write(self._linelog.encode())
222 222 self._linelog = None
223 223
224 224 __del__ = close
225 225
226 226 def rebuild(self):
227 227 """delete linelog and revmap, useful for rebuilding"""
228 228 self.close()
229 229 self._node2path.clear()
230 230 _unlinkpaths([self.revmappath, self.linelogpath])
231 231
232 232 @property
233 233 def lastnode(self):
234 234 """return last node in revmap, or None if revmap is empty"""
235 235 if self._revmap is None:
236 236 # fast path, read revmap without loading its full content
237 237 return revmapmod.getlastnode(self.revmappath)
238 238 else:
239 239 return self._revmap.rev2hsh(self._revmap.maxrev)
240 240
241 241 def isuptodate(self, master, strict=True):
242 242 """return True if the revmap / linelog is up-to-date, or the file
243 243 does not exist in the master revision. False otherwise.
244 244
245 245 it tries to be fast and could return false negatives, because of the
246 246 use of linkrev instead of introrev.
247 247
248 248 useful for both server and client to decide whether to update
249 249 fastannotate cache or not.
250 250
251 251 if strict is True, even if fctx exists in the revmap, but is not the
252 252 last node, isuptodate will return False. it's good for performance - no
253 253 expensive check was done.
254 254
255 255 if strict is False, if fctx exists in the revmap, this function may
256 256 return True. this is useful for the client to skip downloading the
257 257 cache if the client's master is behind the server's.
258 258 """
259 259 lastnode = self.lastnode
260 260 try:
261 261 f = self._resolvefctx(master, resolverev=True)
262 262 # choose linkrev instead of introrev as the check is meant to be
263 263 # *fast*.
264 264 linknode = self.repo.changelog.node(f.linkrev())
265 265 if not strict and lastnode and linknode != lastnode:
266 266 # check if f.node() is in the revmap. note: this loads the
267 267 # revmap and can be slow.
268 268 return self.revmap.hsh2rev(linknode) is not None
269 269 # avoid resolving old manifest, or slow adjustlinkrev to be fast,
270 270 # false negatives are acceptable in this case.
271 271 return linknode == lastnode
272 272 except LookupError:
273 273 # master does not have the file, or the revmap is ahead
274 274 return True
275 275
276 276 def annotate(self, rev, master=None, showpath=False, showlines=False):
277 277 """incrementally update the cache so it includes revisions in the main
278 278 branch till 'master'. and run annotate on 'rev', which may or may not be
279 279 included in the main branch.
280 280
281 281 if master is None, do not update linelog.
282 282
283 283 the first value returned is the annotate result, it is [(node, linenum)]
284 284 by default. [(node, linenum, path)] if showpath is True.
285 285
286 286 if showlines is True, a second value will be returned, it is a list of
287 287 corresponding line contents.
288 288 """
289 289
290 290 # the fast path test requires commit hash, convert rev number to hash,
291 291 # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
292 292 # command could give us a revision number even if the user passes a
293 293 # commit hash.
294 294 if isinstance(rev, int):
295 295 rev = node.hex(self.repo.changelog.node(rev))
296 296
297 297 # fast path: if rev is in the main branch already
298 298 directly, revfctx = self.canannotatedirectly(rev)
299 299 if directly:
300 300 if self.ui.debugflag:
301 301 self.ui.debug('fastannotate: %s: using fast path '
302 302 '(resolved fctx: %s)\n'
303 303 % (self.path,
304 304 stringutil.pprint(util.safehasattr(revfctx,
305 305 'node'))))
306 306 return self.annotatedirectly(revfctx, showpath, showlines)
307 307
308 308 # resolve master
309 309 masterfctx = None
310 310 if master:
311 311 try:
312 312 masterfctx = self._resolvefctx(master, resolverev=True,
313 313 adjustctx=True)
314 314 except LookupError: # master does not have the file
315 315 pass
316 316 else:
317 317 if masterfctx in self.revmap: # no need to update linelog
318 318 masterfctx = None
319 319
320 320 # ... - @ <- rev (can be an arbitrary changeset,
321 321 # / not necessarily a descendant
322 322 # master -> o of master)
323 323 # |
324 324 # a merge -> o 'o': new changesets in the main branch
325 325 # |\ '#': revisions in the main branch that
326 326 # o * exist in linelog / revmap
327 327 # | . '*': changesets in side branches, or
328 328 # last master -> # . descendants of master
329 329 # | .
330 330 # # * joint: '#', and is a parent of a '*'
331 331 # |/
332 332 # a joint -> # ^^^^ --- side branches
333 333 # |
334 334 # ^ --- main branch (in linelog)
335 335
336 336 # these DFSes are similar to the traditional annotate algorithm.
337 337 # we cannot really reuse the code for perf reason.
338 338
339 339 # 1st DFS calculates merges, joint points, and needed.
340 340 # "needed" is a simple reference counting dict to free items in
341 341 # "hist", reducing its memory usage, which could otherwise be huge.
342 342 initvisit = [revfctx]
343 343 if masterfctx:
344 344 if masterfctx.rev() is None:
345 345 raise error.Abort(_('cannot update linelog to wdir()'),
346 346 hint=_('set fastannotate.mainbranch'))
347 347 initvisit.append(masterfctx)
348 348 visit = initvisit[:]
349 349 pcache = {}
350 350 needed = {revfctx: 1}
351 351 hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
352 352 while visit:
353 353 f = visit.pop()
354 354 if f in pcache or f in hist:
355 355 continue
356 356 if f in self.revmap: # in the old main branch, it's a joint
357 357 llrev = self.revmap.hsh2rev(f.node())
358 358 self.linelog.annotate(llrev)
359 359 result = self.linelog.annotateresult
360 360 hist[f] = (result, f.data())
361 361 continue
362 362 pl = self._parentfunc(f)
363 363 pcache[f] = pl
364 364 for p in pl:
365 365 needed[p] = needed.get(p, 0) + 1
366 366 if p not in pcache:
367 367 visit.append(p)
368 368
369 369 # 2nd (simple) DFS calculates new changesets in the main branch
370 370 # ('o' nodes in the above graph), so we know when to update linelog.
371 371 newmainbranch = set()
372 372 f = masterfctx
373 373 while f and f not in self.revmap:
374 374 newmainbranch.add(f)
375 375 pl = pcache[f]
376 376 if pl:
377 377 f = pl[0]
378 378 else:
379 379 f = None
380 380 break
381 381
382 382 # f, if present, is the position where the last build stopped at, and
383 383 # should be the "master" last time. check to see if we can continue
384 384 # building the linelog incrementally. (we cannot if diverged)
385 385 if masterfctx is not None:
386 386 self._checklastmasterhead(f)
387 387
388 388 if self.ui.debugflag:
389 389 if newmainbranch:
390 390 self.ui.debug('fastannotate: %s: %d new changesets in the main'
391 391 ' branch\n' % (self.path, len(newmainbranch)))
392 392 elif not hist: # no joints, no updates
393 393 self.ui.debug('fastannotate: %s: linelog cannot help in '
394 394 'annotating this revision\n' % self.path)
395 395
396 396 # prepare annotateresult so we can update linelog incrementally
397 397 self.linelog.annotate(self.linelog.maxrev)
398 398
399 399 # 3rd DFS does the actual annotate
400 400 visit = initvisit[:]
401 401 progress = 0
402 402 while visit:
403 403 f = visit[-1]
404 404 if f in hist:
405 405 visit.pop()
406 406 continue
407 407
408 408 ready = True
409 409 pl = pcache[f]
410 410 for p in pl:
411 411 if p not in hist:
412 412 ready = False
413 413 visit.append(p)
414 414 if not ready:
415 415 continue
416 416
417 417 visit.pop()
418 418 blocks = None # mdiff blocks, used for appending linelog
419 419 ismainbranch = (f in newmainbranch)
420 420 # curr is the same as the traditional annotate algorithm,
421 421 # if we only care about linear history (do not follow merge),
422 422 # then curr is not actually used.
423 423 assert f not in hist
424 424 curr = _decorate(f)
425 425 for i, p in enumerate(pl):
426 426 bs = list(self._diffblocks(hist[p][1], curr[1]))
427 427 if i == 0 and ismainbranch:
428 428 blocks = bs
429 429 curr = _pair(hist[p], curr, bs)
430 430 if needed[p] == 1:
431 431 del hist[p]
432 432 del needed[p]
433 433 else:
434 434 needed[p] -= 1
435 435
436 436 hist[f] = curr
437 437 del pcache[f]
438 438
439 439 if ismainbranch: # need to write to linelog
440 440 if not self.ui.quiet:
441 441 progress += 1
442 442 self.ui.progress(_('building cache'), progress,
443 443 total=len(newmainbranch))
444 444 bannotated = None
445 445 if len(pl) == 2 and self.opts.followmerge: # merge
446 446 bannotated = curr[0]
447 447 if blocks is None: # no parents, add an empty one
448 448 blocks = list(self._diffblocks('', curr[1]))
449 449 self._appendrev(f, blocks, bannotated)
450 450 elif showpath: # not append linelog, but we need to record path
451 451 self._node2path[f.node()] = f.path()
452 452
453 453 if progress: # clean progress bar
454 454 self.ui.write()
455 455
456 456 result = [
457 457 ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
458 458 for fr, l in hist[revfctx][0]] # [(node, linenumber)]
459 459 return self._refineannotateresult(result, revfctx, showpath, showlines)
460 460
461 461 def canannotatedirectly(self, rev):
462 462 """(str) -> bool, fctx or node.
463 463 return (True, f) if we can annotate without updating the linelog, pass
464 464 f to annotatedirectly.
465 465 return (False, f) if we need extra calculation. f is the fctx resolved
466 466 from rev.
467 467 """
468 468 result = True
469 469 f = None
470 470 if not isinstance(rev, int) and rev is not None:
471 471 hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
472 472 if hsh is not None and (hsh, self.path) in self.revmap:
473 473 f = hsh
474 474 if f is None:
475 475 adjustctx = 'linkrev' if self._perfhack else True
476 476 f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
477 477 result = f in self.revmap
478 478 if not result and self._perfhack:
479 479 # redo the resolution without perfhack - as we are going to
480 480 # do write operations, we need a correct fctx.
481 481 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
482 482 return result, f
483 483
484 484 def annotatealllines(self, rev, showpath=False, showlines=False):
485 485 """(rev : str) -> [(node : str, linenum : int, path : str)]
486 486
487 487 the result has the same format as annotate, but includes all (including
488 488 deleted) lines up to rev. call this after calling annotate(rev, ...) for
489 489 better performance and accuracy.
490 490 """
491 491 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
492 492
493 493 # find a chain from rev to anything in the mainbranch
494 494 if revfctx not in self.revmap:
495 495 chain = [revfctx]
496 496 a = ''
497 497 while True:
498 498 f = chain[-1]
499 499 pl = self._parentfunc(f)
500 500 if not pl:
501 501 break
502 502 if pl[0] in self.revmap:
503 503 a = pl[0].data()
504 504 break
505 505 chain.append(pl[0])
506 506
507 507 # both self.linelog and self.revmap is backed by filesystem. now
508 508 # we want to modify them but do not want to write changes back to
509 509 # files. so we create in-memory objects and copy them. it's like
510 510 # a "fork".
511 511 linelog = linelogmod.linelog()
512 512 linelog.copyfrom(self.linelog)
513 513 linelog.annotate(linelog.maxrev)
514 514 revmap = revmapmod.revmap()
515 515 revmap.copyfrom(self.revmap)
516 516
517 517 for f in reversed(chain):
518 518 b = f.data()
519 519 blocks = list(self._diffblocks(a, b))
520 520 self._doappendrev(linelog, revmap, f, blocks)
521 521 a = b
522 522 else:
523 523 # fastpath: use existing linelog, revmap as we don't write to them
524 524 linelog = self.linelog
525 525 revmap = self.revmap
526 526
527 527 lines = linelog.getalllines()
528 528 hsh = revfctx.node()
529 529 llrev = revmap.hsh2rev(hsh)
530 530 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
531 531 # cannot use _refineannotateresult since we need custom logic for
532 532 # resolving line contents
533 533 if showpath:
534 534 result = self._addpathtoresult(result, revmap)
535 535 if showlines:
536 536 linecontents = self._resolvelines(result, revmap, linelog)
537 537 result = (result, linecontents)
538 538 return result
539 539
540 540 def _resolvelines(self, annotateresult, revmap, linelog):
541 541 """(annotateresult) -> [line]. designed for annotatealllines.
542 542 this is probably the most inefficient code in the whole fastannotate
543 543 directory. but we have made a decision that the linelog does not
544 544 store line contents. so getting them requires random accesses to
545 545 the revlog data, since they can be many, it can be very slow.
546 546 """
547 547 # [llrev]
548 548 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
549 549 result = [None] * len(annotateresult)
550 550 # {(rev, linenum): [lineindex]}
551 551 key2idxs = collections.defaultdict(list)
552 552 for i in pycompat.xrange(len(result)):
553 553 key2idxs[(revs[i], annotateresult[i][1])].append(i)
554 554 while key2idxs:
555 555 # find an unresolved line and its linelog rev to annotate
556 556 hsh = None
557 557 try:
558 558 for (rev, _linenum), idxs in key2idxs.iteritems():
559 559 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
560 560 continue
561 561 hsh = annotateresult[idxs[0]][0]
562 562 break
563 563 except StopIteration: # no more unresolved lines
564 564 return result
565 565 if hsh is None:
566 566 # the remaining key2idxs are not in main branch, resolving them
567 567 # using the hard way...
568 568 revlines = {}
569 569 for (rev, linenum), idxs in key2idxs.iteritems():
570 570 if rev not in revlines:
571 571 hsh = annotateresult[idxs[0]][0]
572 572 if self.ui.debugflag:
573 573 self.ui.debug('fastannotate: reading %s line #%d '
574 574 'to resolve lines %r\n'
575 575 % (node.short(hsh), linenum, idxs))
576 576 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
577 577 lines = mdiff.splitnewlines(fctx.data())
578 578 revlines[rev] = lines
579 579 for idx in idxs:
580 580 result[idx] = revlines[rev][linenum]
581 581 assert all(x is not None for x in result)
582 582 return result
583 583
584 584 # run the annotate and the lines should match to the file content
585 585 self.ui.debug('fastannotate: annotate %s to resolve lines\n'
586 586 % node.short(hsh))
587 587 linelog.annotate(rev)
588 588 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
589 589 annotated = linelog.annotateresult
590 590 lines = mdiff.splitnewlines(fctx.data())
591 591 if len(lines) != len(annotated):
592 592 raise faerror.CorruptedFileError('unexpected annotated lines')
593 593 # resolve lines from the annotate result
594 594 for i, line in enumerate(lines):
595 595 k = annotated[i]
596 596 if k in key2idxs:
597 597 for idx in key2idxs[k]:
598 598 result[idx] = line
599 599 del key2idxs[k]
600 600 return result
601 601
602 602 def annotatedirectly(self, f, showpath, showlines):
603 603 """like annotate, but when we know that f is in linelog.
604 604 f can be either a 20-char str (node) or a fctx. this is for perf - in
605 605 the best case, the user provides a node and we don't need to read the
606 606 filelog or construct any filecontext.
607 607 """
608 608 if isinstance(f, str):
609 609 hsh = f
610 610 else:
611 611 hsh = f.node()
612 612 llrev = self.revmap.hsh2rev(hsh)
613 613 if not llrev:
614 614 raise faerror.CorruptedFileError('%s is not in revmap'
615 615 % node.hex(hsh))
616 616 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
617 617 raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
618 618 % node.hex(hsh))
619 619 self.linelog.annotate(llrev)
620 620 result = [(self.revmap.rev2hsh(r), l)
621 621 for r, l in self.linelog.annotateresult]
622 622 return self._refineannotateresult(result, f, showpath, showlines)
623 623
624 624 def _refineannotateresult(self, result, f, showpath, showlines):
625 625 """add the missing path or line contents, they can be expensive.
626 626 f could be either node or fctx.
627 627 """
628 628 if showpath:
629 629 result = self._addpathtoresult(result)
630 630 if showlines:
631 631 if isinstance(f, str): # f: node or fctx
632 632 llrev = self.revmap.hsh2rev(f)
633 633 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
634 634 else:
635 635 fctx = f
636 636 lines = mdiff.splitnewlines(fctx.data())
637 637 if len(lines) != len(result): # linelog is probably corrupted
638 638 raise faerror.CorruptedFileError()
639 639 result = (result, lines)
640 640 return result
641 641
642 642 def _appendrev(self, fctx, blocks, bannotated=None):
643 643 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
644 644
645 645 def _diffblocks(self, a, b):
646 646 return mdiff.allblocks(a, b, self.opts.diffopts)
647 647
648 648 @staticmethod
649 649 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
650 650 """append a revision to linelog and revmap"""
651 651
652 652 def getllrev(f):
653 653 """(fctx) -> int"""
654 654 # f should not be a linelog revision
655 655 if isinstance(f, int):
656 656 raise error.ProgrammingError('f should not be an int')
657 657 # f is a fctx, allocate linelog rev on demand
658 658 hsh = f.node()
659 659 rev = revmap.hsh2rev(hsh)
660 660 if rev is None:
661 661 rev = revmap.append(hsh, sidebranch=True, path=f.path())
662 662 return rev
663 663
664 664 # append sidebranch revisions to revmap
665 665 siderevs = []
666 666 siderevmap = {} # node: int
667 667 if bannotated is not None:
668 668 for (a1, a2, b1, b2), op in blocks:
669 669 if op != '=':
670 670 # f could be either a linelog rev or a fctx.
671 671 siderevs += [f for f, l in bannotated[b1:b2]
672 672 if not isinstance(f, int)]
673 673 siderevs = set(siderevs)
674 674 if fctx in siderevs: # mainnode must be appended separately
675 675 siderevs.remove(fctx)
676 676 for f in siderevs:
677 677 siderevmap[f] = getllrev(f)
678 678
679 679 # the changeset in the main branch, could be a merge
680 680 llrev = revmap.append(fctx.node(), path=fctx.path())
681 681 siderevmap[fctx] = llrev
682 682
683 683 for (a1, a2, b1, b2), op in reversed(blocks):
684 684 if op == '=':
685 685 continue
686 686 if bannotated is None:
687 687 linelog.replacelines(llrev, a1, a2, b1, b2)
688 688 else:
689 689 blines = [((r if isinstance(r, int) else siderevmap[r]), l)
690 690 for r, l in bannotated[b1:b2]]
691 691 linelog.replacelines_vec(llrev, a1, a2, blines)
692 692
693 693 def _addpathtoresult(self, annotateresult, revmap=None):
694 694 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
695 695 if revmap is None:
696 696 revmap = self.revmap
697 697
698 698 def _getpath(nodeid):
699 699 path = self._node2path.get(nodeid)
700 700 if path is None:
701 701 path = revmap.rev2path(revmap.hsh2rev(nodeid))
702 702 self._node2path[nodeid] = path
703 703 return path
704 704
705 705 return [(n, l, _getpath(n)) for n, l in annotateresult]
706 706
707 707 def _checklastmasterhead(self, fctx):
708 708 """check if fctx is the master's head last time, raise if not"""
709 709 if fctx is None:
710 710 llrev = 0
711 711 else:
712 712 llrev = self.revmap.hsh2rev(fctx.node())
713 713 if not llrev:
714 714 raise faerror.CannotReuseError()
715 715 if self.linelog.maxrev != llrev:
716 716 raise faerror.CannotReuseError()
717 717
718 718 @util.propertycache
719 719 def _parentfunc(self):
720 720 """-> (fctx) -> [fctx]"""
721 721 followrename = self.opts.followrename
722 722 followmerge = self.opts.followmerge
723 723 def parents(f):
724 724 pl = _parents(f, follow=followrename)
725 725 if not followmerge:
726 726 pl = pl[:1]
727 727 return pl
728 728 return parents
729 729
730 730 @util.propertycache
731 731 def _perfhack(self):
732 732 return self.ui.configbool('fastannotate', 'perfhack')
733 733
734 734 def _resolvefctx(self, rev, path=None, **kwds):
735 735 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
736 736
737 737 def _unlinkpaths(paths):
738 738 """silent, best-effort unlink"""
739 739 for path in paths:
740 740 try:
741 741 util.unlink(path)
742 742 except OSError:
743 743 pass
744 744
745 745 class pathhelper(object):
746 746 """helper for getting paths for lockfile, linelog and revmap"""
747 747
748 748 def __init__(self, repo, path, opts=defaultopts):
749 749 # different options use different directories
750 750 self._vfspath = os.path.join('fastannotate',
751 751 opts.shortstr, encodedir(path))
752 752 self._repo = repo
753 753
754 754 @property
755 755 def dirname(self):
756 756 return os.path.dirname(self._repo.vfs.join(self._vfspath))
757 757
758 758 @property
759 759 def linelogpath(self):
760 760 return self._repo.vfs.join(self._vfspath + '.l')
761 761
762 762 def lock(self):
763 763 return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')
764 764
765 765 @contextlib.contextmanager
766 766 def _lockflock(self):
767 767 """the same as 'lock' but use flock instead of lockmod.lock, to avoid
768 768 creating temporary symlinks."""
769 769 import fcntl
770 770 lockpath = self.linelogpath
771 771 util.makedirs(os.path.dirname(lockpath))
772 772 lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
773 773 fcntl.flock(lockfd, fcntl.LOCK_EX)
774 774 try:
775 775 yield
776 776 finally:
777 777 fcntl.flock(lockfd, fcntl.LOCK_UN)
778 778 os.close(lockfd)
779 779
780 780 @property
781 781 def revmappath(self):
782 782 return self._repo.vfs.join(self._vfspath + '.m')
783 783
784 784 @contextlib.contextmanager
785 785 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
786 786 """context needed to perform (fast) annotate on a file
787 787
788 788 an annotatecontext of a single file consists of two structures: the
789 789 linelog and the revmap. this function takes care of locking. only 1
790 790 process is allowed to write that file's linelog and revmap at a time.
791 791
792 792 when something goes wrong, this function will assume the linelog and the
793 793 revmap are in a bad state, and remove them from disk.
794 794
795 795 use this function in the following way:
796 796
797 797 with annotatecontext(...) as actx:
798 798 actx. ....
799 799 """
800 800 helper = pathhelper(repo, path, opts)
801 801 util.makedirs(helper.dirname)
802 802 revmappath = helper.revmappath
803 803 linelogpath = helper.linelogpath
804 804 actx = None
805 805 try:
806 806 with helper.lock():
807 807 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
808 808 if rebuild:
809 809 actx.rebuild()
810 810 yield actx
811 811 except Exception:
812 812 if actx is not None:
813 813 actx.rebuild()
814 814 repo.ui.debug('fastannotate: %s: cache broken and deleted\n' % path)
815 815 raise
816 816 finally:
817 817 if actx is not None:
818 818 actx.close()
819 819
820 820 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
821 821 """like annotatecontext but get the context from a fctx. convenient when
822 822 used in fctx.annotate
823 823 """
824 824 repo = fctx._repo
825 825 path = fctx._path
826 826 if repo.ui.configbool('fastannotate', 'forcefollow', True):
827 827 follow = True
828 828 aopts = annotateopts(diffopts=diffopts, followrename=follow)
829 829 return annotatecontext(repo, path, aopts, rebuild)
@@ -1,609 +1,609 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10 from __future__ import absolute_import
11 11
12 12 import errno
13 13 import hashlib
14 14 import os
15 15 import shutil
16 16
17 17 from mercurial.i18n import _
18 18
19 19 from mercurial import (
20 20 cmdutil,
21 21 context,
22 22 error,
23 23 hg,
24 24 lock,
25 25 match as matchmod,
26 26 node,
27 27 pycompat,
28 28 registrar,
29 29 scmutil,
30 30 util,
31 31 )
32 32
33 33 from ..convert import (
34 34 convcmd,
35 35 filemap,
36 36 )
37 37
38 38 from . import (
39 39 lfutil,
40 40 storefactory
41 41 )
42 42
43 43 release = lock.release
44 44
45 45 # -- Commands ----------------------------------------------------------
46 46
47 47 cmdtable = {}
48 48 command = registrar.command(cmdtable)
49 49
50 50 @command('lfconvert',
51 51 [('s', 'size', '',
52 52 _('minimum size (MB) for files to be converted as largefiles'), 'SIZE'),
53 53 ('', 'to-normal', False,
54 54 _('convert from a largefiles repo to a normal repo')),
55 55 ],
56 56 _('hg lfconvert SOURCE DEST [FILE ...]'),
57 57 norepo=True,
58 58 inferrepo=True)
59 59 def lfconvert(ui, src, dest, *pats, **opts):
60 60 '''convert a normal repository to a largefiles repository
61 61
62 62 Convert repository SOURCE to a new repository DEST, identical to
63 63 SOURCE except that certain files will be converted as largefiles:
64 64 specifically, any file that matches any PATTERN *or* whose size is
65 65 above the minimum size threshold is converted as a largefile. The
66 66 size used to determine whether or not to track a file as a
67 67 largefile is the size of the first version of the file. The
68 68 minimum size can be specified either with --size or in
69 69 configuration as ``largefiles.size``.
70 70
71 71 After running this command you will need to make sure that
72 72 largefiles is enabled anywhere you intend to push the new
73 73 repository.
74 74
75 75 Use --to-normal to convert largefiles back to normal files; after
76 76 this, the DEST repository can be used without largefiles at all.'''
77 77
78 78 opts = pycompat.byteskwargs(opts)
79 79 if opts['to_normal']:
80 80 tolfile = False
81 81 else:
82 82 tolfile = True
83 83 size = lfutil.getminsize(ui, True, opts.get('size'), default=None)
84 84
85 85 if not hg.islocal(src):
86 86 raise error.Abort(_('%s is not a local Mercurial repo') % src)
87 87 if not hg.islocal(dest):
88 88 raise error.Abort(_('%s is not a local Mercurial repo') % dest)
89 89
90 90 rsrc = hg.repository(ui, src)
91 91 ui.status(_('initializing destination %s\n') % dest)
92 92 rdst = hg.repository(ui, dest, create=True)
93 93
94 94 success = False
95 95 dstwlock = dstlock = None
96 96 try:
97 97 # Get a list of all changesets in the source. The easy way to do this
98 98 # is to simply walk the changelog, using changelog.nodesbetween().
99 99 # Take a look at mercurial/revlog.py:639 for more details.
100 100 # Use a generator instead of a list to decrease memory usage
101 101 ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
102 102 rsrc.heads())[0])
103 103 revmap = {node.nullid: node.nullid}
104 104 if tolfile:
105 105 # Lock destination to prevent modification while it is converted to.
106 106 # Don't need to lock src because we are just reading from its
107 107 # history which can't change.
108 108 dstwlock = rdst.wlock()
109 109 dstlock = rdst.lock()
110 110
111 111 lfiles = set()
112 112 normalfiles = set()
113 113 if not pats:
114 114 pats = ui.configlist(lfutil.longname, 'patterns')
115 115 if pats:
116 116 matcher = matchmod.match(rsrc.root, '', list(pats))
117 117 else:
118 118 matcher = None
119 119
120 120 lfiletohash = {}
121 121 with ui.makeprogress(_('converting revisions'),
122 122 unit=_('revisions'),
123 123 total=rsrc['tip'].rev()) as progress:
124 124 for ctx in ctxs:
125 125 progress.update(ctx.rev())
126 126 _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
127 127 lfiles, normalfiles, matcher, size, lfiletohash)
128 128
129 129 if rdst.wvfs.exists(lfutil.shortname):
130 130 rdst.wvfs.rmtree(lfutil.shortname)
131 131
132 132 for f in lfiletohash.keys():
133 133 if rdst.wvfs.isfile(f):
134 134 rdst.wvfs.unlink(f)
135 135 try:
136 136 rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
137 137 except OSError:
138 138 pass
139 139
140 140 # If there were any files converted to largefiles, add largefiles
141 141 # to the destination repository's requirements.
142 142 if lfiles:
143 143 rdst.requirements.add('largefiles')
144 144 rdst._writerequirements()
145 145 else:
146 146 class lfsource(filemap.filemap_source):
147 147 def __init__(self, ui, source):
148 148 super(lfsource, self).__init__(ui, source, None)
149 149 self.filemapper.rename[lfutil.shortname] = '.'
150 150
151 151 def getfile(self, name, rev):
152 152 realname, realrev = rev
153 153 f = super(lfsource, self).getfile(name, rev)
154 154
155 155 if (not realname.startswith(lfutil.shortnameslash)
156 156 or f[0] is None):
157 157 return f
158 158
159 159 # Substitute in the largefile data for the hash
160 160 hash = f[0].strip()
161 161 path = lfutil.findfile(rsrc, hash)
162 162
163 163 if path is None:
164 164 raise error.Abort(_("missing largefile for '%s' in %s")
165 165 % (realname, realrev))
166 166 return util.readfile(path), f[1]
167 167
168 168 class converter(convcmd.converter):
169 169 def __init__(self, ui, source, dest, revmapfile, opts):
170 170 src = lfsource(ui, source)
171 171
172 172 super(converter, self).__init__(ui, src, dest, revmapfile,
173 173 opts)
174 174
175 175 found, missing = downloadlfiles(ui, rsrc)
176 176 if missing != 0:
177 177 raise error.Abort(_("all largefiles must be present locally"))
178 178
179 179 orig = convcmd.converter
180 180 convcmd.converter = converter
181 181
182 182 try:
183 183 convcmd.convert(ui, src, dest, source_type='hg', dest_type='hg')
184 184 finally:
185 185 convcmd.converter = orig
186 186 success = True
187 187 finally:
188 188 if tolfile:
189 189 rdst.dirstate.clear()
190 190 release(dstlock, dstwlock)
191 191 if not success:
192 192 # we failed, remove the new directory
193 193 shutil.rmtree(rdst.root)
194 194
195 195 def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
196 196 matcher, size, lfiletohash):
197 197 # Convert src parents to dst parents
198 198 parents = _convertparents(ctx, revmap)
199 199
200 200 # Generate list of changed files
201 201 files = _getchangedfiles(ctx, parents)
202 202
203 203 dstfiles = []
204 204 for f in files:
205 205 if f not in lfiles and f not in normalfiles:
206 206 islfile = _islfile(f, ctx, matcher, size)
207 207 # If this file was renamed or copied then copy
208 208 # the largefile-ness of its predecessor
209 209 if f in ctx.manifest():
210 210 fctx = ctx.filectx(f)
211 211 renamed = fctx.renamed()
212 212 if renamed is None:
213 213 # the code below assumes renamed to be a boolean or a list
214 214 # and won't quite work with the value None
215 215 renamed = False
216 216 renamedlfile = renamed and renamed[0] in lfiles
217 217 islfile |= renamedlfile
218 218 if 'l' in fctx.flags():
219 219 if renamedlfile:
220 220 raise error.Abort(
221 221 _('renamed/copied largefile %s becomes symlink')
222 222 % f)
223 223 islfile = False
224 224 if islfile:
225 225 lfiles.add(f)
226 226 else:
227 227 normalfiles.add(f)
228 228
229 229 if f in lfiles:
230 230 fstandin = lfutil.standin(f)
231 231 dstfiles.append(fstandin)
232 232 # largefile in manifest if it has not been removed/renamed
233 233 if f in ctx.manifest():
234 234 fctx = ctx.filectx(f)
235 235 if 'l' in fctx.flags():
236 236 renamed = fctx.renamed()
237 237 if renamed and renamed[0] in lfiles:
238 238 raise error.Abort(_('largefile %s becomes symlink') % f)
239 239
240 240 # largefile was modified, update standins
241 241 m = hashlib.sha1('')
242 242 m.update(ctx[f].data())
243 hash = m.hexdigest()
243 hash = node.hex(m.digest())
244 244 if f not in lfiletohash or lfiletohash[f] != hash:
245 245 rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
246 246 executable = 'x' in ctx[f].flags()
247 247 lfutil.writestandin(rdst, fstandin, hash,
248 248 executable)
249 249 lfiletohash[f] = hash
250 250 else:
251 251 # normal file
252 252 dstfiles.append(f)
253 253
254 254 def getfilectx(repo, memctx, f):
255 255 srcfname = lfutil.splitstandin(f)
256 256 if srcfname is not None:
257 257 # if the file isn't in the manifest then it was removed
258 258 # or renamed, return None to indicate this
259 259 try:
260 260 fctx = ctx.filectx(srcfname)
261 261 except error.LookupError:
262 262 return None
263 263 renamed = fctx.renamed()
264 264 if renamed:
265 265 # standin is always a largefile because largefile-ness
266 266 # doesn't change after rename or copy
267 267 renamed = lfutil.standin(renamed[0])
268 268
269 269 return context.memfilectx(repo, memctx, f,
270 270 lfiletohash[srcfname] + '\n',
271 271 'l' in fctx.flags(), 'x' in fctx.flags(),
272 272 renamed)
273 273 else:
274 274 return _getnormalcontext(repo, ctx, f, revmap)
275 275
276 276 # Commit
277 277 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
278 278
279 279 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
280 280 mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
281 281 getfilectx, ctx.user(), ctx.date(), ctx.extra())
282 282 ret = rdst.commitctx(mctx)
283 283 lfutil.copyalltostore(rdst, ret)
284 284 rdst.setparents(ret)
285 285 revmap[ctx.node()] = rdst.changelog.tip()
286 286
287 287 # Generate list of changed files
288 288 def _getchangedfiles(ctx, parents):
289 289 files = set(ctx.files())
290 290 if node.nullid not in parents:
291 291 mc = ctx.manifest()
292 292 mp1 = ctx.parents()[0].manifest()
293 293 mp2 = ctx.parents()[1].manifest()
294 294 files |= (set(mp1) | set(mp2)) - set(mc)
295 295 for f in mc:
296 296 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
297 297 files.add(f)
298 298 return files
299 299
300 300 # Convert src parents to dst parents
301 301 def _convertparents(ctx, revmap):
302 302 parents = []
303 303 for p in ctx.parents():
304 304 parents.append(revmap[p.node()])
305 305 while len(parents) < 2:
306 306 parents.append(node.nullid)
307 307 return parents
308 308
309 309 # Get memfilectx for a normal file
310 310 def _getnormalcontext(repo, ctx, f, revmap):
311 311 try:
312 312 fctx = ctx.filectx(f)
313 313 except error.LookupError:
314 314 return None
315 315 renamed = fctx.renamed()
316 316 if renamed:
317 317 renamed = renamed[0]
318 318
319 319 data = fctx.data()
320 320 if f == '.hgtags':
321 321 data = _converttags (repo.ui, revmap, data)
322 322 return context.memfilectx(repo, ctx, f, data, 'l' in fctx.flags(),
323 323 'x' in fctx.flags(), renamed)
324 324
325 325 # Remap tag data using a revision map
326 326 def _converttags(ui, revmap, data):
327 327 newdata = []
328 328 for line in data.splitlines():
329 329 try:
330 330 id, name = line.split(' ', 1)
331 331 except ValueError:
332 332 ui.warn(_('skipping incorrectly formatted tag %s\n')
333 333 % line)
334 334 continue
335 335 try:
336 336 newid = node.bin(id)
337 337 except TypeError:
338 338 ui.warn(_('skipping incorrectly formatted id %s\n')
339 339 % id)
340 340 continue
341 341 try:
342 342 newdata.append('%s %s\n' % (node.hex(revmap[newid]),
343 343 name))
344 344 except KeyError:
345 345 ui.warn(_('no mapping for id %s\n') % id)
346 346 continue
347 347 return ''.join(newdata)
348 348
349 349 def _islfile(file, ctx, matcher, size):
350 350 '''Return true if file should be considered a largefile, i.e.
351 351 matcher matches it or it is larger than size.'''
352 352 # never store special .hg* files as largefiles
353 353 if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
354 354 return False
355 355 if matcher and matcher(file):
356 356 return True
357 357 try:
358 358 return ctx.filectx(file).size() >= size * 1024 * 1024
359 359 except error.LookupError:
360 360 return False
361 361
362 362 def uploadlfiles(ui, rsrc, rdst, files):
363 363 '''upload largefiles to the central store'''
364 364
365 365 if not files:
366 366 return
367 367
368 368 store = storefactory.openstore(rsrc, rdst, put=True)
369 369
370 370 at = 0
371 371 ui.debug("sending statlfile command for %d largefiles\n" % len(files))
372 372 retval = store.exists(files)
373 373 files = [h for h in files if not retval[h]]
374 374 ui.debug("%d largefiles need to be uploaded\n" % len(files))
375 375
376 376 with ui.makeprogress(_('uploading largefiles'), unit=_('files'),
377 377 total=len(files)) as progress:
378 378 for hash in files:
379 379 progress.update(at)
380 380 source = lfutil.findfile(rsrc, hash)
381 381 if not source:
382 382 raise error.Abort(_('largefile %s missing from store'
383 383 ' (needs to be uploaded)') % hash)
384 384 # XXX check for errors here
385 385 store.put(source, hash)
386 386 at += 1
387 387
388 388 def verifylfiles(ui, repo, all=False, contents=False):
389 389 '''Verify that every largefile revision in the current changeset
390 390 exists in the central store. With --contents, also verify that
391 391 the contents of each local largefile file revision are correct (SHA-1 hash
392 392 matches the revision ID). With --all, check every changeset in
393 393 this repository.'''
394 394 if all:
395 395 revs = repo.revs('all()')
396 396 else:
397 397 revs = ['.']
398 398
399 399 store = storefactory.openstore(repo)
400 400 return store.verify(revs, contents=contents)
401 401
402 402 def cachelfiles(ui, repo, node, filelist=None):
403 403 '''cachelfiles ensures that all largefiles needed by the specified revision
404 404 are present in the repository's largefile cache.
405 405
406 406 returns a tuple (cached, missing). cached is the list of files downloaded
407 407 by this operation; missing is the list of files that were needed but could
408 408 not be found.'''
409 409 lfiles = lfutil.listlfiles(repo, node)
410 410 if filelist:
411 411 lfiles = set(lfiles) & set(filelist)
412 412 toget = []
413 413
414 414 ctx = repo[node]
415 415 for lfile in lfiles:
416 416 try:
417 417 expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
418 418 except IOError as err:
419 419 if err.errno == errno.ENOENT:
420 420 continue # node must be None and standin wasn't found in wctx
421 421 raise
422 422 if not lfutil.findfile(repo, expectedhash):
423 423 toget.append((lfile, expectedhash))
424 424
425 425 if toget:
426 426 store = storefactory.openstore(repo)
427 427 ret = store.get(toget)
428 428 return ret
429 429
430 430 return ([], [])
431 431
432 432 def downloadlfiles(ui, repo, rev=None):
433 433 match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
434 434 def prepare(ctx, fns):
435 435 pass
436 436 totalsuccess = 0
437 437 totalmissing = 0
438 438 if rev != []: # walkchangerevs on empty list would return all revs
439 439 for ctx in cmdutil.walkchangerevs(repo, match, {'rev' : rev},
440 440 prepare):
441 441 success, missing = cachelfiles(ui, repo, ctx.node())
442 442 totalsuccess += len(success)
443 443 totalmissing += len(missing)
444 444 ui.status(_("%d additional largefiles cached\n") % totalsuccess)
445 445 if totalmissing > 0:
446 446 ui.status(_("%d largefiles failed to download\n") % totalmissing)
447 447 return totalsuccess, totalmissing
448 448
449 449 def updatelfiles(ui, repo, filelist=None, printmessage=None,
450 450 normallookup=False):
451 451 '''Update largefiles according to standins in the working directory
452 452
453 453 If ``printmessage`` is other than ``None``, it means "print (or
454 454 ignore, for false) message forcibly".
455 455 '''
456 456 statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
457 457 with repo.wlock():
458 458 lfdirstate = lfutil.openlfdirstate(ui, repo)
459 459 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
460 460
461 461 if filelist is not None:
462 462 filelist = set(filelist)
463 463 lfiles = [f for f in lfiles if f in filelist]
464 464
465 465 update = {}
466 466 dropped = set()
467 467 updated, removed = 0, 0
468 468 wvfs = repo.wvfs
469 469 wctx = repo[None]
470 470 for lfile in lfiles:
471 471 rellfile = lfile
472 472 rellfileorig = os.path.relpath(
473 473 scmutil.origpath(ui, repo, wvfs.join(rellfile)),
474 474 start=repo.root)
475 475 relstandin = lfutil.standin(lfile)
476 476 relstandinorig = os.path.relpath(
477 477 scmutil.origpath(ui, repo, wvfs.join(relstandin)),
478 478 start=repo.root)
479 479 if wvfs.exists(relstandin):
480 480 if (wvfs.exists(relstandinorig) and
481 481 wvfs.exists(rellfile)):
482 482 shutil.copyfile(wvfs.join(rellfile),
483 483 wvfs.join(rellfileorig))
484 484 wvfs.unlinkpath(relstandinorig)
485 485 expecthash = lfutil.readasstandin(wctx[relstandin])
486 486 if expecthash != '':
487 487 if lfile not in wctx: # not switched to normal file
488 488 if repo.dirstate[relstandin] != '?':
489 489 wvfs.unlinkpath(rellfile, ignoremissing=True)
490 490 else:
491 491 dropped.add(rellfile)
492 492
493 493 # use normallookup() to allocate an entry in largefiles
494 494 # dirstate to prevent lfilesrepo.status() from reporting
495 495 # missing files as removed.
496 496 lfdirstate.normallookup(lfile)
497 497 update[lfile] = expecthash
498 498 else:
499 499 # Remove lfiles for which the standin is deleted, unless the
500 500 # lfile is added to the repository again. This happens when a
501 501 # largefile is converted back to a normal file: the standin
502 502 # disappears, but a new (normal) file appears as the lfile.
503 503 if (wvfs.exists(rellfile) and
504 504 repo.dirstate.normalize(lfile) not in wctx):
505 505 wvfs.unlinkpath(rellfile)
506 506 removed += 1
507 507
508 508 # largefile processing might be slow and be interrupted - be prepared
509 509 lfdirstate.write()
510 510
511 511 if lfiles:
512 512 lfiles = [f for f in lfiles if f not in dropped]
513 513
514 514 for f in dropped:
515 515 repo.wvfs.unlinkpath(lfutil.standin(f))
516 516
517 517 # This needs to happen for dropped files, otherwise they stay in
518 518 # the M state.
519 519 lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)
520 520
521 521 statuswriter(_('getting changed largefiles\n'))
522 522 cachelfiles(ui, repo, None, lfiles)
523 523
524 524 for lfile in lfiles:
525 525 update1 = 0
526 526
527 527 expecthash = update.get(lfile)
528 528 if expecthash:
529 529 if not lfutil.copyfromcache(repo, expecthash, lfile):
530 530 # failed ... but already removed and set to normallookup
531 531 continue
532 532 # Synchronize largefile dirstate to the last modified
533 533 # time of the file
534 534 lfdirstate.normal(lfile)
535 535 update1 = 1
536 536
537 537 # copy the exec mode of largefile standin from the repository's
538 538 # dirstate to its state in the lfdirstate.
539 539 rellfile = lfile
540 540 relstandin = lfutil.standin(lfile)
541 541 if wvfs.exists(relstandin):
542 542 # exec is decided by the users permissions using mask 0o100
543 543 standinexec = wvfs.stat(relstandin).st_mode & 0o100
544 544 st = wvfs.stat(rellfile)
545 545 mode = st.st_mode
546 546 if standinexec != mode & 0o100:
547 547 # first remove all X bits, then shift all R bits to X
548 548 mode &= ~0o111
549 549 if standinexec:
550 550 mode |= (mode >> 2) & 0o111 & ~util.umask
551 551 wvfs.chmod(rellfile, mode)
552 552 update1 = 1
553 553
554 554 updated += update1
555 555
556 556 lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)
557 557
558 558 lfdirstate.write()
559 559 if lfiles:
560 560 statuswriter(_('%d largefiles updated, %d removed\n') % (updated,
561 561 removed))
562 562
563 563 @command('lfpull',
564 564 [('r', 'rev', [], _('pull largefiles for these revisions'))
565 565 ] + cmdutil.remoteopts,
566 566 _('-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'))
567 567 def lfpull(ui, repo, source="default", **opts):
568 568 """pull largefiles for the specified revisions from the specified source
569 569
570 570 Pull largefiles that are referenced from local changesets but missing
571 571 locally, pulling from a remote repository to the local cache.
572 572
573 573 If SOURCE is omitted, the 'default' path will be used.
574 574 See :hg:`help urls` for more information.
575 575
576 576 .. container:: verbose
577 577
578 578 Some examples:
579 579
580 580 - pull largefiles for all branch heads::
581 581
582 582 hg lfpull -r "head() and not closed()"
583 583
584 584 - pull largefiles on the default branch::
585 585
586 586 hg lfpull -r "branch(default)"
587 587 """
588 588 repo.lfpullsource = source
589 589
590 590 revs = opts.get(r'rev', [])
591 591 if not revs:
592 592 raise error.Abort(_('no revisions specified'))
593 593 revs = scmutil.revrange(repo, revs)
594 594
595 595 numcached = 0
596 596 for rev in revs:
597 597 ui.note(_('pulling largefiles for revision %d\n') % rev)
598 598 (cached, missing) = cachelfiles(ui, repo, rev)
599 599 numcached += len(cached)
600 600 ui.status(_("%d largefiles cached\n") % numcached)
601 601
602 602 @command('debuglfput',
603 603 [] + cmdutil.remoteopts,
604 604 _('FILE'))
605 605 def debuglfput(ui, repo, filepath, **kwargs):
606 606 hash = lfutil.hashfile(filepath)
607 607 storefactory.openstore(repo).put(filepath, hash)
608 608 ui.write('%s\n' % hash)
609 609 return 0
@@ -1,643 +1,644 b''
1 1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import contextlib
11 11 import errno
12 12 import hashlib
13 13 import json
14 14 import os
15 15 import re
16 16 import socket
17 17
18 18 from mercurial.i18n import _
19 19
20 20 from mercurial import (
21 21 encoding,
22 22 error,
23 node,
23 24 pathutil,
24 25 pycompat,
25 26 url as urlmod,
26 27 util,
27 28 vfs as vfsmod,
28 29 worker,
29 30 )
30 31
31 32 from mercurial.utils import (
32 33 stringutil,
33 34 )
34 35
35 36 from ..largefiles import lfutil
36 37
37 38 # 64 bytes for SHA256
38 39 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39 40
40 41 class lfsvfs(vfsmod.vfs):
41 42 def join(self, path):
42 43 """split the path at first two characters, like: XX/XXXXX..."""
43 44 if not _lfsre.match(path):
44 45 raise error.ProgrammingError('unexpected lfs path: %s' % path)
45 46 return super(lfsvfs, self).join(path[0:2], path[2:])
46 47
47 48 def walk(self, path=None, onerror=None):
48 49 """Yield (dirpath, [], oids) tuple for blobs under path
49 50
50 51 Oids only exist in the root of this vfs, so dirpath is always ''.
51 52 """
52 53 root = os.path.normpath(self.base)
53 54 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 55 # because len(dirpath) < prefixlen.
55 56 prefixlen = len(pathutil.normasprefix(root))
56 57 oids = []
57 58
58 59 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
59 60 onerror=onerror):
60 61 dirpath = dirpath[prefixlen:]
61 62
62 63 # Silently skip unexpected files and directories
63 64 if len(dirpath) == 2:
64 65 oids.extend([dirpath + f for f in files
65 66 if _lfsre.match(dirpath + f)])
66 67
67 68 yield ('', [], oids)
68 69
69 70 class nullvfs(lfsvfs):
70 71 def __init__(self):
71 72 pass
72 73
73 74 def exists(self, oid):
74 75 return False
75 76
76 77 def read(self, oid):
77 78 # store.read() calls into here if the blob doesn't exist in its
78 79 # self.vfs. Raise the same error as a normal vfs when asked to read a
79 80 # file that doesn't exist. The only difference is the full file path
80 81 # isn't available in the error.
81 82 raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)
82 83
83 84 def walk(self, path=None, onerror=None):
84 85 return ('', [], [])
85 86
86 87 def write(self, oid, data):
87 88 pass
88 89
89 90 class filewithprogress(object):
90 91 """a file-like object that supports __len__ and read.
91 92
92 93 Useful to provide progress information for how many bytes are read.
93 94 """
94 95
95 96 def __init__(self, fp, callback):
96 97 self._fp = fp
97 98 self._callback = callback # func(readsize)
98 99 fp.seek(0, os.SEEK_END)
99 100 self._len = fp.tell()
100 101 fp.seek(0)
101 102
102 103 def __len__(self):
103 104 return self._len
104 105
105 106 def read(self, size):
106 107 if self._fp is None:
107 108 return b''
108 109 data = self._fp.read(size)
109 110 if data:
110 111 if self._callback:
111 112 self._callback(len(data))
112 113 else:
113 114 self._fp.close()
114 115 self._fp = None
115 116 return data
116 117
117 118 class local(object):
118 119 """Local blobstore for large file contents.
119 120
120 121 This blobstore is used both as a cache and as a staging area for large blobs
121 122 to be uploaded to the remote blobstore.
122 123 """
123 124
124 125 def __init__(self, repo):
125 126 fullpath = repo.svfs.join('lfs/objects')
126 127 self.vfs = lfsvfs(fullpath)
127 128
128 129 if repo.ui.configbool('experimental', 'lfs.disableusercache'):
129 130 self.cachevfs = nullvfs()
130 131 else:
131 132 usercache = lfutil._usercachedir(repo.ui, 'lfs')
132 133 self.cachevfs = lfsvfs(usercache)
133 134 self.ui = repo.ui
134 135
135 136 def open(self, oid):
136 137 """Open a read-only file descriptor to the named blob, in either the
137 138 usercache or the local store."""
138 139 # The usercache is the most likely place to hold the file. Commit will
139 140 # write to both it and the local store, as will anything that downloads
140 141 # the blobs. However, things like clone without an update won't
141 142 # populate the local store. For an init + push of a local clone,
142 143 # the usercache is the only place it _could_ be. If not present, the
143 144 # missing file msg here will indicate the local repo, not the usercache.
144 145 if self.cachevfs.exists(oid):
145 146 return self.cachevfs(oid, 'rb')
146 147
147 148 return self.vfs(oid, 'rb')
148 149
149 150 def download(self, oid, src):
150 151 """Read the blob from the remote source in chunks, verify the content,
151 152 and write to this local blobstore."""
152 153 sha256 = hashlib.sha256()
153 154
154 155 with self.vfs(oid, 'wb', atomictemp=True) as fp:
155 156 for chunk in util.filechunkiter(src, size=1048576):
156 157 fp.write(chunk)
157 158 sha256.update(chunk)
158 159
159 realoid = sha256.hexdigest()
160 realoid = node.hex(sha256.digest())
160 161 if realoid != oid:
161 162 raise LfsCorruptionError(_('corrupt remote lfs object: %s')
162 163 % oid)
163 164
164 165 self._linktousercache(oid)
165 166
166 167 def write(self, oid, data):
167 168 """Write blob to local blobstore.
168 169
169 170 This should only be called from the filelog during a commit or similar.
170 171 As such, there is no need to verify the data. Imports from a remote
171 172 store must use ``download()`` instead."""
172 173 with self.vfs(oid, 'wb', atomictemp=True) as fp:
173 174 fp.write(data)
174 175
175 176 self._linktousercache(oid)
176 177
177 178 def linkfromusercache(self, oid):
178 179 """Link blobs found in the user cache into this store.
179 180
180 181 The server module needs to do this when it lets the client know not to
181 182 upload the blob, to ensure it is always available in this store.
182 183 Normally this is done implicitly when the client reads or writes the
183 184 blob, but that doesn't happen when the server tells the client that it
184 185 already has the blob.
185 186 """
186 187 if (not isinstance(self.cachevfs, nullvfs)
187 188 and not self.vfs.exists(oid)):
188 189 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
189 190 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
190 191
191 192 def _linktousercache(self, oid):
192 193 # XXX: should we verify the content of the cache, and hardlink back to
193 194 # the local store on success, but truncate, write and link on failure?
194 195 if (not self.cachevfs.exists(oid)
195 196 and not isinstance(self.cachevfs, nullvfs)):
196 197 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
197 198 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
198 199
199 200 def read(self, oid, verify=True):
200 201 """Read blob from local blobstore."""
201 202 if not self.vfs.exists(oid):
202 203 blob = self._read(self.cachevfs, oid, verify)
203 204
204 205 # Even though the revlog will verify the content, it needs to be verified
205 206 # now before making the hardlink to avoid propagating corrupt blobs.
206 207 # Don't abort if corruption is detected, because `hg verify` will
207 208 # give more useful info about the corruption; simply don't add the
208 209 # hardlink.
209 if verify or hashlib.sha256(blob).hexdigest() == oid:
210 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
210 211 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
211 212 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
212 213 else:
213 214 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
214 215 blob = self._read(self.vfs, oid, verify)
215 216 return blob
216 217
217 218 def _read(self, vfs, oid, verify):
218 219 """Read blob (after verifying) from the given store"""
219 220 blob = vfs.read(oid)
220 221 if verify:
221 222 _verify(oid, blob)
222 223 return blob
223 224
224 225 def verify(self, oid):
225 226 """Indicate whether or not the hash of the underlying file matches its
226 227 name."""
227 228 sha256 = hashlib.sha256()
228 229
229 230 with self.open(oid) as fp:
230 231 for chunk in util.filechunkiter(fp, size=1048576):
231 232 sha256.update(chunk)
232 233
233 return oid == sha256.hexdigest()
234 return oid == node.hex(sha256.digest())
234 235
235 236 def has(self, oid):
236 237 """Returns True if the local blobstore contains the requested blob,
237 238 False otherwise."""
238 239 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
239 240
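The store above is content addressed: every blob is named by the hex SHA-256 of its data, which is why download(), read() and verify() all recompute the hash before trusting a file. The sketch below (it assumes Mercurial is importable) illustrates the equivalence that the hexdigest() -> node.hex(digest()) change in this diff relies on: the same hex characters, but always as bytes on Python 3.

import hashlib
from mercurial import node

blob = b'hello lfs'
m = hashlib.sha256(blob)
# node.hex() is hexlify(), so it yields bytes on both Python 2 and 3,
# matching the bytes oids used throughout this module.
assert node.hex(m.digest()) == m.hexdigest().encode('ascii')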
240 241 def _urlerrorreason(urlerror):
241 242 '''Create a friendly message for the given URLError to be used in an
242 243 LfsRemoteError message.
243 244 '''
244 245 inst = urlerror
245 246
246 247 if isinstance(urlerror.reason, Exception):
247 248 inst = urlerror.reason
248 249
249 250 if util.safehasattr(inst, 'reason'):
250 251 try: # usually it is in the form (errno, strerror)
251 252 reason = inst.reason.args[1]
252 253 except (AttributeError, IndexError):
253 254 # it might be anything, for example a string
254 255 reason = inst.reason
255 256 if isinstance(reason, pycompat.unicode):
256 257 # SSLError of Python 2.7.9 contains a unicode
257 258 reason = encoding.unitolocal(reason)
258 259 return reason
259 260 elif getattr(inst, "strerror", None):
260 261 return encoding.strtolocal(inst.strerror)
261 262 else:
262 263 return stringutil.forcebytestr(urlerror)
263 264
264 265 class _gitlfsremote(object):
265 266
266 267 def __init__(self, repo, url):
267 268 ui = repo.ui
268 269 self.ui = ui
269 270 baseurl, authinfo = url.authinfo()
270 271 self.baseurl = baseurl.rstrip('/')
271 272 useragent = repo.ui.config('experimental', 'lfs.user-agent')
272 273 if not useragent:
273 274 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
274 275 self.urlopener = urlmod.opener(ui, authinfo, useragent)
275 276 self.retry = ui.configint('lfs', 'retry')
276 277
277 278 def writebatch(self, pointers, fromstore):
278 279 """Batch upload from local to remote blobstore."""
279 280 self._batch(_deduplicate(pointers), fromstore, 'upload')
280 281
281 282 def readbatch(self, pointers, tostore):
282 283 """Batch download from remote to local blostore."""
283 284 self._batch(_deduplicate(pointers), tostore, 'download')
284 285
285 286 def _batchrequest(self, pointers, action):
286 287 """Get metadata about objects pointed by pointers for given action
287 288
288 289 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
289 290 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
290 291 """
291 292 objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
292 293 requestdata = json.dumps({
293 294 'objects': objects,
294 295 'operation': action,
295 296 })
296 297 url = '%s/objects/batch' % self.baseurl
297 298 batchreq = util.urlreq.request(url, data=requestdata)
298 299 batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
299 300 batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
300 301 try:
301 302 with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
302 303 rawjson = rsp.read()
303 304 except util.urlerr.httperror as ex:
304 305 hints = {
305 306 400: _('check that lfs serving is enabled on %s and "%s" is '
306 307 'supported') % (self.baseurl, action),
307 308 404: _('the "lfs.url" config may be used to override %s')
308 309 % self.baseurl,
309 310 }
310 311 hint = hints.get(ex.code, _('api=%s, action=%s') % (url, action))
311 312 raise LfsRemoteError(_('LFS HTTP error: %s') % ex, hint=hint)
312 313 except util.urlerr.urlerror as ex:
313 314 hint = (_('the "lfs.url" config may be used to override %s')
314 315 % self.baseurl)
315 316 raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
316 317 hint=hint)
317 318 try:
318 319 response = json.loads(rawjson)
319 320 except ValueError:
320 321 raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
321 322 % rawjson)
322 323
323 324 if self.ui.debugflag:
324 325 self.ui.debug('Status: %d\n' % rsp.status)
325 326 # lfs-test-server and hg serve return headers in different order
326 327 self.ui.debug('%s\n'
327 328 % '\n'.join(sorted(str(rsp.info()).splitlines())))
328 329
329 330 if 'objects' in response:
330 331 response['objects'] = sorted(response['objects'],
331 332 key=lambda p: p['oid'])
332 333 self.ui.debug('%s\n'
333 334 % json.dumps(response, indent=2,
334 335 separators=('', ': '), sort_keys=True))
335 336
336 337 return response
337 338
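For reference, a hand-written illustration of the payload _batchrequest() builds and the kind of object _extractobjects() and _checkforservererror() expect back. The oid, size, href and header values are made up; the field names follow the git-lfs batch API document linked above.

import json

request_body = json.dumps({
    'objects': [{'oid': '31' * 32, 'size': 12}],
    'operation': 'download',
})

response = {
    'objects': [{
        'oid': '31' * 32,
        'size': 12,
        'actions': {
            'download': {
                'href': 'https://example.com/objects/' + '31' * 32,
                'header': {'Authorization': 'Basic ...'},
            },
        },
    }],
}

# _extractobjects() keeps only objects whose 'actions' include the requested
# action; an object carrying an 'error' dict (or missing the 'download'
# action) is reported by _checkforservererror() instead.
assert [o for o in response['objects'] if 'download' in o.get('actions', {})]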
338 339 def _checkforservererror(self, pointers, responses, action):
339 340 """Scans errors from objects
340 341
341 342 Raises LfsRemoteError if any objects have an error"""
342 343 for response in responses:
343 344 # The server should return 404 when objects cannot be found. Some
344 345 # server implementations (e.g. lfs-test-server) do not set "error"
345 346 # but just remove "download" from "actions". Treat that case
346 347 # the same as a 404 error.
347 348 if 'error' not in response:
348 349 if (action == 'download'
349 350 and action not in response.get('actions', [])):
350 351 code = 404
351 352 else:
352 353 continue
353 354 else:
354 355 # An error dict without a code doesn't make much sense, so
355 356 # treat as a server error.
356 357 code = response.get('error').get('code', 500)
357 358
358 359 ptrmap = {p.oid(): p for p in pointers}
359 360 p = ptrmap.get(response['oid'], None)
360 361 if p:
361 362 filename = getattr(p, 'filename', 'unknown')
362 363 errors = {
363 364 404: 'The object does not exist',
364 365 410: 'The object was removed by the owner',
365 366 422: 'Validation error',
366 367 500: 'Internal server error',
367 368 }
368 369 msg = errors.get(code, 'status code %d' % code)
369 370 raise LfsRemoteError(_('LFS server error for "%s": %s')
370 371 % (filename, msg))
371 372 else:
372 373 raise LfsRemoteError(
373 374 _('LFS server error. Unsolicited response for oid %s')
374 375 % response['oid'])
375 376
376 377 def _extractobjects(self, response, pointers, action):
377 378 """extract objects from response of the batch API
378 379
379 380 response: parsed JSON object returned by batch API
380 381 return response['objects'] filtered by action
381 382 raise if any object has an error
382 383 """
383 384 # Scan errors from objects - fail early
384 385 objects = response.get('objects', [])
385 386 self._checkforservererror(pointers, objects, action)
386 387
387 388 # Filter objects with given action. Practically, this skips uploading
388 389 # objects which already exist on the server.
389 390 filteredobjects = [o for o in objects if action in o.get('actions', [])]
390 391
391 392 return filteredobjects
392 393
393 394 def _basictransfer(self, obj, action, localstore):
394 395 """Download or upload a single object using basic transfer protocol
395 396
396 397 obj: dict, an object description returned by batch API
397 398 action: string, one of ['upload', 'download']
398 399 localstore: blobstore.local
399 400
400 401 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
401 402 basic-transfers.md
402 403 """
403 404 oid = pycompat.bytestr(obj['oid'])
404 405
405 406 href = pycompat.bytestr(obj['actions'][action].get('href'))
406 407 headers = obj['actions'][action].get('header', {}).items()
407 408
408 409 request = util.urlreq.request(href)
409 410 if action == 'upload':
410 411 # If uploading blobs, read data from local blobstore.
411 412 if not localstore.verify(oid):
412 413 raise error.Abort(_('detected corrupt lfs object: %s') % oid,
413 414 hint=_('run hg verify'))
414 415 request.data = filewithprogress(localstore.open(oid), None)
415 416 request.get_method = lambda: 'PUT'
416 417 request.add_header('Content-Type', 'application/octet-stream')
417 418
418 419 for k, v in headers:
419 420 request.add_header(k, v)
420 421
421 422 response = b''
422 423 try:
423 424 with contextlib.closing(self.urlopener.open(request)) as req:
424 425 ui = self.ui # Shorten debug lines
425 426 if self.ui.debugflag:
426 427 ui.debug('Status: %d\n' % req.status)
427 428 # lfs-test-server and hg serve return headers in different
428 429 # order
429 430 ui.debug('%s\n'
430 431 % '\n'.join(sorted(str(req.info()).splitlines())))
431 432
432 433 if action == 'download':
433 434 # If downloading blobs, store downloaded data to local
434 435 # blobstore
435 436 localstore.download(oid, req)
436 437 else:
437 438 while True:
438 439 data = req.read(1048576)
439 440 if not data:
440 441 break
441 442 response += data
442 443 if response:
443 444 ui.debug('lfs %s response: %s' % (action, response))
444 445 except util.urlerr.httperror as ex:
445 446 if self.ui.debugflag:
446 447 self.ui.debug('%s: %s\n' % (oid, ex.read()))
447 448 raise LfsRemoteError(_('LFS HTTP error: %s (oid=%s, action=%s)')
448 449 % (ex, oid, action))
449 450 except util.urlerr.urlerror as ex:
450 451 hint = (_('attempted connection to %s')
451 452 % util.urllibcompat.getfullurl(request))
452 453 raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
453 454 hint=hint)
454 455
455 456 def _batch(self, pointers, localstore, action):
456 457 if action not in ['upload', 'download']:
457 458 raise error.ProgrammingError('invalid Git-LFS action: %s' % action)
458 459
459 460 response = self._batchrequest(pointers, action)
460 461 objects = self._extractobjects(response, pointers, action)
461 462 total = sum(x.get('size', 0) for x in objects)
462 463 sizes = {}
463 464 for obj in objects:
464 465 sizes[obj.get('oid')] = obj.get('size', 0)
465 466 topic = {'upload': _('lfs uploading'),
466 467 'download': _('lfs downloading')}[action]
467 468 if len(objects) > 1:
468 469 self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
469 470 % (len(objects), util.bytecount(total)))
470 471
471 472 def transfer(chunk):
472 473 for obj in chunk:
473 474 objsize = obj.get('size', 0)
474 475 if self.ui.verbose:
475 476 if action == 'download':
476 477 msg = _('lfs: downloading %s (%s)\n')
477 478 elif action == 'upload':
478 479 msg = _('lfs: uploading %s (%s)\n')
479 480 self.ui.note(msg % (obj.get('oid'),
480 481 util.bytecount(objsize)))
481 482 retry = self.retry
482 483 while True:
483 484 try:
484 485 self._basictransfer(obj, action, localstore)
485 486 yield 1, obj.get('oid')
486 487 break
487 488 except socket.error as ex:
488 489 if retry > 0:
489 490 self.ui.note(
490 491 _('lfs: failed: %r (remaining retry %d)\n')
491 492 % (ex, retry))
492 493 retry -= 1
493 494 continue
494 495 raise
495 496
496 497 # Until https multiplexing gets sorted out
497 498 if self.ui.configbool('experimental', 'lfs.worker-enable'):
498 499 oids = worker.worker(self.ui, 0.1, transfer, (),
499 500 sorted(objects, key=lambda o: o.get('oid')))
500 501 else:
501 502 oids = transfer(sorted(objects, key=lambda o: o.get('oid')))
502 503
503 504 with self.ui.makeprogress(topic, total=total) as progress:
504 505 progress.update(0)
505 506 processed = 0
506 507 blobs = 0
507 508 for _one, oid in oids:
508 509 processed += sizes[oid]
509 510 blobs += 1
510 511 progress.update(processed)
511 512 self.ui.note(_('lfs: processed: %s\n') % oid)
512 513
513 514 if blobs > 0:
514 515 if action == 'upload':
515 516 self.ui.status(_('lfs: uploaded %d files (%s)\n')
516 517 % (blobs, util.bytecount(processed)))
517 518 elif action == 'download':
518 519 self.ui.status(_('lfs: downloaded %d files (%s)\n')
519 520 % (blobs, util.bytecount(processed)))
520 521
521 522 def __del__(self):
522 523 # copied from mercurial/httppeer.py
523 524 urlopener = getattr(self, 'urlopener', None)
524 525 if urlopener:
525 526 for h in urlopener.handlers:
526 527 h.close()
527 528 getattr(h, "close_all", lambda : None)()
528 529
529 530 class _dummyremote(object):
530 531 """Dummy store storing blobs to temp directory."""
531 532
532 533 def __init__(self, repo, url):
533 534 fullpath = repo.vfs.join('lfs', url.path)
534 535 self.vfs = lfsvfs(fullpath)
535 536
536 537 def writebatch(self, pointers, fromstore):
537 538 for p in _deduplicate(pointers):
538 539 content = fromstore.read(p.oid(), verify=True)
539 540 with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
540 541 fp.write(content)
541 542
542 543 def readbatch(self, pointers, tostore):
543 544 for p in _deduplicate(pointers):
544 545 with self.vfs(p.oid(), 'rb') as fp:
545 546 tostore.download(p.oid(), fp)
546 547
547 548 class _nullremote(object):
548 549 """Null store storing blobs to /dev/null."""
549 550
550 551 def __init__(self, repo, url):
551 552 pass
552 553
553 554 def writebatch(self, pointers, fromstore):
554 555 pass
555 556
556 557 def readbatch(self, pointers, tostore):
557 558 pass
558 559
559 560 class _promptremote(object):
560 561 """Prompt user to set lfs.url when accessed."""
561 562
562 563 def __init__(self, repo, url):
563 564 pass
564 565
565 566 def writebatch(self, pointers, fromstore, ui=None):
566 567 self._prompt()
567 568
568 569 def readbatch(self, pointers, tostore, ui=None):
569 570 self._prompt()
570 571
571 572 def _prompt(self):
572 573 raise error.Abort(_('lfs.url needs to be configured'))
573 574
574 575 _storemap = {
575 576 'https': _gitlfsremote,
576 577 'http': _gitlfsremote,
577 578 'file': _dummyremote,
578 579 'null': _nullremote,
579 580 None: _promptremote,
580 581 }
581 582
582 583 def _deduplicate(pointers):
583 584 """Remove any duplicate oids that exist in the list"""
584 585 reduced = util.sortdict()
585 586 for p in pointers:
586 587 reduced[p.oid()] = p
587 588 return reduced.values()
588 589
589 590 def _verify(oid, content):
590 realoid = hashlib.sha256(content).hexdigest()
591 realoid = node.hex(hashlib.sha256(content).digest())
591 592 if realoid != oid:
592 593 raise LfsCorruptionError(_('detected corrupt lfs object: %s') % oid,
593 594 hint=_('run hg verify'))
594 595
595 596 def remote(repo, remote=None):
596 597 """remotestore factory. return a store in _storemap depending on config
597 598
598 599 If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
598 599 infer the endpoint based on the remote repository, using the same path
600 601 adjustments as git. As an extension, 'http' is supported as well so that
601 602 ``hg serve`` works out of the box.
602 603
603 604 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
604 605 """
605 606 lfsurl = repo.ui.config('lfs', 'url')
606 607 url = util.url(lfsurl or '')
607 608 if lfsurl is None:
608 609 if remote:
609 610 path = remote
610 611 elif util.safehasattr(repo, '_subtoppath'):
611 612 # The pull command sets this during the optional update phase, which
612 613 # tells exactly where the pull originated, whether 'paths.default'
613 614 # or explicit.
614 615 path = repo._subtoppath
615 616 else:
616 617 # TODO: investigate 'paths.remote:lfsurl' style path customization,
617 618 # and fall back to inferring from 'paths.remote' if unspecified.
618 619 path = repo.ui.config('paths', 'default') or ''
619 620
620 621 defaulturl = util.url(path)
621 622
622 623 # TODO: support local paths as well.
623 624 # TODO: consider the ssh -> https transformation that git applies
624 625 if defaulturl.scheme in (b'http', b'https'):
625 626 if defaulturl.path and defaulturl.path[-1:] != b'/':
626 627 defaulturl.path += b'/'
627 628 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
628 629
629 630 url = util.url(bytes(defaulturl))
630 631 repo.ui.note(_('lfs: assuming remote store: %s\n') % url)
631 632
632 633 scheme = url.scheme
633 634 if scheme not in _storemap:
634 635 raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
635 636 return _storemap[scheme](repo, url)
636 637
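A rough standalone sketch of the endpoint inference above, for the common case where lfs.url is unset and paths.default is an http(s) URL. infer_lfs_endpoint is an illustrative helper, not part of the extension, and it glosses over the util.url handling of credentials, query strings and fragments.

def infer_lfs_endpoint(default_path):
    """'https://host/repo' -> 'https://host/repo/.git/info/lfs'"""
    if not default_path.startswith(('http://', 'https://')):
        return None          # remote() would pick a different store instead
    if not default_path.endswith('/'):
        default_path += '/'
    return default_path + '.git/info/lfs'

assert (infer_lfs_endpoint('https://example.com/repo')
        == 'https://example.com/repo/.git/info/lfs')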
637 638 class LfsRemoteError(error.StorageError):
638 639 pass
639 640
640 641 class LfsCorruptionError(error.Abort):
641 642 """Raised when a corrupt blob is detected, aborting an operation
642 643
643 644 It exists to allow specialized handling on the server side."""
@@ -1,539 +1,540 b''
1 1 from __future__ import absolute_import
2 2
3 3 import collections
4 4 import errno
5 5 import hashlib
6 6 import mmap
7 7 import os
8 8 import struct
9 9 import time
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial import (
13 node as nodemod,
13 14 policy,
14 15 pycompat,
15 16 util,
16 17 vfs as vfsmod,
17 18 )
18 19 from . import shallowutil
19 20
20 21 osutil = policy.importmod(r'osutil')
21 22
22 23 # The pack version supported by this implementation. This will need to be
23 24 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
24 25 # changing any of the int sizes, changing the delta algorithm, etc.
25 26 PACKVERSIONSIZE = 1
26 27 INDEXVERSIONSIZE = 2
27 28
28 29 FANOUTSTART = INDEXVERSIONSIZE
29 30
30 31 # Constant that indicates a fanout table entry hasn't been filled in. (This does
31 32 # not get serialized)
32 33 EMPTYFANOUT = -1
33 34
34 35 # The fanout prefix is the number of bytes that can be addressed by the fanout
35 36 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
36 37 # look in the fanout table (which will be 2^8 entries long).
37 38 SMALLFANOUTPREFIX = 1
38 39 LARGEFANOUTPREFIX = 2
39 40
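A standalone sketch of what the fanout prefix means in practice: the first one or two bytes of a node, unpacked as an unsigned integer, index into a 2^8 or 2^16 entry fanout table. The struct formats mirror the ones chosen by indexparams further down.

import struct

nodeid = b'\xab\xcd' + b'\x00' * 18               # a fake 20-byte sha1 node
small_key = struct.unpack('!B', nodeid[:1])[0]    # SMALLFANOUTPREFIX == 1
large_key = struct.unpack('!H', nodeid[:2])[0]    # LARGEFANOUTPREFIX == 2
assert small_key == 0xab and large_key == 0xabcd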
40 41 # The number of entries in the index at which point we switch to a large fanout.
41 42 # It is chosen to balance the linear scan through a sparse fanout, with the
42 43 # size of the bisect in actual index.
43 44 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
44 45 # bisect) with (8 step fanout scan + 1 step bisect)
45 46 # 5 step bisect = log(2^16 / 8 / 255) # fanout
46 47 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
47 48 SMALLFANOUTCUTOFF = 2**16 / 8
48 49
49 50 # The amount of time to wait between checking for new packs. This prevents an
50 51 # exception when data is moved to a new pack after the process has already
51 52 # loaded the pack list.
52 53 REFRESHRATE = 0.1
53 54
54 55 if pycompat.isposix:
55 56 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
56 57 # The 'e' flag will be ignored on older versions of glibc.
57 58 PACKOPENMODE = 'rbe'
58 59 else:
59 60 PACKOPENMODE = 'rb'
60 61
61 62 class _cachebackedpacks(object):
62 63 def __init__(self, packs, cachesize):
63 64 self._packs = set(packs)
64 65 self._lrucache = util.lrucachedict(cachesize)
65 66 self._lastpack = None
66 67
67 68 # Avoid cold start of the cache by populating the most recent packs
68 69 # in the cache.
69 70 for i in reversed(range(min(cachesize, len(packs)))):
70 71 self._movetofront(packs[i])
71 72
72 73 def _movetofront(self, pack):
73 74 # This effectively makes pack the first entry in the cache.
74 75 self._lrucache[pack] = True
75 76
76 77 def _registerlastpackusage(self):
77 78 if self._lastpack is not None:
78 79 self._movetofront(self._lastpack)
79 80 self._lastpack = None
80 81
81 82 def add(self, pack):
82 83 self._registerlastpackusage()
83 84
84 85 # This method will mostly be called when packs are not in cache.
85 86 # Therefore, add the pack to the cache.
86 87 self._movetofront(pack)
87 88 self._packs.add(pack)
88 89
89 90 def __iter__(self):
90 91 self._registerlastpackusage()
91 92
92 93 # Cache iteration is based on LRU.
93 94 for pack in self._lrucache:
94 95 self._lastpack = pack
95 96 yield pack
96 97
97 98 cachedpacks = set(pack for pack in self._lrucache)
98 99 # Yield for paths not in the cache.
99 100 for pack in self._packs - cachedpacks:
100 101 self._lastpack = pack
101 102 yield pack
102 103
103 104 # Data not found in any pack.
104 105 self._lastpack = None
105 106
106 107 class basepackstore(object):
107 108 # Default cache size limit for the pack files.
108 109 DEFAULTCACHESIZE = 100
109 110
110 111 def __init__(self, ui, path):
111 112 self.ui = ui
112 113 self.path = path
113 114
114 115 # lastrefresh is 0 so we'll immediately check for new packs on the first
115 116 # failure.
116 117 self.lastrefresh = 0
117 118
118 119 packs = []
119 120 for filepath, __, __ in self._getavailablepackfilessorted():
120 121 try:
121 122 pack = self.getpack(filepath)
122 123 except Exception as ex:
123 124 # An exception may be thrown if the pack file is corrupted
124 125 # somehow. Log a warning but keep going in this case, just
125 126 # skipping this pack file.
126 127 #
127 128 # If this is an ENOENT error then don't even bother logging.
128 129 # Someone could have removed the file since we retrieved the
129 130 # list of paths.
130 131 if getattr(ex, 'errno', None) != errno.ENOENT:
131 132 ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
132 133 continue
133 134 packs.append(pack)
134 135
135 136 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
136 137
137 138 def _getavailablepackfiles(self):
138 139 """For each pack file (a index/data file combo), yields:
139 140 (full path without extension, mtime, size)
140 141
141 142 mtime will be the mtime of the index/data file (whichever is newer)
142 143 size is the combined size of index/data file
143 144 """
144 145 indexsuffixlen = len(self.INDEXSUFFIX)
145 146 packsuffixlen = len(self.PACKSUFFIX)
146 147
147 148 ids = set()
148 149 sizes = collections.defaultdict(lambda: 0)
149 150 mtimes = collections.defaultdict(lambda: [])
150 151 try:
151 152 for filename, type, stat in osutil.listdir(self.path, stat=True):
152 153 id = None
153 154 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
154 155 id = filename[:-indexsuffixlen]
155 156 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
156 157 id = filename[:-packsuffixlen]
157 158
158 159 # Since we expect to have two files corresponding to each ID
159 160 # (the index file and the pack file), we can yield once we see
160 161 # it twice.
161 162 if id:
162 163 sizes[id] += stat.st_size # Sum both files' sizes together
163 164 mtimes[id].append(stat.st_mtime)
164 165 if id in ids:
165 166 yield (os.path.join(self.path, id), max(mtimes[id]),
166 167 sizes[id])
167 168 else:
168 169 ids.add(id)
169 170 except OSError as ex:
170 171 if ex.errno != errno.ENOENT:
171 172 raise
172 173
173 174 def _getavailablepackfilessorted(self):
174 175 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
175 176 yielding newest files first.
176 177
177 178 This is desirable, since it is more likely newer packfiles have more
178 179 desirable data.
179 180 """
180 181 files = []
181 182 for path, mtime, size in self._getavailablepackfiles():
182 183 files.append((mtime, size, path))
183 184 files = sorted(files, reverse=True)
184 185 for mtime, size, path in files:
185 186 yield path, mtime, size
186 187
187 188 def gettotalsizeandcount(self):
188 189 """Returns the total disk size (in bytes) of all the pack files in
189 190 this store, and the count of pack files.
190 191
191 192 (This might be smaller than the total size of the ``self.path``
192 193 directory, since this only considers fuly-writen pack files, and not
193 194 temporary files or other detritus on the directory.)
194 195 """
195 196 totalsize = 0
196 197 count = 0
197 198 for __, __, size in self._getavailablepackfiles():
198 199 totalsize += size
199 200 count += 1
200 201 return totalsize, count
201 202
202 203 def getmetrics(self):
203 204 """Returns metrics on the state of this store."""
204 205 size, count = self.gettotalsizeandcount()
205 206 return {
206 207 'numpacks': count,
207 208 'totalpacksize': size,
208 209 }
209 210
210 211 def getpack(self, path):
211 212 raise NotImplementedError()
212 213
213 214 def getmissing(self, keys):
214 215 missing = keys
215 216 for pack in self.packs:
216 217 missing = pack.getmissing(missing)
217 218
218 219 # Ensures better performance of the cache by keeping the most
219 220 # recently accessed pack at the beginning in subsequent iterations.
220 221 if not missing:
221 222 return missing
222 223
223 224 if missing:
224 225 for pack in self.refresh():
225 226 missing = pack.getmissing(missing)
226 227
227 228 return missing
228 229
229 230 def markledger(self, ledger, options=None):
230 231 for pack in self.packs:
231 232 pack.markledger(ledger)
232 233
233 234 def markforrefresh(self):
234 235 """Tells the store that there may be new pack files, so the next time it
235 236 has a lookup miss it should check for new files."""
236 237 self.lastrefresh = 0
237 238
238 239 def refresh(self):
239 240 """Checks for any new packs on disk, adds them to the main pack list,
240 241 and returns a list of just the new packs."""
241 242 now = time.time()
242 243
243 244 # If we experience a lot of misses (like in the case of getmissing() on
244 245 # new objects), let's only actually check disk for new stuff every once
245 246 # in a while. Generally this code path should only ever matter when a
246 247 # repack is going on in the background, and it should be pretty rare
247 248 # for that to happen twice in quick succession.
248 249 newpacks = []
249 250 if now > self.lastrefresh + REFRESHRATE:
250 251 self.lastrefresh = now
251 252 previous = set(p.path for p in self.packs)
252 253 for filepath, __, __ in self._getavailablepackfilessorted():
253 254 if filepath not in previous:
254 255 newpack = self.getpack(filepath)
255 256 newpacks.append(newpack)
256 257 self.packs.add(newpack)
257 258
258 259 return newpacks
259 260
260 261 class versionmixin(object):
261 262 # Mix-in for classes with multiple supported versions
262 263 VERSION = None
263 264 SUPPORTED_VERSIONS = [2]
264 265
265 266 def _checkversion(self, version):
266 267 if version in self.SUPPORTED_VERSIONS:
267 268 if self.VERSION is None:
268 269 # only affect this instance
269 270 self.VERSION = version
270 271 elif self.VERSION != version:
271 272 raise RuntimeError('inconsistent version: %s' % version)
272 273 else:
273 274 raise RuntimeError('unsupported version: %s' % version)
274 275
275 276 class basepack(versionmixin):
276 277 # The maximum amount we should read via mmap before remapping so the old
277 278 # pages can be released (100MB)
278 279 MAXPAGEDIN = 100 * 1024**2
279 280
280 281 SUPPORTED_VERSIONS = [2]
281 282
282 283 def __init__(self, path):
283 284 self.path = path
284 285 self.packpath = path + self.PACKSUFFIX
285 286 self.indexpath = path + self.INDEXSUFFIX
286 287
287 288 self.indexsize = os.stat(self.indexpath).st_size
288 289 self.datasize = os.stat(self.packpath).st_size
289 290
290 291 self._index = None
291 292 self._data = None
292 293 self.freememory() # initialize the mmap
293 294
294 295 version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
295 296 self._checkversion(version)
296 297
297 298 version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
298 299 self._checkversion(version)
299 300
300 301 if 0b10000000 & config:
301 302 self.params = indexparams(LARGEFANOUTPREFIX, version)
302 303 else:
303 304 self.params = indexparams(SMALLFANOUTPREFIX, version)
304 305
305 306 @util.propertycache
306 307 def _fanouttable(self):
307 308 params = self.params
308 309 rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
309 310 fanouttable = []
310 311 for i in pycompat.xrange(0, params.fanoutcount):
311 312 loc = i * 4
312 313 fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
313 314 fanouttable.append(fanoutentry)
314 315 return fanouttable
315 316
316 317 @util.propertycache
317 318 def _indexend(self):
318 319 nodecount = struct.unpack_from('!Q', self._index,
319 320 self.params.indexstart - 8)[0]
320 321 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
321 322
322 323 def freememory(self):
323 324 """Unmap and remap the memory to free it up after known expensive
324 325 operations. Return True if self._data and self._index were reloaded.
325 326 """
326 327 if self._index:
327 328 if self._pagedin < self.MAXPAGEDIN:
328 329 return False
329 330
330 331 self._index.close()
331 332 self._data.close()
332 333
333 334 # TODO: use an opener/vfs to access these paths
334 335 with open(self.indexpath, PACKOPENMODE) as indexfp:
335 336 # memory-map the file, size 0 means whole file
336 337 self._index = mmap.mmap(indexfp.fileno(), 0,
337 338 access=mmap.ACCESS_READ)
338 339 with open(self.packpath, PACKOPENMODE) as datafp:
339 340 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
340 341
341 342 self._pagedin = 0
342 343 return True
343 344
344 345 def getmissing(self, keys):
345 346 raise NotImplementedError()
346 347
347 348 def markledger(self, ledger, options=None):
348 349 raise NotImplementedError()
349 350
350 351 def cleanup(self, ledger):
351 352 raise NotImplementedError()
352 353
353 354 def __iter__(self):
354 355 raise NotImplementedError()
355 356
356 357 def iterentries(self):
357 358 raise NotImplementedError()
358 359
359 360 class mutablebasepack(versionmixin):
360 361
361 362 def __init__(self, ui, packdir, version=2):
362 363 self._checkversion(version)
363 364 # TODO(augie): make this configurable
364 365 self._compressor = 'GZ'
365 366 opener = vfsmod.vfs(packdir)
366 367 opener.createmode = 0o444
367 368 self.opener = opener
368 369
369 370 self.entries = {}
370 371
371 372 shallowutil.mkstickygroupdir(ui, packdir)
372 373 self.packfp, self.packpath = opener.mkstemp(
373 374 suffix=self.PACKSUFFIX + '-tmp')
374 375 self.idxfp, self.idxpath = opener.mkstemp(
375 376 suffix=self.INDEXSUFFIX + '-tmp')
376 377 self.packfp = os.fdopen(self.packfp, r'wb+')
377 378 self.idxfp = os.fdopen(self.idxfp, r'wb+')
378 379 self.sha = hashlib.sha1()
379 380 self._closed = False
380 381
381 382 # The opener provides no way of doing permission fixup on files created
382 383 # via mkstemp, so we must fix it ourselves. We can probably fix this
383 384 # upstream in vfs.mkstemp so we don't need to use the private method.
384 385 opener._fixfilemode(opener.join(self.packpath))
385 386 opener._fixfilemode(opener.join(self.idxpath))
386 387
387 388 # Write header
388 389 # TODO: make it extensible (ex: allow specifying compression algorithm,
389 390 # a flexible key/value header, delta algorithm, fanout size, etc)
390 391 versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
391 392 self.writeraw(versionbuf)
392 393
393 394 def __enter__(self):
394 395 return self
395 396
396 397 def __exit__(self, exc_type, exc_value, traceback):
397 398 if exc_type is None:
398 399 self.close()
399 400 else:
400 401 self.abort()
401 402
402 403 def abort(self):
403 404 # Unclean exit
404 405 self._cleantemppacks()
405 406
406 407 def writeraw(self, data):
407 408 self.packfp.write(data)
408 409 self.sha.update(data)
409 410
410 411 def close(self, ledger=None):
411 412 if self._closed:
412 413 return
413 414
414 415 try:
415 sha = self.sha.hexdigest()
416 sha = nodemod.hex(self.sha.digest())
416 417 self.packfp.close()
417 418 self.writeindex()
418 419
419 420 if len(self.entries) == 0:
420 421 # Empty pack
421 422 self._cleantemppacks()
422 423 self._closed = True
423 424 return None
424 425
425 426 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
426 427 try:
427 428 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
428 429 except Exception as ex:
429 430 try:
430 431 self.opener.unlink(sha + self.PACKSUFFIX)
431 432 except Exception:
432 433 pass
433 434 # Throw exception 'ex' explicitly since a normal 'raise' would
434 435 # potentially throw an exception from the unlink cleanup.
435 436 raise ex
436 437 except Exception:
437 438 # Clean up temp packs in all exception cases
438 439 self._cleantemppacks()
439 440 raise
440 441
441 442 self._closed = True
442 443 result = self.opener.join(sha)
443 444 if ledger:
444 445 ledger.addcreated(result)
445 446 return result
446 447
447 448 def _cleantemppacks(self):
448 449 try:
449 450 self.opener.unlink(self.packpath)
450 451 except Exception:
451 452 pass
452 453 try:
453 454 self.opener.unlink(self.idxpath)
454 455 except Exception:
455 456 pass
456 457
457 458 def writeindex(self):
458 459 rawindex = ''
459 460
460 461 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
461 462 if largefanout:
462 463 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
463 464 else:
464 465 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
465 466
466 467 fanouttable = [EMPTYFANOUT] * params.fanoutcount
467 468
468 469 # Precompute the location of each entry
469 470 locations = {}
470 471 count = 0
471 472 for node in sorted(self.entries):
472 473 location = count * self.INDEXENTRYLENGTH
473 474 locations[node] = location
474 475 count += 1
475 476
476 477 # Must use [0] on the unpack result since it's always a tuple.
477 478 fanoutkey = struct.unpack(params.fanoutstruct,
478 479 node[:params.fanoutprefix])[0]
479 480 if fanouttable[fanoutkey] == EMPTYFANOUT:
480 481 fanouttable[fanoutkey] = location
481 482
482 483 rawfanouttable = ''
483 484 last = 0
484 485 for offset in fanouttable:
485 486 offset = offset if offset != EMPTYFANOUT else last
486 487 last = offset
487 488 rawfanouttable += struct.pack('!I', offset)
488 489
489 490 rawentrieslength = struct.pack('!Q', len(self.entries))
490 491
491 492 # The index offset is its location in the file, i.e. after the 2 byte
492 493 # header and the fanouttable.
493 494 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
494 495
495 496 self._writeheader(params)
496 497 self.idxfp.write(rawfanouttable)
497 498 self.idxfp.write(rawentrieslength)
498 499 self.idxfp.write(rawindex)
499 500 self.idxfp.close()
500 501
501 502 def createindex(self, nodelocations):
502 503 raise NotImplementedError()
503 504
504 505 def _writeheader(self, indexparams):
505 506 # Index header
506 507 # <version: 1 byte>
507 508 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
508 509 # <unused: 7 bit> # future use (compression, delta format, etc)
509 510 config = 0
510 511 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
511 512 config = 0b10000000
512 513 self.idxfp.write(struct.pack('!BB', self.VERSION, config))
513 514
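A self-contained sketch of decoding the two-byte index header written above: one version byte followed by a config byte whose high bit selects the large (two-byte) fanout prefix, mirroring the check in basepack.__init__. The constants are restated so the snippet runs on its own.

import struct

INDEXVERSIONSIZE = 2                              # as defined at the top of this file
SMALLFANOUTPREFIX, LARGEFANOUTPREFIX = 1, 2

header = struct.pack('!BB', 2, 0b10000000)        # version 2, large-fanout bit set
version, config = struct.unpack('!BB', header[:INDEXVERSIONSIZE])
prefixsize = LARGEFANOUTPREFIX if config & 0b10000000 else SMALLFANOUTPREFIX
assert (version, prefixsize) == (2, LARGEFANOUTPREFIX)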
514 515 class indexparams(object):
515 516 __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
516 517 r'fanoutsize', r'indexstart')
517 518
518 519 def __init__(self, prefixsize, version):
519 520 self.fanoutprefix = prefixsize
520 521
521 522 # The struct pack format for fanout table location (i.e. the format that
522 523 # converts the node prefix into an integer location in the fanout
523 524 # table).
524 525 if prefixsize == SMALLFANOUTPREFIX:
525 526 self.fanoutstruct = '!B'
526 527 elif prefixsize == LARGEFANOUTPREFIX:
527 528 self.fanoutstruct = '!H'
528 529 else:
529 530 raise ValueError("invalid fanout prefix size: %s" % prefixsize)
530 531
531 532 # The number of fanout table entries
532 533 self.fanoutcount = 2**(prefixsize * 8)
533 534
534 535 # The total bytes used by the fanout table
535 536 self.fanoutsize = self.fanoutcount * 4
536 537
537 538 self.indexstart = FANOUTSTART + self.fanoutsize
538 539 # Skip the index length
539 540 self.indexstart += 8