hgext: replace references to hashlib.sha1 with hashutil.sha1...
Augie Fackler
r44519:2d49482d default
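The change is mechanical: each touched module drops its top-level "import hashlib" and builds SHA-1 hashers through mercurial.utils.hashutil instead, as the two hunks below show. A minimal sketch of the pattern outside any particular hunk (the data value is illustrative, and the remark about swapping implementations reflects the intent of this series rather than anything visible in this diff):

    # before (pattern removed by this commit)
    import hashlib
    data = b'example payload'
    digest = hashlib.sha1(data).digest()

    # after (pattern introduced by this commit): hashutil.sha1 behaves like
    # hashlib.sha1 here, but gives Mercurial one place to substitute an
    # alternative SHA-1 implementation later
    from mercurial.utils import hashutil
    digest = hashutil.sha1(data).digest()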
@@ -1,856 +1,858 b''
1 1 # Copyright 2016-present Facebook. All Rights Reserved.
2 2 #
3 3 # context: context needed to annotate a file
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import contextlib
12 import hashlib
13 12 import os
14 13
15 14 from mercurial.i18n import _
16 15 from mercurial.pycompat import (
17 16 getattr,
18 17 open,
19 18 setattr,
20 19 )
21 20 from mercurial import (
22 21 error,
23 22 linelog as linelogmod,
24 23 lock as lockmod,
25 24 mdiff,
26 25 node,
27 26 pycompat,
28 27 scmutil,
29 28 util,
30 29 )
31 from mercurial.utils import stringutil
30 from mercurial.utils import (
31 hashutil,
32 stringutil,
33 )
32 34
33 35 from . import (
34 36 error as faerror,
35 37 revmap as revmapmod,
36 38 )
37 39
38 40 # given path, get filelog, cached
39 41 @util.lrucachefunc
40 42 def _getflog(repo, path):
41 43 return repo.file(path)
42 44
43 45
44 46 # extracted from mercurial.context.basefilectx.annotate
45 47 def _parents(f, follow=True):
46 48 # Cut _descendantrev here to mitigate the penalty of lazy linkrev
47 49 # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
48 50 # from the topmost introrev (= srcrev) down to p.linkrev() if it
49 51 # isn't an ancestor of the srcrev.
50 52 f._changeid
51 53 pl = f.parents()
52 54
53 55 # Don't return renamed parents if we aren't following.
54 56 if not follow:
55 57 pl = [p for p in pl if p.path() == f.path()]
56 58
57 59 # renamed filectx won't have a filelog yet, so set it
58 60 # from the cache to save time
59 61 for p in pl:
60 62 if not '_filelog' in p.__dict__:
61 63 p._filelog = _getflog(f._repo, p.path())
62 64
63 65 return pl
64 66
65 67
66 68 # extracted from mercurial.context.basefilectx.annotate. slightly modified
67 69 # so it takes a fctx instead of a pair of text and fctx.
68 70 def _decorate(fctx):
69 71 text = fctx.data()
70 72 linecount = text.count(b'\n')
71 73 if text and not text.endswith(b'\n'):
72 74 linecount += 1
73 75 return ([(fctx, i) for i in pycompat.xrange(linecount)], text)
74 76
75 77
76 78 # extracted from mercurial.context.basefilectx.annotate. slightly modified
77 79 # so it takes an extra "blocks" parameter calculated elsewhere, instead of
78 80 # calculating diff here.
79 81 def _pair(parent, child, blocks):
80 82 for (a1, a2, b1, b2), t in blocks:
81 83 # Changed blocks ('!') or blocks made only of blank lines ('~')
82 84 # belong to the child.
83 85 if t == b'=':
84 86 child[0][b1:b2] = parent[0][a1:a2]
85 87 return child
86 88
87 89
88 90 # like scmutil.revsingle, but with lru cache, so their states (like manifests)
89 91 # could be reused
90 92 _revsingle = util.lrucachefunc(scmutil.revsingle)
91 93
92 94
93 95 def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
94 96 """(repo, str, str) -> fctx
95 97
96 98 get the filectx object from repo, rev, path, in an efficient way.
97 99
98 100 if resolverev is True, "rev" is a revision specified by the revset
99 101 language, otherwise "rev" is a nodeid, or a revision number that can
100 102 be consumed by repo.__getitem__.
101 103
102 104 if adjustctx is not None, the returned fctx will point to a changeset
103 105 that introduces the change (last modified the file). if adjustctx
104 106 is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
105 107 faster for big repos but is incorrect for some cases.
106 108 """
107 109 if resolverev and not isinstance(rev, int) and rev is not None:
108 110 ctx = _revsingle(repo, rev)
109 111 else:
110 112 ctx = repo[rev]
111 113
112 114 # If we don't need to adjust the linkrev, create the filectx using the
113 115 # changectx instead of using ctx[path]. This means it already has the
114 116 # changectx information, so blame -u will be able to look directly at the
115 117 # commitctx object instead of having to resolve it by going through the
116 118 # manifest. In a lazy-manifest world this can prevent us from downloading a
117 119 # lot of data.
118 120 if adjustctx is None:
119 121 # ctx.rev() is None means it's the working copy, which is a special
120 122 # case.
121 123 if ctx.rev() is None:
122 124 fctx = ctx[path]
123 125 else:
124 126 fctx = repo.filectx(path, changeid=ctx.rev())
125 127 else:
126 128 fctx = ctx[path]
127 129 if adjustctx == b'linkrev':
128 130 introrev = fctx.linkrev()
129 131 else:
130 132 introrev = fctx.introrev()
131 133 if introrev != ctx.rev():
132 134 fctx._changeid = introrev
133 135 fctx._changectx = repo[introrev]
134 136 return fctx
135 137
136 138
137 139 # like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
138 140 def encodedir(path):
139 141 return (
140 142 path.replace(b'.hg/', b'.hg.hg/')
141 143 .replace(b'.l/', b'.l.hg/')
142 144 .replace(b'.m/', b'.m.hg/')
143 145 .replace(b'.lock/', b'.lock.hg/')
144 146 )
145 147
146 148
147 149 def hashdiffopts(diffopts):
148 150 diffoptstr = stringutil.pprint(
149 151 sorted((k, getattr(diffopts, k)) for k in mdiff.diffopts.defaults)
150 152 )
151 return node.hex(hashlib.sha1(diffoptstr).digest())[:6]
153 return node.hex(hashutil.sha1(diffoptstr).digest())[:6]
152 154
153 155
154 156 _defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
155 157
156 158
157 159 class annotateopts(object):
158 160 """like mercurial.mdiff.diffopts, but is for annotate
159 161
160 162 followrename: follow renames, like "hg annotate -f"
161 163 followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
162 164 """
163 165
164 166 defaults = {
165 167 b'diffopts': None,
166 168 b'followrename': True,
167 169 b'followmerge': True,
168 170 }
169 171
170 172 def __init__(self, **opts):
171 173 opts = pycompat.byteskwargs(opts)
172 174 for k, v in pycompat.iteritems(self.defaults):
173 175 setattr(self, k, opts.get(k, v))
174 176
175 177 @util.propertycache
176 178 def shortstr(self):
177 179 """represent opts in a short string, suitable for a directory name"""
178 180 result = b''
179 181 if not self.followrename:
180 182 result += b'r0'
181 183 if not self.followmerge:
182 184 result += b'm0'
183 185 if self.diffopts is not None:
184 186 assert isinstance(self.diffopts, mdiff.diffopts)
185 187 diffopthash = hashdiffopts(self.diffopts)
186 188 if diffopthash != _defaultdiffopthash:
187 189 result += b'i' + diffopthash
188 190 return result or b'default'
189 191
190 192
191 193 defaultopts = annotateopts()
192 194
193 195
194 196 class _annotatecontext(object):
195 197 """do not use this class directly as it does not use lock to protect
196 198 writes. use "with annotatecontext(...)" instead.
197 199 """
198 200
199 201 def __init__(self, repo, path, linelogpath, revmappath, opts):
200 202 self.repo = repo
201 203 self.ui = repo.ui
202 204 self.path = path
203 205 self.opts = opts
204 206 self.linelogpath = linelogpath
205 207 self.revmappath = revmappath
206 208 self._linelog = None
207 209 self._revmap = None
208 210 self._node2path = {} # {str: str}
209 211
210 212 @property
211 213 def linelog(self):
212 214 if self._linelog is None:
213 215 if os.path.exists(self.linelogpath):
214 216 with open(self.linelogpath, b'rb') as f:
215 217 try:
216 218 self._linelog = linelogmod.linelog.fromdata(f.read())
217 219 except linelogmod.LineLogError:
218 220 self._linelog = linelogmod.linelog()
219 221 else:
220 222 self._linelog = linelogmod.linelog()
221 223 return self._linelog
222 224
223 225 @property
224 226 def revmap(self):
225 227 if self._revmap is None:
226 228 self._revmap = revmapmod.revmap(self.revmappath)
227 229 return self._revmap
228 230
229 231 def close(self):
230 232 if self._revmap is not None:
231 233 self._revmap.flush()
232 234 self._revmap = None
233 235 if self._linelog is not None:
234 236 with open(self.linelogpath, b'wb') as f:
235 237 f.write(self._linelog.encode())
236 238 self._linelog = None
237 239
238 240 __del__ = close
239 241
240 242 def rebuild(self):
241 243 """delete linelog and revmap, useful for rebuilding"""
242 244 self.close()
243 245 self._node2path.clear()
244 246 _unlinkpaths([self.revmappath, self.linelogpath])
245 247
246 248 @property
247 249 def lastnode(self):
248 250 """return last node in revmap, or None if revmap is empty"""
249 251 if self._revmap is None:
250 252 # fast path, read revmap without loading its full content
251 253 return revmapmod.getlastnode(self.revmappath)
252 254 else:
253 255 return self._revmap.rev2hsh(self._revmap.maxrev)
254 256
255 257 def isuptodate(self, master, strict=True):
256 258 """return True if the revmap / linelog is up-to-date, or the file
257 259 does not exist in the master revision. False otherwise.
258 260
259 261 it tries to be fast and could return false negatives, because of the
260 262 use of linkrev instead of introrev.
261 263
262 264 useful for both server and client to decide whether to update
263 265 fastannotate cache or not.
264 266
265 267 if strict is True, even if fctx exists in the revmap, but is not the
266 268 last node, isuptodate will return False. it's good for performance - no
267 269 expensive check was done.
268 270
269 271 if strict is False, if fctx exists in the revmap, this function may
270 272 return True. this is useful for the client to skip downloading the
271 273 cache if the client's master is behind the server's.
272 274 """
273 275 lastnode = self.lastnode
274 276 try:
275 277 f = self._resolvefctx(master, resolverev=True)
276 278 # choose linkrev instead of introrev as the check is meant to be
277 279 # *fast*.
278 280 linknode = self.repo.changelog.node(f.linkrev())
279 281 if not strict and lastnode and linknode != lastnode:
280 282 # check if f.node() is in the revmap. note: this loads the
281 283 # revmap and can be slow.
282 284 return self.revmap.hsh2rev(linknode) is not None
283 285 # avoid resolving old manifest, or slow adjustlinkrev to be fast,
284 286 # false negatives are acceptable in this case.
285 287 return linknode == lastnode
286 288 except LookupError:
287 289 # master does not have the file, or the revmap is ahead
288 290 return True
289 291
290 292 def annotate(self, rev, master=None, showpath=False, showlines=False):
291 293 """incrementally update the cache so it includes revisions in the main
292 294 branch till 'master'. and run annotate on 'rev', which may or may not be
293 295 included in the main branch.
294 296
295 297 if master is None, do not update linelog.
296 298
297 299 the first value returned is the annotate result, it is [(node, linenum)]
298 300 by default. [(node, linenum, path)] if showpath is True.
299 301
300 302 if showlines is True, a second value will be returned, it is a list of
301 303 corresponding line contents.
302 304 """
303 305
304 306 # the fast path test requires commit hash, convert rev number to hash,
305 307 # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
306 308 # command could give us a revision number even if the user passes a
307 309 # commit hash.
308 310 if isinstance(rev, int):
309 311 rev = node.hex(self.repo.changelog.node(rev))
310 312
311 313 # fast path: if rev is in the main branch already
312 314 directly, revfctx = self.canannotatedirectly(rev)
313 315 if directly:
314 316 if self.ui.debugflag:
315 317 self.ui.debug(
316 318 b'fastannotate: %s: using fast path '
317 319 b'(resolved fctx: %s)\n'
318 320 % (
319 321 self.path,
320 322 stringutil.pprint(util.safehasattr(revfctx, b'node')),
321 323 )
322 324 )
323 325 return self.annotatedirectly(revfctx, showpath, showlines)
324 326
325 327 # resolve master
326 328 masterfctx = None
327 329 if master:
328 330 try:
329 331 masterfctx = self._resolvefctx(
330 332 master, resolverev=True, adjustctx=True
331 333 )
332 334 except LookupError: # master does not have the file
333 335 pass
334 336 else:
335 337 if masterfctx in self.revmap: # no need to update linelog
336 338 masterfctx = None
337 339
338 340 # ... - @ <- rev (can be an arbitrary changeset,
339 341 # / not necessarily a descendant
340 342 # master -> o of master)
341 343 # |
342 344 # a merge -> o 'o': new changesets in the main branch
343 345 # |\ '#': revisions in the main branch that
344 346 # o * exist in linelog / revmap
345 347 # | . '*': changesets in side branches, or
346 348 # last master -> # . descendants of master
347 349 # | .
348 350 # # * joint: '#', and is a parent of a '*'
349 351 # |/
350 352 # a joint -> # ^^^^ --- side branches
351 353 # |
352 354 # ^ --- main branch (in linelog)
353 355
354 356 # these DFSes are similar to the traditional annotate algorithm.
355 357 # we cannot really reuse the code for perf reason.
356 358
357 359 # 1st DFS calculates merges, joint points, and needed.
358 360 # "needed" is a simple reference counting dict to free items in
359 361 # "hist", reducing its memory usage otherwise could be huge.
360 362 initvisit = [revfctx]
361 363 if masterfctx:
362 364 if masterfctx.rev() is None:
363 365 raise error.Abort(
364 366 _(b'cannot update linelog to wdir()'),
365 367 hint=_(b'set fastannotate.mainbranch'),
366 368 )
367 369 initvisit.append(masterfctx)
368 370 visit = initvisit[:]
369 371 pcache = {}
370 372 needed = {revfctx: 1}
371 373 hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
372 374 while visit:
373 375 f = visit.pop()
374 376 if f in pcache or f in hist:
375 377 continue
376 378 if f in self.revmap: # in the old main branch, it's a joint
377 379 llrev = self.revmap.hsh2rev(f.node())
378 380 self.linelog.annotate(llrev)
379 381 result = self.linelog.annotateresult
380 382 hist[f] = (result, f.data())
381 383 continue
382 384 pl = self._parentfunc(f)
383 385 pcache[f] = pl
384 386 for p in pl:
385 387 needed[p] = needed.get(p, 0) + 1
386 388 if p not in pcache:
387 389 visit.append(p)
388 390
389 391 # 2nd (simple) DFS calculates new changesets in the main branch
390 392 # ('o' nodes in the above graph), so we know when to update linelog.
391 393 newmainbranch = set()
392 394 f = masterfctx
393 395 while f and f not in self.revmap:
394 396 newmainbranch.add(f)
395 397 pl = pcache[f]
396 398 if pl:
397 399 f = pl[0]
398 400 else:
399 401 f = None
400 402 break
401 403
402 404 # f, if present, is the position where the last build stopped at, and
403 405 # should be the "master" last time. check to see if we can continue
404 406 # building the linelog incrementally. (we cannot if diverged)
405 407 if masterfctx is not None:
406 408 self._checklastmasterhead(f)
407 409
408 410 if self.ui.debugflag:
409 411 if newmainbranch:
410 412 self.ui.debug(
411 413 b'fastannotate: %s: %d new changesets in the main'
412 414 b' branch\n' % (self.path, len(newmainbranch))
413 415 )
414 416 elif not hist: # no joints, no updates
415 417 self.ui.debug(
416 418 b'fastannotate: %s: linelog cannot help in '
417 419 b'annotating this revision\n' % self.path
418 420 )
419 421
420 422 # prepare annotateresult so we can update linelog incrementally
421 423 self.linelog.annotate(self.linelog.maxrev)
422 424
423 425 # 3rd DFS does the actual annotate
424 426 visit = initvisit[:]
425 427 progress = self.ui.makeprogress(
426 428 b'building cache', total=len(newmainbranch)
427 429 )
428 430 while visit:
429 431 f = visit[-1]
430 432 if f in hist:
431 433 visit.pop()
432 434 continue
433 435
434 436 ready = True
435 437 pl = pcache[f]
436 438 for p in pl:
437 439 if p not in hist:
438 440 ready = False
439 441 visit.append(p)
440 442 if not ready:
441 443 continue
442 444
443 445 visit.pop()
444 446 blocks = None # mdiff blocks, used for appending linelog
445 447 ismainbranch = f in newmainbranch
446 448 # curr is the same as the traditional annotate algorithm,
447 449 # if we only care about linear history (do not follow merge),
448 450 # then curr is not actually used.
449 451 assert f not in hist
450 452 curr = _decorate(f)
451 453 for i, p in enumerate(pl):
452 454 bs = list(self._diffblocks(hist[p][1], curr[1]))
453 455 if i == 0 and ismainbranch:
454 456 blocks = bs
455 457 curr = _pair(hist[p], curr, bs)
456 458 if needed[p] == 1:
457 459 del hist[p]
458 460 del needed[p]
459 461 else:
460 462 needed[p] -= 1
461 463
462 464 hist[f] = curr
463 465 del pcache[f]
464 466
465 467 if ismainbranch: # need to write to linelog
466 468 progress.increment()
467 469 bannotated = None
468 470 if len(pl) == 2 and self.opts.followmerge: # merge
469 471 bannotated = curr[0]
470 472 if blocks is None: # no parents, add an empty one
471 473 blocks = list(self._diffblocks(b'', curr[1]))
472 474 self._appendrev(f, blocks, bannotated)
473 475 elif showpath: # not append linelog, but we need to record path
474 476 self._node2path[f.node()] = f.path()
475 477
476 478 progress.complete()
477 479
478 480 result = [
479 481 ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
480 482 for fr, l in hist[revfctx][0]
481 483 ] # [(node, linenumber)]
482 484 return self._refineannotateresult(result, revfctx, showpath, showlines)
483 485
484 486 def canannotatedirectly(self, rev):
485 487 """(str) -> bool, fctx or node.
486 488 return (True, f) if we can annotate without updating the linelog, pass
487 489 f to annotatedirectly.
488 490 return (False, f) if we need extra calculation. f is the fctx resolved
489 491 from rev.
490 492 """
491 493 result = True
492 494 f = None
493 495 if not isinstance(rev, int) and rev is not None:
494 496 hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
495 497 if hsh is not None and (hsh, self.path) in self.revmap:
496 498 f = hsh
497 499 if f is None:
498 500 adjustctx = b'linkrev' if self._perfhack else True
499 501 f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
500 502 result = f in self.revmap
501 503 if not result and self._perfhack:
502 504 # redo the resolution without perfhack - as we are going to
503 505 # do write operations, we need a correct fctx.
504 506 f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
505 507 return result, f
506 508
507 509 def annotatealllines(self, rev, showpath=False, showlines=False):
508 510 """(rev : str) -> [(node : str, linenum : int, path : str)]
509 511
510 512 the result has the same format as annotate, but includes all (including
511 513 deleted) lines up to rev. call this after calling annotate(rev, ...) for
512 514 better performance and accuracy.
513 515 """
514 516 revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
515 517
516 518 # find a chain from rev to anything in the mainbranch
517 519 if revfctx not in self.revmap:
518 520 chain = [revfctx]
519 521 a = b''
520 522 while True:
521 523 f = chain[-1]
522 524 pl = self._parentfunc(f)
523 525 if not pl:
524 526 break
525 527 if pl[0] in self.revmap:
526 528 a = pl[0].data()
527 529 break
528 530 chain.append(pl[0])
529 531
530 532 # both self.linelog and self.revmap is backed by filesystem. now
531 533 # we want to modify them but do not want to write changes back to
532 534 # files. so we create in-memory objects and copy them. it's like
533 535 # a "fork".
534 536 linelog = linelogmod.linelog()
535 537 linelog.copyfrom(self.linelog)
536 538 linelog.annotate(linelog.maxrev)
537 539 revmap = revmapmod.revmap()
538 540 revmap.copyfrom(self.revmap)
539 541
540 542 for f in reversed(chain):
541 543 b = f.data()
542 544 blocks = list(self._diffblocks(a, b))
543 545 self._doappendrev(linelog, revmap, f, blocks)
544 546 a = b
545 547 else:
546 548 # fastpath: use existing linelog, revmap as we don't write to them
547 549 linelog = self.linelog
548 550 revmap = self.revmap
549 551
550 552 lines = linelog.getalllines()
551 553 hsh = revfctx.node()
552 554 llrev = revmap.hsh2rev(hsh)
553 555 result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
554 556 # cannot use _refineannotateresult since we need custom logic for
555 557 # resolving line contents
556 558 if showpath:
557 559 result = self._addpathtoresult(result, revmap)
558 560 if showlines:
559 561 linecontents = self._resolvelines(result, revmap, linelog)
560 562 result = (result, linecontents)
561 563 return result
562 564
563 565 def _resolvelines(self, annotateresult, revmap, linelog):
564 566 """(annotateresult) -> [line]. designed for annotatealllines.
565 567 this is probably the most inefficient code in the whole fastannotate
566 568 directory. but we have made a decision that the linelog does not
567 569 store line contents. so getting them requires random accesses to
568 570 the revlog data, since they can be many, it can be very slow.
569 571 """
570 572 # [llrev]
571 573 revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
572 574 result = [None] * len(annotateresult)
573 575 # {(rev, linenum): [lineindex]}
574 576 key2idxs = collections.defaultdict(list)
575 577 for i in pycompat.xrange(len(result)):
576 578 key2idxs[(revs[i], annotateresult[i][1])].append(i)
577 579 while key2idxs:
578 580 # find an unresolved line and its linelog rev to annotate
579 581 hsh = None
580 582 try:
581 583 for (rev, _linenum), idxs in pycompat.iteritems(key2idxs):
582 584 if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
583 585 continue
584 586 hsh = annotateresult[idxs[0]][0]
585 587 break
586 588 except StopIteration: # no more unresolved lines
587 589 return result
588 590 if hsh is None:
589 591 # the remaining key2idxs are not in main branch, resolving them
590 592 # using the hard way...
591 593 revlines = {}
592 594 for (rev, linenum), idxs in pycompat.iteritems(key2idxs):
593 595 if rev not in revlines:
594 596 hsh = annotateresult[idxs[0]][0]
595 597 if self.ui.debugflag:
596 598 self.ui.debug(
597 599 b'fastannotate: reading %s line #%d '
598 600 b'to resolve lines %r\n'
599 601 % (node.short(hsh), linenum, idxs)
600 602 )
601 603 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
602 604 lines = mdiff.splitnewlines(fctx.data())
603 605 revlines[rev] = lines
604 606 for idx in idxs:
605 607 result[idx] = revlines[rev][linenum]
606 608 assert all(x is not None for x in result)
607 609 return result
608 610
609 611 # run the annotate and the lines should match to the file content
610 612 self.ui.debug(
611 613 b'fastannotate: annotate %s to resolve lines\n'
612 614 % node.short(hsh)
613 615 )
614 616 linelog.annotate(rev)
615 617 fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
616 618 annotated = linelog.annotateresult
617 619 lines = mdiff.splitnewlines(fctx.data())
618 620 if len(lines) != len(annotated):
619 621 raise faerror.CorruptedFileError(b'unexpected annotated lines')
620 622 # resolve lines from the annotate result
621 623 for i, line in enumerate(lines):
622 624 k = annotated[i]
623 625 if k in key2idxs:
624 626 for idx in key2idxs[k]:
625 627 result[idx] = line
626 628 del key2idxs[k]
627 629 return result
628 630
629 631 def annotatedirectly(self, f, showpath, showlines):
630 632 """like annotate, but when we know that f is in linelog.
631 633 f can be either a 20-char str (node) or a fctx. this is for perf - in
632 634 the best case, the user provides a node and we don't need to read the
633 635 filelog or construct any filecontext.
634 636 """
635 637 if isinstance(f, bytes):
636 638 hsh = f
637 639 else:
638 640 hsh = f.node()
639 641 llrev = self.revmap.hsh2rev(hsh)
640 642 if not llrev:
641 643 raise faerror.CorruptedFileError(
642 644 b'%s is not in revmap' % node.hex(hsh)
643 645 )
644 646 if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
645 647 raise faerror.CorruptedFileError(
646 648 b'%s is not in revmap mainbranch' % node.hex(hsh)
647 649 )
648 650 self.linelog.annotate(llrev)
649 651 result = [
650 652 (self.revmap.rev2hsh(r), l) for r, l in self.linelog.annotateresult
651 653 ]
652 654 return self._refineannotateresult(result, f, showpath, showlines)
653 655
654 656 def _refineannotateresult(self, result, f, showpath, showlines):
655 657 """add the missing path or line contents, they can be expensive.
656 658 f could be either node or fctx.
657 659 """
658 660 if showpath:
659 661 result = self._addpathtoresult(result)
660 662 if showlines:
661 663 if isinstance(f, bytes): # f: node or fctx
662 664 llrev = self.revmap.hsh2rev(f)
663 665 fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
664 666 else:
665 667 fctx = f
666 668 lines = mdiff.splitnewlines(fctx.data())
667 669 if len(lines) != len(result): # linelog is probably corrupted
668 670 raise faerror.CorruptedFileError()
669 671 result = (result, lines)
670 672 return result
671 673
672 674 def _appendrev(self, fctx, blocks, bannotated=None):
673 675 self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
674 676
675 677 def _diffblocks(self, a, b):
676 678 return mdiff.allblocks(a, b, self.opts.diffopts)
677 679
678 680 @staticmethod
679 681 def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
680 682 """append a revision to linelog and revmap"""
681 683
682 684 def getllrev(f):
683 685 """(fctx) -> int"""
684 686 # f should not be a linelog revision
685 687 if isinstance(f, int):
686 688 raise error.ProgrammingError(b'f should not be an int')
687 689 # f is a fctx, allocate linelog rev on demand
688 690 hsh = f.node()
689 691 rev = revmap.hsh2rev(hsh)
690 692 if rev is None:
691 693 rev = revmap.append(hsh, sidebranch=True, path=f.path())
692 694 return rev
693 695
694 696 # append sidebranch revisions to revmap
695 697 siderevs = []
696 698 siderevmap = {} # node: int
697 699 if bannotated is not None:
698 700 for (a1, a2, b1, b2), op in blocks:
699 701 if op != b'=':
700 702 # f could be either a linelog rev, or a fctx.
701 703 siderevs += [
702 704 f
703 705 for f, l in bannotated[b1:b2]
704 706 if not isinstance(f, int)
705 707 ]
706 708 siderevs = set(siderevs)
707 709 if fctx in siderevs: # mainnode must be appended separately
708 710 siderevs.remove(fctx)
709 711 for f in siderevs:
710 712 siderevmap[f] = getllrev(f)
711 713
712 714 # the changeset in the main branch, could be a merge
713 715 llrev = revmap.append(fctx.node(), path=fctx.path())
714 716 siderevmap[fctx] = llrev
715 717
716 718 for (a1, a2, b1, b2), op in reversed(blocks):
717 719 if op == b'=':
718 720 continue
719 721 if bannotated is None:
720 722 linelog.replacelines(llrev, a1, a2, b1, b2)
721 723 else:
722 724 blines = [
723 725 ((r if isinstance(r, int) else siderevmap[r]), l)
724 726 for r, l in bannotated[b1:b2]
725 727 ]
726 728 linelog.replacelines_vec(llrev, a1, a2, blines)
727 729
728 730 def _addpathtoresult(self, annotateresult, revmap=None):
729 731 """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
730 732 if revmap is None:
731 733 revmap = self.revmap
732 734
733 735 def _getpath(nodeid):
734 736 path = self._node2path.get(nodeid)
735 737 if path is None:
736 738 path = revmap.rev2path(revmap.hsh2rev(nodeid))
737 739 self._node2path[nodeid] = path
738 740 return path
739 741
740 742 return [(n, l, _getpath(n)) for n, l in annotateresult]
741 743
742 744 def _checklastmasterhead(self, fctx):
743 745 """check if fctx is the master's head last time, raise if not"""
744 746 if fctx is None:
745 747 llrev = 0
746 748 else:
747 749 llrev = self.revmap.hsh2rev(fctx.node())
748 750 if not llrev:
749 751 raise faerror.CannotReuseError()
750 752 if self.linelog.maxrev != llrev:
751 753 raise faerror.CannotReuseError()
752 754
753 755 @util.propertycache
754 756 def _parentfunc(self):
755 757 """-> (fctx) -> [fctx]"""
756 758 followrename = self.opts.followrename
757 759 followmerge = self.opts.followmerge
758 760
759 761 def parents(f):
760 762 pl = _parents(f, follow=followrename)
761 763 if not followmerge:
762 764 pl = pl[:1]
763 765 return pl
764 766
765 767 return parents
766 768
767 769 @util.propertycache
768 770 def _perfhack(self):
769 771 return self.ui.configbool(b'fastannotate', b'perfhack')
770 772
771 773 def _resolvefctx(self, rev, path=None, **kwds):
772 774 return resolvefctx(self.repo, rev, (path or self.path), **kwds)
773 775
774 776
775 777 def _unlinkpaths(paths):
776 778 """silent, best-effort unlink"""
777 779 for path in paths:
778 780 try:
779 781 util.unlink(path)
780 782 except OSError:
781 783 pass
782 784
783 785
784 786 class pathhelper(object):
785 787 """helper for getting paths for lockfile, linelog and revmap"""
786 788
787 789 def __init__(self, repo, path, opts=defaultopts):
788 790 # different options use different directories
789 791 self._vfspath = os.path.join(
790 792 b'fastannotate', opts.shortstr, encodedir(path)
791 793 )
792 794 self._repo = repo
793 795
794 796 @property
795 797 def dirname(self):
796 798 return os.path.dirname(self._repo.vfs.join(self._vfspath))
797 799
798 800 @property
799 801 def linelogpath(self):
800 802 return self._repo.vfs.join(self._vfspath + b'.l')
801 803
802 804 def lock(self):
803 805 return lockmod.lock(self._repo.vfs, self._vfspath + b'.lock')
804 806
805 807 @property
806 808 def revmappath(self):
807 809 return self._repo.vfs.join(self._vfspath + b'.m')
808 810
809 811
810 812 @contextlib.contextmanager
811 813 def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
812 814 """context needed to perform (fast) annotate on a file
813 815
814 816 an annotatecontext of a single file consists of two structures: the
815 817 linelog and the revmap. this function takes care of locking. only 1
816 818 process is allowed to write that file's linelog and revmap at a time.
817 819
818 820 when something goes wrong, this function will assume the linelog and the
819 821 revmap are in a bad state, and remove them from disk.
820 822
821 823 use this function in the following way:
822 824
823 825 with annotatecontext(...) as actx:
824 826 actx. ....
825 827 """
826 828 helper = pathhelper(repo, path, opts)
827 829 util.makedirs(helper.dirname)
828 830 revmappath = helper.revmappath
829 831 linelogpath = helper.linelogpath
830 832 actx = None
831 833 try:
832 834 with helper.lock():
833 835 actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
834 836 if rebuild:
835 837 actx.rebuild()
836 838 yield actx
837 839 except Exception:
838 840 if actx is not None:
839 841 actx.rebuild()
840 842 repo.ui.debug(b'fastannotate: %s: cache broken and deleted\n' % path)
841 843 raise
842 844 finally:
843 845 if actx is not None:
844 846 actx.close()
845 847
846 848
847 849 def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
848 850 """like annotatecontext but get the context from a fctx. convenient when
849 851 used in fctx.annotate
850 852 """
851 853 repo = fctx._repo
852 854 path = fctx._path
853 855 if repo.ui.configbool(b'fastannotate', b'forcefollow', True):
854 856 follow = True
855 857 aopts = annotateopts(diffopts=diffopts, followrename=follow)
856 858 return annotatecontext(repo, path, aopts, rebuild)
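In this first file the only functional hunk is in hashdiffopts(), which fingerprints a diffopts object; annotateopts.shortstr appends that six-character digest to the cache directory name only when the options differ from mdiff.defaultopts. A small usage sketch repeating the computation from the hunk above (it assumes Mercurial is importable; the printed value simply depends on the default diff options):

    from mercurial import mdiff, node
    from mercurial.utils import hashutil, stringutil

    # same computation as hashdiffopts() above, applied to the default options;
    # annotateopts.shortstr compares per-request hashes against this value
    diffoptstr = stringutil.pprint(
        sorted((k, getattr(mdiff.defaultopts, k)) for k in mdiff.diffopts.defaults)
    )
    print(node.hex(hashutil.sha1(diffoptstr).digest())[:6])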
@@ -1,988 +1,990 b''
1 1 # __init__.py - fsmonitor initialization and overrides
2 2 #
3 3 # Copyright 2013-2016 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 '''Faster status operations with the Watchman file monitor (EXPERIMENTAL)
9 9
10 10 Integrates the file-watching program Watchman with Mercurial to produce faster
11 11 status results.
12 12
13 13 On a particular Linux system, for a real-world repository with over 400,000
14 14 files hosted on ext4, vanilla `hg status` takes 1.3 seconds. On the same
15 15 system, with fsmonitor it takes about 0.3 seconds.
16 16
17 17 fsmonitor requires no configuration -- it will tell Watchman about your
18 18 repository as necessary. You'll need to install Watchman from
19 19 https://facebook.github.io/watchman/ and make sure it is in your PATH.
20 20
21 21 fsmonitor is incompatible with the largefiles and eol extensions, and
22 22 will disable itself if any of those are active.
23 23
24 24 The following configuration options exist:
25 25
26 26 ::
27 27
28 28 [fsmonitor]
29 29 mode = {off, on, paranoid}
30 30
31 31 When `mode = off`, fsmonitor will disable itself (similar to not loading the
32 32 extension at all). When `mode = on`, fsmonitor will be enabled (the default).
33 33 When `mode = paranoid`, fsmonitor will query both Watchman and the filesystem,
34 34 and ensure that the results are consistent.
35 35
36 36 ::
37 37
38 38 [fsmonitor]
39 39 timeout = (float)
40 40
41 41 A value, in seconds, that determines how long fsmonitor will wait for Watchman
42 42 to return results. Defaults to `2.0`.
43 43
44 44 ::
45 45
46 46 [fsmonitor]
47 47 blacklistusers = (list of userids)
48 48
49 49 A list of usernames for which fsmonitor will disable itself altogether.
50 50
51 51 ::
52 52
53 53 [fsmonitor]
54 54 walk_on_invalidate = (boolean)
55 55
56 56 Whether or not to walk the whole repo ourselves when our cached state has been
57 57 invalidated, for example when Watchman has been restarted or .hgignore rules
58 58 have been changed. Walking the repo in that case can result in competing for
59 59 I/O with Watchman. For large repos it is recommended to set this value to
60 60 false. You may wish to set this to true if you have a very fast filesystem
61 61 that can outpace the IPC overhead of getting the result data for the full repo
62 62 from Watchman. Defaults to false.
63 63
64 64 ::
65 65
66 66 [fsmonitor]
67 67 warn_when_unused = (boolean)
68 68
69 69 Whether to print a warning during certain operations when fsmonitor would be
70 70 beneficial to performance but isn't enabled.
71 71
72 72 ::
73 73
74 74 [fsmonitor]
75 75 warn_update_file_count = (integer)
76 76
77 77 If ``warn_when_unused`` is set and fsmonitor isn't enabled, a warning will
78 78 be printed during working directory updates if this many files will be
79 79 created.
80 80 '''
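For reference, the options documented in the docstring above combine into a single hgrc section. The values here are only illustrative, echoing the defaults and suggestions from the text rather than a recommended configuration:

    [fsmonitor]
    mode = on
    timeout = 2.0
    walk_on_invalidate = false
    warn_when_unused = true
    warn_update_file_count = 50000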
81 81
82 82 # Platforms Supported
83 83 # ===================
84 84 #
85 85 # **Linux:** *Stable*. Watchman and fsmonitor are both known to work reliably,
86 86 # even under severe loads.
87 87 #
88 88 # **Mac OS X:** *Stable*. The Mercurial test suite passes with fsmonitor
89 89 # turned on, on case-insensitive HFS+. There has been a reasonable amount of
90 90 # user testing under normal loads.
91 91 #
92 92 # **Solaris, BSD:** *Alpha*. watchman and fsmonitor are believed to work, but
93 93 # very little testing has been done.
94 94 #
95 95 # **Windows:** *Alpha*. Not in a release version of watchman or fsmonitor yet.
96 96 #
97 97 # Known Issues
98 98 # ============
99 99 #
100 100 # * fsmonitor will disable itself if any of the following extensions are
101 101 # enabled: largefiles, inotify, eol; or if the repository has subrepos.
102 102 # * fsmonitor will produce incorrect results if nested repos that are not
103 103 # subrepos exist. *Workaround*: add nested repo paths to your `.hgignore`.
104 104 #
105 105 # The issues related to nested repos and subrepos are probably not fundamental
106 106 # ones. Patches to fix them are welcome.
107 107
108 108 from __future__ import absolute_import
109 109
110 110 import codecs
111 import hashlib
112 111 import os
113 112 import stat
114 113 import sys
115 114 import tempfile
116 115 import weakref
117 116
118 117 from mercurial.i18n import _
119 118 from mercurial.node import hex
120 119 from mercurial.pycompat import open
121 120 from mercurial import (
122 121 context,
123 122 encoding,
124 123 error,
125 124 extensions,
126 125 localrepo,
127 126 merge,
128 127 pathutil,
129 128 pycompat,
130 129 registrar,
131 130 scmutil,
132 131 util,
133 132 )
134 133 from mercurial import match as matchmod
135 from mercurial.utils import stringutil
134 from mercurial.utils import (
135 hashutil,
136 stringutil,
137 )
136 138
137 139 from . import (
138 140 pywatchman,
139 141 state,
140 142 watchmanclient,
141 143 )
142 144
143 145 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
144 146 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
145 147 # be specifying the version(s) of Mercurial they are tested with, or
146 148 # leave the attribute unspecified.
147 149 testedwith = b'ships-with-hg-core'
148 150
149 151 configtable = {}
150 152 configitem = registrar.configitem(configtable)
151 153
152 154 configitem(
153 155 b'fsmonitor', b'mode', default=b'on',
154 156 )
155 157 configitem(
156 158 b'fsmonitor', b'walk_on_invalidate', default=False,
157 159 )
158 160 configitem(
159 161 b'fsmonitor', b'timeout', default=b'2',
160 162 )
161 163 configitem(
162 164 b'fsmonitor', b'blacklistusers', default=list,
163 165 )
164 166 configitem(
165 167 b'fsmonitor', b'watchman_exe', default=b'watchman',
166 168 )
167 169 configitem(
168 170 b'fsmonitor', b'verbose', default=True, experimental=True,
169 171 )
170 172 configitem(
171 173 b'experimental', b'fsmonitor.transaction_notify', default=False,
172 174 )
173 175
174 176 # This extension is incompatible with the following blacklisted extensions
175 177 # and will disable itself when encountering one of these:
176 178 _blacklist = [b'largefiles', b'eol']
177 179
178 180
179 181 def debuginstall(ui, fm):
180 182 fm.write(
181 183 b"fsmonitor-watchman",
182 184 _(b"fsmonitor checking for watchman binary... (%s)\n"),
183 185 ui.configpath(b"fsmonitor", b"watchman_exe"),
184 186 )
185 187 root = tempfile.mkdtemp()
186 188 c = watchmanclient.client(ui, root)
187 189 err = None
188 190 try:
189 191 v = c.command(b"version")
190 192 fm.write(
191 193 b"fsmonitor-watchman-version",
192 194 _(b" watchman binary version %s\n"),
193 195 pycompat.bytestr(v["version"]),
194 196 )
195 197 except watchmanclient.Unavailable as e:
196 198 err = stringutil.forcebytestr(e)
197 199 fm.condwrite(
198 200 err,
199 201 b"fsmonitor-watchman-error",
200 202 _(b" watchman binary missing or broken: %s\n"),
201 203 err,
202 204 )
203 205 return 1 if err else 0
204 206
205 207
206 208 def _handleunavailable(ui, state, ex):
207 209 """Exception handler for Watchman interaction exceptions"""
208 210 if isinstance(ex, watchmanclient.Unavailable):
209 211 # experimental config: fsmonitor.verbose
210 212 if ex.warn and ui.configbool(b'fsmonitor', b'verbose'):
211 213 if b'illegal_fstypes' not in stringutil.forcebytestr(ex):
212 214 ui.warn(stringutil.forcebytestr(ex) + b'\n')
213 215 if ex.invalidate:
214 216 state.invalidate()
215 217 # experimental config: fsmonitor.verbose
216 218 if ui.configbool(b'fsmonitor', b'verbose'):
217 219 ui.log(
218 220 b'fsmonitor',
219 221 b'Watchman unavailable: %s\n',
220 222 stringutil.forcebytestr(ex.msg),
221 223 )
222 224 else:
223 225 ui.log(
224 226 b'fsmonitor',
225 227 b'Watchman exception: %s\n',
226 228 stringutil.forcebytestr(ex),
227 229 )
228 230
229 231
230 232 def _hashignore(ignore):
231 233 """Calculate hash for ignore patterns and filenames
232 234
233 235 If this information changes between Mercurial invocations, we can't
234 236 rely on Watchman information anymore and have to re-scan the working
235 237 copy.
236 238
237 239 """
238 sha1 = hashlib.sha1()
240 sha1 = hashutil.sha1()
239 241 sha1.update(pycompat.byterepr(ignore))
240 242 return pycompat.sysbytes(sha1.hexdigest())
241 243
242 244
243 245 _watchmanencoding = pywatchman.encoding.get_local_encoding()
244 246 _fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
245 247 _fixencoding = codecs.lookup(_watchmanencoding) != codecs.lookup(_fsencoding)
246 248
247 249
248 250 def _watchmantofsencoding(path):
249 251 """Fix path to match watchman and local filesystem encoding
250 252
251 253 watchman's paths encoding can differ from filesystem encoding. For example,
252 254 on Windows, it's always utf-8.
253 255 """
254 256 try:
255 257 decoded = path.decode(_watchmanencoding)
256 258 except UnicodeDecodeError as e:
257 259 raise error.Abort(
258 260 stringutil.forcebytestr(e), hint=b'watchman encoding error'
259 261 )
260 262
261 263 try:
262 264 encoded = decoded.encode(_fsencoding, 'strict')
263 265 except UnicodeEncodeError as e:
264 266 raise error.Abort(stringutil.forcebytestr(e))
265 267
266 268 return encoded
267 269
268 270
269 271 def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
270 272 '''Replacement for dirstate.walk, hooking into Watchman.
271 273
272 274 Whenever full is False, ignored is False, and the Watchman client is
273 275 available, use Watchman combined with saved state to possibly return only a
274 276 subset of files.'''
275 277
276 278 def bail(reason):
277 279 self._ui.debug(b'fsmonitor: fallback to core status, %s\n' % reason)
278 280 return orig(match, subrepos, unknown, ignored, full=True)
279 281
280 282 if full:
281 283 return bail(b'full rewalk requested')
282 284 if ignored:
283 285 return bail(b'listing ignored files')
284 286 if not self._watchmanclient.available():
285 287 return bail(b'client unavailable')
286 288 state = self._fsmonitorstate
287 289 clock, ignorehash, notefiles = state.get()
288 290 if not clock:
289 291 if state.walk_on_invalidate:
290 292 return bail(b'no clock')
291 293 # Initial NULL clock value, see
292 294 # https://facebook.github.io/watchman/docs/clockspec.html
293 295 clock = b'c:0:0'
294 296 notefiles = []
295 297
296 298 ignore = self._ignore
297 299 dirignore = self._dirignore
298 300 if unknown:
299 301 if _hashignore(ignore) != ignorehash and clock != b'c:0:0':
300 302 # ignore list changed -- can't rely on Watchman state any more
301 303 if state.walk_on_invalidate:
302 304 return bail(b'ignore rules changed')
303 305 notefiles = []
304 306 clock = b'c:0:0'
305 307 else:
306 308 # always ignore
307 309 ignore = util.always
308 310 dirignore = util.always
309 311
310 312 matchfn = match.matchfn
311 313 matchalways = match.always()
312 314 dmap = self._map
313 315 if util.safehasattr(dmap, b'_map'):
314 316 # for better performance, directly access the inner dirstate map if the
315 317 # standard dirstate implementation is in use.
316 318 dmap = dmap._map
317 319 nonnormalset = self._map.nonnormalset
318 320
319 321 copymap = self._map.copymap
320 322 getkind = stat.S_IFMT
321 323 dirkind = stat.S_IFDIR
322 324 regkind = stat.S_IFREG
323 325 lnkkind = stat.S_IFLNK
324 326 join = self._join
325 327 normcase = util.normcase
326 328 fresh_instance = False
327 329
328 330 exact = skipstep3 = False
329 331 if match.isexact(): # match.exact
330 332 exact = True
331 333 dirignore = util.always # skip step 2
332 334 elif match.prefix(): # match.match, no patterns
333 335 skipstep3 = True
334 336
335 337 if not exact and self._checkcase:
336 338 # note that even though we could receive directory entries, we're only
337 339 # interested in checking if a file with the same name exists. So only
338 340 # normalize files if possible.
339 341 normalize = self._normalizefile
340 342 skipstep3 = False
341 343 else:
342 344 normalize = None
343 345
344 346 # step 1: find all explicit files
345 347 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
346 348
347 349 skipstep3 = skipstep3 and not (work or dirsnotfound)
348 350 work = [d for d in work if not dirignore(d[0])]
349 351
350 352 if not work and (exact or skipstep3):
351 353 for s in subrepos:
352 354 del results[s]
353 355 del results[b'.hg']
354 356 return results
355 357
356 358 # step 2: query Watchman
357 359 try:
358 360 # Use the user-configured timeout for the query.
359 361 # Add a little slack over the top of the user query to allow for
360 362 # overheads while transferring the data
361 363 self._watchmanclient.settimeout(state.timeout + 0.1)
362 364 result = self._watchmanclient.command(
363 365 b'query',
364 366 {
365 367 b'fields': [b'mode', b'mtime', b'size', b'exists', b'name'],
366 368 b'since': clock,
367 369 b'expression': [
368 370 b'not',
369 371 [
370 372 b'anyof',
371 373 [b'dirname', b'.hg'],
372 374 [b'name', b'.hg', b'wholename'],
373 375 ],
374 376 ],
375 377 b'sync_timeout': int(state.timeout * 1000),
376 378 b'empty_on_fresh_instance': state.walk_on_invalidate,
377 379 },
378 380 )
379 381 except Exception as ex:
380 382 _handleunavailable(self._ui, state, ex)
381 383 self._watchmanclient.clearconnection()
382 384 return bail(b'exception during run')
383 385 else:
384 386 # We need to propagate the last observed clock up so that we
385 387 # can use it for our next query
386 388 state.setlastclock(pycompat.sysbytes(result[b'clock']))
387 389 if result[b'is_fresh_instance']:
388 390 if state.walk_on_invalidate:
389 391 state.invalidate()
390 392 return bail(b'fresh instance')
391 393 fresh_instance = True
392 394 # Ignore any prior notable files from the state info
393 395 notefiles = []
394 396
395 397 # for file paths which require normalization and we encounter a case
396 398 # collision, we store our own foldmap
397 399 if normalize:
398 400 foldmap = dict((normcase(k), k) for k in results)
399 401
400 402 switch_slashes = pycompat.ossep == b'\\'
401 403 # The order of the results is, strictly speaking, undefined.
402 404 # For case changes on a case insensitive filesystem we may receive
403 405 # two entries, one with exists=True and another with exists=False.
404 406 # The exists=True entries in the same response should be interpreted
405 407 # as being happens-after the exists=False entries due to the way that
406 408 # Watchman tracks files. We use this property to reconcile deletes
407 409 # for name case changes.
408 410 for entry in result[b'files']:
409 411 fname = entry[b'name']
410 412
411 413 # Watchman always gives us a str. Normalize to bytes on Python 3
412 414 # using Watchman's encoding, if needed.
413 415 if not isinstance(fname, bytes):
414 416 fname = fname.encode(_watchmanencoding)
415 417
416 418 if _fixencoding:
417 419 fname = _watchmantofsencoding(fname)
418 420
419 421 if switch_slashes:
420 422 fname = fname.replace(b'\\', b'/')
421 423 if normalize:
422 424 normed = normcase(fname)
423 425 fname = normalize(fname, True, True)
424 426 foldmap[normed] = fname
425 427 fmode = entry[b'mode']
426 428 fexists = entry[b'exists']
427 429 kind = getkind(fmode)
428 430
429 431 if b'/.hg/' in fname or fname.endswith(b'/.hg'):
430 432 return bail(b'nested-repo-detected')
431 433
432 434 if not fexists:
433 435 # if marked as deleted and we don't already have a change
434 436 # record, mark it as deleted. If we already have an entry
435 437 # for fname then it was either part of walkexplicit or was
436 438 # an earlier result that was a case change
437 439 if (
438 440 fname not in results
439 441 and fname in dmap
440 442 and (matchalways or matchfn(fname))
441 443 ):
442 444 results[fname] = None
443 445 elif kind == dirkind:
444 446 if fname in dmap and (matchalways or matchfn(fname)):
445 447 results[fname] = None
446 448 elif kind == regkind or kind == lnkkind:
447 449 if fname in dmap:
448 450 if matchalways or matchfn(fname):
449 451 results[fname] = entry
450 452 elif (matchalways or matchfn(fname)) and not ignore(fname):
451 453 results[fname] = entry
452 454 elif fname in dmap and (matchalways or matchfn(fname)):
453 455 results[fname] = None
454 456
455 457 # step 3: query notable files we don't already know about
456 458 # XXX try not to iterate over the entire dmap
457 459 if normalize:
458 460 # any notable files that have changed case will already be handled
459 461 # above, so just check membership in the foldmap
460 462 notefiles = set(
461 463 (
462 464 normalize(f, True, True)
463 465 for f in notefiles
464 466 if normcase(f) not in foldmap
465 467 )
466 468 )
467 469 visit = set(
468 470 (
469 471 f
470 472 for f in notefiles
471 473 if (
472 474 f not in results and matchfn(f) and (f in dmap or not ignore(f))
473 475 )
474 476 )
475 477 )
476 478
477 479 if not fresh_instance:
478 480 if matchalways:
479 481 visit.update(f for f in nonnormalset if f not in results)
480 482 visit.update(f for f in copymap if f not in results)
481 483 else:
482 484 visit.update(
483 485 f for f in nonnormalset if f not in results and matchfn(f)
484 486 )
485 487 visit.update(f for f in copymap if f not in results and matchfn(f))
486 488 else:
487 489 if matchalways:
488 490 visit.update(
489 491 f for f, st in pycompat.iteritems(dmap) if f not in results
490 492 )
491 493 visit.update(f for f in copymap if f not in results)
492 494 else:
493 495 visit.update(
494 496 f
495 497 for f, st in pycompat.iteritems(dmap)
496 498 if f not in results and matchfn(f)
497 499 )
498 500 visit.update(f for f in copymap if f not in results and matchfn(f))
499 501
500 502 audit = pathutil.pathauditor(self._root, cached=True).check
501 503 auditpass = [f for f in visit if audit(f)]
502 504 auditpass.sort()
503 505 auditfail = visit.difference(auditpass)
504 506 for f in auditfail:
505 507 results[f] = None
506 508
507 509 nf = iter(auditpass)
508 510 for st in util.statfiles([join(f) for f in auditpass]):
509 511 f = next(nf)
510 512 if st or f in dmap:
511 513 results[f] = st
512 514
513 515 for s in subrepos:
514 516 del results[s]
515 517 del results[b'.hg']
516 518 return results
517 519
518 520
519 521 def overridestatus(
520 522 orig,
521 523 self,
522 524 node1=b'.',
523 525 node2=None,
524 526 match=None,
525 527 ignored=False,
526 528 clean=False,
527 529 unknown=False,
528 530 listsubrepos=False,
529 531 ):
530 532 listignored = ignored
531 533 listclean = clean
532 534 listunknown = unknown
533 535
534 536 def _cmpsets(l1, l2):
535 537 try:
536 538 if b'FSMONITOR_LOG_FILE' in encoding.environ:
537 539 fn = encoding.environ[b'FSMONITOR_LOG_FILE']
538 540 f = open(fn, b'wb')
539 541 else:
540 542 fn = b'fsmonitorfail.log'
541 543 f = self.vfs.open(fn, b'wb')
542 544 except (IOError, OSError):
543 545 self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
544 546 return
545 547
546 548 try:
547 549 for i, (s1, s2) in enumerate(zip(l1, l2)):
548 550 if set(s1) != set(s2):
549 551 f.write(b'sets at position %d are unequal\n' % i)
550 552 f.write(b'watchman returned: %s\n' % s1)
551 553 f.write(b'stat returned: %s\n' % s2)
552 554 finally:
553 555 f.close()
554 556
555 557 if isinstance(node1, context.changectx):
556 558 ctx1 = node1
557 559 else:
558 560 ctx1 = self[node1]
559 561 if isinstance(node2, context.changectx):
560 562 ctx2 = node2
561 563 else:
562 564 ctx2 = self[node2]
563 565
564 566 working = ctx2.rev() is None
565 567 parentworking = working and ctx1 == self[b'.']
566 568 match = match or matchmod.always()
567 569
568 570 # Maybe we can use this opportunity to update Watchman's state.
569 571 # Mercurial uses workingcommitctx and/or memctx to represent the part of
570 572 # the workingctx that is to be committed. So don't update the state in
571 573 # that case.
572 574 # HG_PENDING is set in the environment when the dirstate is being updated
573 575 # in the middle of a transaction; we must not update our state in that
574 576 # case, or we risk forgetting about changes in the working copy.
575 577 updatestate = (
576 578 parentworking
577 579 and match.always()
578 580 and not isinstance(ctx2, (context.workingcommitctx, context.memctx))
579 581 and b'HG_PENDING' not in encoding.environ
580 582 )
581 583
582 584 try:
583 585 if self._fsmonitorstate.walk_on_invalidate:
584 586 # Use a short timeout to query the current clock. If that
585 587 # takes too long then we assume that the service will be slow
586 588 # to answer our query.
587 589 # walk_on_invalidate indicates that we prefer to walk the
588 590 # tree ourselves because we can ignore portions that Watchman
589 591 # cannot and we tend to be faster in the warmer buffer cache
590 592 # cases.
591 593 self._watchmanclient.settimeout(0.1)
592 594 else:
593 595 # Give Watchman more time to potentially complete its walk
594 596 # and return the initial clock. In this mode we assume that
595 597 # the filesystem will be slower than parsing a potentially
596 598 # very large Watchman result set.
597 599 self._watchmanclient.settimeout(self._fsmonitorstate.timeout + 0.1)
598 600 startclock = self._watchmanclient.getcurrentclock()
599 601 except Exception as ex:
600 602 self._watchmanclient.clearconnection()
601 603 _handleunavailable(self.ui, self._fsmonitorstate, ex)
602 604 # boo, Watchman failed. bail
603 605 return orig(
604 606 node1,
605 607 node2,
606 608 match,
607 609 listignored,
608 610 listclean,
609 611 listunknown,
610 612 listsubrepos,
611 613 )
612 614
613 615 if updatestate:
614 616 # We need info about unknown files. This may make things slower the
615 617 # first time, but whatever.
616 618 stateunknown = True
617 619 else:
618 620 stateunknown = listunknown
619 621
620 622 if updatestate:
621 623 ps = poststatus(startclock)
622 624 self.addpostdsstatus(ps)
623 625
624 626 r = orig(
625 627 node1, node2, match, listignored, listclean, stateunknown, listsubrepos
626 628 )
627 629 modified, added, removed, deleted, unknown, ignored, clean = r
628 630
629 631 if not listunknown:
630 632 unknown = []
631 633
632 634 # don't do paranoid checks if we're not going to query Watchman anyway
633 635 full = listclean or match.traversedir is not None
634 636 if self._fsmonitorstate.mode == b'paranoid' and not full:
635 637 # run status again and fall back to the old walk this time
636 638 self.dirstate._fsmonitordisable = True
637 639
638 640 # shut the UI up
639 641 quiet = self.ui.quiet
640 642 self.ui.quiet = True
641 643 fout, ferr = self.ui.fout, self.ui.ferr
642 644 self.ui.fout = self.ui.ferr = open(os.devnull, b'wb')
643 645
644 646 try:
645 647 rv2 = orig(
646 648 node1,
647 649 node2,
648 650 match,
649 651 listignored,
650 652 listclean,
651 653 listunknown,
652 654 listsubrepos,
653 655 )
654 656 finally:
655 657 self.dirstate._fsmonitordisable = False
656 658 self.ui.quiet = quiet
657 659 self.ui.fout, self.ui.ferr = fout, ferr
658 660
659 661 # clean isn't tested since it's set to True above
660 662 with self.wlock():
661 663 _cmpsets(
662 664 [modified, added, removed, deleted, unknown, ignored, clean],
663 665 rv2,
664 666 )
665 667 modified, added, removed, deleted, unknown, ignored, clean = rv2
666 668
667 669 return scmutil.status(
668 670 modified, added, removed, deleted, unknown, ignored, clean
669 671 )
670 672
671 673
672 674 class poststatus(object):
673 675 def __init__(self, startclock):
674 676 self._startclock = startclock
675 677
676 678 def __call__(self, wctx, status):
677 679 clock = wctx.repo()._fsmonitorstate.getlastclock() or self._startclock
678 680 hashignore = _hashignore(wctx.repo().dirstate._ignore)
679 681 notefiles = (
680 682 status.modified
681 683 + status.added
682 684 + status.removed
683 685 + status.deleted
684 686 + status.unknown
685 687 )
686 688 wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
687 689
688 690
689 691 def makedirstate(repo, dirstate):
690 692 class fsmonitordirstate(dirstate.__class__):
691 693 def _fsmonitorinit(self, repo):
692 694 # _fsmonitordisable is used in paranoid mode
693 695 self._fsmonitordisable = False
694 696 self._fsmonitorstate = repo._fsmonitorstate
695 697 self._watchmanclient = repo._watchmanclient
696 698 self._repo = weakref.proxy(repo)
697 699
698 700 def walk(self, *args, **kwargs):
699 701 orig = super(fsmonitordirstate, self).walk
700 702 if self._fsmonitordisable:
701 703 return orig(*args, **kwargs)
702 704 return overridewalk(orig, self, *args, **kwargs)
703 705
704 706 def rebuild(self, *args, **kwargs):
705 707 self._fsmonitorstate.invalidate()
706 708 return super(fsmonitordirstate, self).rebuild(*args, **kwargs)
707 709
708 710 def invalidate(self, *args, **kwargs):
709 711 self._fsmonitorstate.invalidate()
710 712 return super(fsmonitordirstate, self).invalidate(*args, **kwargs)
711 713
712 714 dirstate.__class__ = fsmonitordirstate
713 715 dirstate._fsmonitorinit(repo)
714 716
715 717
716 718 def wrapdirstate(orig, self):
717 719 ds = orig(self)
718 720 # only override the dirstate when Watchman is available for the repo
719 721 if util.safehasattr(self, b'_fsmonitorstate'):
720 722 makedirstate(self, ds)
721 723 return ds
722 724
723 725
724 726 def extsetup(ui):
725 727 extensions.wrapfilecache(
726 728 localrepo.localrepository, b'dirstate', wrapdirstate
727 729 )
728 730 if pycompat.isdarwin:
729 731 # An assist for avoiding the dangling-symlink fsevents bug
730 732 extensions.wrapfunction(os, b'symlink', wrapsymlink)
731 733
732 734 extensions.wrapfunction(merge, b'update', wrapupdate)
733 735
734 736
735 737 def wrapsymlink(orig, source, link_name):
736 738 ''' if we create a dangling symlink, also touch the parent dir
737 739 to encourage fsevents notifications to work more correctly '''
738 740 try:
739 741 return orig(source, link_name)
740 742 finally:
741 743 try:
742 744 os.utime(os.path.dirname(link_name), None)
743 745 except OSError:
744 746 pass
745 747
746 748
747 749 class state_update(object):
748 750 ''' This context manager is responsible for dispatching the state-enter
749 751 and state-leave signals to the watchman service. The enter and leave
750 752 methods can be invoked manually (for scenarios where context manager
751 753 semantics are not possible). If parameters oldnode and newnode are None,
 752 754     they will be populated based on the current working copy in enter and
 753 755     leave, respectively. Similarly, if the distance is None, it will be
754 756 calculated based on the oldnode and newnode in the leave method.'''
755 757
756 758 def __init__(
757 759 self,
758 760 repo,
759 761 name,
760 762 oldnode=None,
761 763 newnode=None,
762 764 distance=None,
763 765 partial=False,
764 766 ):
765 767 self.repo = repo.unfiltered()
766 768 self.name = name
767 769 self.oldnode = oldnode
768 770 self.newnode = newnode
769 771 self.distance = distance
770 772 self.partial = partial
771 773 self._lock = None
772 774 self.need_leave = False
773 775
774 776 def __enter__(self):
775 777 self.enter()
776 778
777 779 def enter(self):
778 780 # Make sure we have a wlock prior to sending notifications to watchman.
779 781 # We don't want to race with other actors. In the update case,
780 782 # merge.update is going to take the wlock almost immediately. We are
781 783 # effectively extending the lock around several short sanity checks.
782 784 if self.oldnode is None:
783 785 self.oldnode = self.repo[b'.'].node()
784 786
785 787 if self.repo.currentwlock() is None:
786 788 if util.safehasattr(self.repo, b'wlocknostateupdate'):
787 789 self._lock = self.repo.wlocknostateupdate()
788 790 else:
789 791 self._lock = self.repo.wlock()
790 792 self.need_leave = self._state(b'state-enter', hex(self.oldnode))
791 793 return self
792 794
793 795 def __exit__(self, type_, value, tb):
794 796 abort = True if type_ else False
795 797 self.exit(abort=abort)
796 798
797 799 def exit(self, abort=False):
798 800 try:
799 801 if self.need_leave:
800 802 status = b'failed' if abort else b'ok'
801 803 if self.newnode is None:
802 804 self.newnode = self.repo[b'.'].node()
803 805 if self.distance is None:
804 806 self.distance = calcdistance(
805 807 self.repo, self.oldnode, self.newnode
806 808 )
807 809 self._state(b'state-leave', hex(self.newnode), status=status)
808 810 finally:
809 811 self.need_leave = False
810 812 if self._lock:
811 813 self._lock.release()
812 814
813 815 def _state(self, cmd, commithash, status=b'ok'):
814 816 if not util.safehasattr(self.repo, b'_watchmanclient'):
815 817 return False
816 818 try:
817 819 self.repo._watchmanclient.command(
818 820 cmd,
819 821 {
820 822 b'name': self.name,
821 823 b'metadata': {
822 824 # the target revision
823 825 b'rev': commithash,
824 826 # approximate number of commits between current and target
825 827 b'distance': self.distance if self.distance else 0,
826 828 # success/failure (only really meaningful for state-leave)
827 829 b'status': status,
828 830 # whether the working copy parent is changing
829 831 b'partial': self.partial,
830 832 },
831 833 },
832 834 )
833 835 return True
834 836 except Exception as e:
835 837 # Swallow any errors; fire and forget
836 838 self.repo.ui.log(
837 839 b'watchman', b'Exception %s while running %s\n', e, cmd
838 840 )
839 841 return False
840 842
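# A minimal usage sketch for the state_update context manager above; the
# helper function and operation name below are illustrative assumptions, not
# part of this extension.
def _example_state_bracket(repo, newnode):
    # Watchman subscribers see state-enter before the working copy changes
    # and state-leave (with an ok/failed status) once the block finishes.
    with state_update(repo, name=b"hg.example-op", newnode=newnode):
        pass  # mutate the working copy here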
841 843
842 844 # Estimate the distance between two nodes
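# For example, with oldnode at rev 10, newnode at rev 14, and their common
# ancestor at rev 8, the estimate is |10 - 8| + |14 - 8| = 8 commits (the
# revision numbers are illustrative).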
843 845 def calcdistance(repo, oldnode, newnode):
844 846 anc = repo.changelog.ancestor(oldnode, newnode)
845 847 ancrev = repo[anc].rev()
846 848 distance = abs(repo[oldnode].rev() - ancrev) + abs(
847 849 repo[newnode].rev() - ancrev
848 850 )
849 851 return distance
850 852
851 853
852 854 # Bracket working copy updates with calls to the watchman state-enter
853 855 # and state-leave commands. This allows clients to perform more intelligent
854 856 # settling during bulk file change scenarios
855 857 # https://facebook.github.io/watchman/docs/cmd/subscribe.html#advanced-settling
856 858 def wrapupdate(
857 859 orig,
858 860 repo,
859 861 node,
860 862 branchmerge,
861 863 force,
862 864 ancestor=None,
863 865 mergeancestor=False,
864 866 labels=None,
865 867 matcher=None,
866 868 **kwargs
867 869 ):
868 870
869 871 distance = 0
870 872 partial = True
871 873 oldnode = repo[b'.'].node()
872 874 newnode = repo[node].node()
873 875 if matcher is None or matcher.always():
874 876 partial = False
875 877 distance = calcdistance(repo.unfiltered(), oldnode, newnode)
876 878
877 879 with state_update(
878 880 repo,
879 881 name=b"hg.update",
880 882 oldnode=oldnode,
881 883 newnode=newnode,
882 884 distance=distance,
883 885 partial=partial,
884 886 ):
885 887 return orig(
886 888 repo,
887 889 node,
888 890 branchmerge,
889 891 force,
890 892 ancestor,
891 893 mergeancestor,
892 894 labels,
893 895 matcher,
894 896 **kwargs
895 897 )
896 898
897 899
898 900 def repo_has_depth_one_nested_repo(repo):
899 901 for f in repo.wvfs.listdir():
900 902 if os.path.isdir(os.path.join(repo.root, f, b'.hg')):
901 903 msg = b'fsmonitor: sub-repository %r detected, fsmonitor disabled\n'
902 904 repo.ui.debug(msg % f)
903 905 return True
904 906 return False
905 907
906 908
907 909 def reposetup(ui, repo):
908 910 # We don't work with largefiles or inotify
909 911 exts = extensions.enabled()
910 912 for ext in _blacklist:
911 913 if ext in exts:
912 914 ui.warn(
913 915 _(
914 916 b'The fsmonitor extension is incompatible with the %s '
915 917 b'extension and has been disabled.\n'
916 918 )
917 919 % ext
918 920 )
919 921 return
920 922
921 923 if repo.local():
922 924 # We don't work with subrepos either.
923 925 #
 924 926         # Checking repo[None].substate can cause a dirstate parse, which
 925 927         # is too slow. Instead, look for the .hgsub/.hgsubstate files directly.
926 928 if repo.wvfs.exists(b'.hgsubstate') or repo.wvfs.exists(b'.hgsub'):
927 929 return
928 930
929 931 if repo_has_depth_one_nested_repo(repo):
930 932 return
931 933
932 934 fsmonitorstate = state.state(repo)
933 935 if fsmonitorstate.mode == b'off':
934 936 return
935 937
936 938 try:
937 939 client = watchmanclient.client(repo.ui, repo.root)
938 940 except Exception as ex:
939 941 _handleunavailable(ui, fsmonitorstate, ex)
940 942 return
941 943
942 944 repo._fsmonitorstate = fsmonitorstate
943 945 repo._watchmanclient = client
944 946
945 947 dirstate, cached = localrepo.isfilecached(repo, b'dirstate')
946 948 if cached:
947 949 # at this point since fsmonitorstate wasn't present,
948 950 # repo.dirstate is not a fsmonitordirstate
949 951 makedirstate(repo, dirstate)
950 952
951 953 class fsmonitorrepo(repo.__class__):
952 954 def status(self, *args, **kwargs):
953 955 orig = super(fsmonitorrepo, self).status
954 956 return overridestatus(orig, self, *args, **kwargs)
955 957
956 958 def wlocknostateupdate(self, *args, **kwargs):
957 959 return super(fsmonitorrepo, self).wlock(*args, **kwargs)
958 960
959 961 def wlock(self, *args, **kwargs):
960 962 l = super(fsmonitorrepo, self).wlock(*args, **kwargs)
961 963 if not ui.configbool(
962 964 b"experimental", b"fsmonitor.transaction_notify"
963 965 ):
964 966 return l
965 967 if l.held != 1:
966 968 return l
967 969 origrelease = l.releasefn
968 970
969 971 def staterelease():
970 972 if origrelease:
971 973 origrelease()
972 974 if l.stateupdate:
973 975 l.stateupdate.exit()
974 976 l.stateupdate = None
975 977
976 978 try:
977 979 l.stateupdate = None
978 980 l.stateupdate = state_update(self, name=b"hg.transaction")
979 981 l.stateupdate.enter()
980 982 l.releasefn = staterelease
981 983 except Exception as e:
982 984 # Swallow any errors; fire and forget
983 985 self.ui.log(
984 986 b'watchman', b'Exception in state update %s\n', e
985 987 )
986 988 return l
987 989
988 990 repo.__class__ = fsmonitorrepo
@@ -1,184 +1,186 b''
1 1 # This software may be used and distributed according to the terms of the
2 2 # GNU General Public License version 2 or any later version.
3 3
4 4 # based on bundleheads extension by Gregory Szorc <gps@mozilla.com>
5 5
6 6 from __future__ import absolute_import
7 7
8 8 import abc
9 import hashlib
10 9 import os
11 10 import subprocess
12 11 import tempfile
13 12
14 13 from mercurial.pycompat import open
15 14 from mercurial import (
16 15 node,
17 16 pycompat,
18 17 )
19 from mercurial.utils import procutil
18 from mercurial.utils import (
19 hashutil,
20 procutil,
21 )
20 22
21 23 NamedTemporaryFile = tempfile.NamedTemporaryFile
22 24
23 25
24 26 class BundleWriteException(Exception):
25 27 pass
26 28
27 29
28 30 class BundleReadException(Exception):
29 31 pass
30 32
31 33
32 34 class abstractbundlestore(object): # pytype: disable=ignored-metaclass
33 35 """Defines the interface for bundle stores.
34 36
35 37 A bundle store is an entity that stores raw bundle data. It is a simple
36 38 key-value store. However, the keys are chosen by the store. The keys can
37 39 be any Python object understood by the corresponding bundle index (see
38 40 ``abstractbundleindex`` below).
39 41 """
40 42
41 43 __metaclass__ = abc.ABCMeta
42 44
43 45 @abc.abstractmethod
44 46 def write(self, data):
45 47 """Write bundle data to the store.
46 48
47 49 This function receives the raw data to be written as a str.
48 50 Throws BundleWriteException
49 51 The key of the written data MUST be returned.
50 52 """
51 53
52 54 @abc.abstractmethod
53 55 def read(self, key):
54 56 """Obtain bundle data for a key.
55 57
56 58 Returns None if the bundle isn't known.
57 59 Throws BundleReadException
58 60 The returned object should be a file object supporting read()
59 61 and close().
60 62 """
61 63
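# A minimal in-memory sketch of the interface above, shown only to illustrate
# the write()/read() contract; it is not a store shipped with this extension.
class _memorybundlestore(abstractbundlestore):
    def __init__(self):
        self._bundles = {}

    def write(self, data):
        # the store chooses the key, as the interface requires
        key = node.hex(hashutil.sha1(data).digest())
        self._bundles[key] = data
        return key

    def read(self, key):
        data = self._bundles.get(key)
        if data is None:
            return None
        # a file-like object supporting read() and close()
        return pycompat.bytesio(data)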
62 64
63 65 class filebundlestore(object):
 64 66     """Bundle store in the filesystem.
 65 67
 66 68     Meant for storing bundles somewhere on disk or on a network filesystem.
67 69 """
68 70
69 71 def __init__(self, ui, repo):
70 72 self.ui = ui
71 73 self.repo = repo
72 74 self.storepath = ui.configpath(b'scratchbranch', b'storepath')
73 75 if not self.storepath:
74 76 self.storepath = self.repo.vfs.join(
75 77 b"scratchbranches", b"filebundlestore"
76 78 )
77 79 if not os.path.exists(self.storepath):
78 80 os.makedirs(self.storepath)
79 81
80 82 def _dirpath(self, hashvalue):
81 83 """First two bytes of the hash are the name of the upper
82 84 level directory, next two bytes are the name of the
83 85 next level directory"""
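        # e.g. a hex key starting with b'1234abcd' ends up under
        # <storepath>/12/34/ (the key value is illustrative)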
84 86 return os.path.join(self.storepath, hashvalue[0:2], hashvalue[2:4])
85 87
86 88 def _filepath(self, filename):
87 89 return os.path.join(self._dirpath(filename), filename)
88 90
89 91 def write(self, data):
90 filename = node.hex(hashlib.sha1(data).digest())
92 filename = node.hex(hashutil.sha1(data).digest())
91 93 dirpath = self._dirpath(filename)
92 94
93 95 if not os.path.exists(dirpath):
94 96 os.makedirs(dirpath)
95 97
96 98 with open(self._filepath(filename), b'wb') as f:
97 99 f.write(data)
98 100
99 101 return filename
100 102
101 103 def read(self, key):
102 104 try:
103 105 with open(self._filepath(key), b'rb') as f:
104 106 return f.read()
105 107 except IOError:
106 108 return None
107 109
108 110
109 111 class externalbundlestore(abstractbundlestore):
110 112 def __init__(self, put_binary, put_args, get_binary, get_args):
111 113 """
112 114 `put_binary` - path to binary file which uploads bundle to external
113 115 storage and prints key to stdout
114 116 `put_args` - format string with additional args to `put_binary`
115 117 {filename} replacement field can be used.
116 118 `get_binary` - path to binary file which accepts filename and key
117 119 (in that order), downloads bundle from store and saves it to file
118 120 `get_args` - format string with additional args to `get_binary`.
119 121 {filename} and {handle} replacement field can be used.
120 122 """
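        # For example (hypothetical values), put_binary='upload-bundle' with
        # put_args=['--file', '{filename}'] makes write() run
        # "upload-bundle --file <tempfile>" and treat its single line of
        # stdout as the storage key; get_binary/get_args work the same way,
        # with both {filename} and {handle} available for substitution.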
121 123
122 124 self.put_args = put_args
123 125 self.get_args = get_args
124 126 self.put_binary = put_binary
125 127 self.get_binary = get_binary
126 128
127 129 def _call_binary(self, args):
128 130 p = subprocess.Popen(
129 131 pycompat.rapply(procutil.tonativestr, args),
130 132 stdout=subprocess.PIPE,
131 133 stderr=subprocess.PIPE,
132 134 close_fds=True,
133 135 )
134 136 stdout, stderr = p.communicate()
135 137 returncode = p.returncode
136 138 return returncode, stdout, stderr
137 139
138 140 def write(self, data):
 139 141         # Won't work on Windows because you can't open the file a second
 140 142         # time without closing it
141 143 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
142 144 # with pycompat.namedtempfile()
143 145 with NamedTemporaryFile() as temp:
144 146 temp.write(data)
145 147 temp.flush()
146 148 temp.seek(0)
147 149 formatted_args = [
148 150 arg.format(filename=temp.name) for arg in self.put_args
149 151 ]
150 152 returncode, stdout, stderr = self._call_binary(
151 153 [self.put_binary] + formatted_args
152 154 )
153 155
154 156 if returncode != 0:
155 157 raise BundleWriteException(
156 158 b'Failed to upload to external store: %s' % stderr
157 159 )
158 160 stdout_lines = stdout.splitlines()
159 161 if len(stdout_lines) == 1:
160 162 return stdout_lines[0]
161 163 else:
162 164 raise BundleWriteException(
163 165 b'Bad output from %s: %s' % (self.put_binary, stdout)
164 166 )
165 167
166 168 def read(self, handle):
 167 169         # Won't work on Windows because you can't open the file a second
 168 170         # time without closing it
169 171 # TODO: rewrite without str.format() and replace NamedTemporaryFile()
170 172 # with pycompat.namedtempfile()
171 173 with NamedTemporaryFile() as temp:
172 174 formatted_args = [
173 175 arg.format(filename=temp.name, handle=handle)
174 176 for arg in self.get_args
175 177 ]
176 178 returncode, stdout, stderr = self._call_binary(
177 179 [self.get_binary] + formatted_args
178 180 )
179 181
180 182 if returncode != 0:
181 183 raise BundleReadException(
182 184 b'Failed to download from external store: %s' % stderr
183 185 )
184 186 return temp.read()
@@ -1,669 +1,669 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''High-level command function for lfconvert, plus the cmdtable.'''
10 10 from __future__ import absolute_import
11 11
12 12 import errno
13 import hashlib
14 13 import os
15 14 import shutil
16 15
17 16 from mercurial.i18n import _
18 17
19 18 from mercurial import (
20 19 cmdutil,
21 20 context,
22 21 error,
23 22 exthelper,
24 23 hg,
25 24 lock,
26 25 match as matchmod,
27 26 node,
28 27 pycompat,
29 28 scmutil,
30 29 util,
31 30 )
31 from mercurial.utils import hashutil
32 32
33 33 from ..convert import (
34 34 convcmd,
35 35 filemap,
36 36 )
37 37
38 38 from . import lfutil, storefactory
39 39
40 40 release = lock.release
41 41
42 42 # -- Commands ----------------------------------------------------------
43 43
44 44 eh = exthelper.exthelper()
45 45
46 46
47 47 @eh.command(
48 48 b'lfconvert',
49 49 [
50 50 (
51 51 b's',
52 52 b'size',
53 53 b'',
54 54 _(b'minimum size (MB) for files to be converted as largefiles'),
55 55 b'SIZE',
56 56 ),
57 57 (
58 58 b'',
59 59 b'to-normal',
60 60 False,
61 61 _(b'convert from a largefiles repo to a normal repo'),
62 62 ),
63 63 ],
64 64 _(b'hg lfconvert SOURCE DEST [FILE ...]'),
65 65 norepo=True,
66 66 inferrepo=True,
67 67 )
68 68 def lfconvert(ui, src, dest, *pats, **opts):
69 69 '''convert a normal repository to a largefiles repository
70 70
71 71 Convert repository SOURCE to a new repository DEST, identical to
72 72 SOURCE except that certain files will be converted as largefiles:
73 73 specifically, any file that matches any PATTERN *or* whose size is
74 74 above the minimum size threshold is converted as a largefile. The
75 75 size used to determine whether or not to track a file as a
76 76 largefile is the size of the first version of the file. The
77 77 minimum size can be specified either with --size or in
78 78 configuration as ``largefiles.size``.
79 79
80 80 After running this command you will need to make sure that
81 81 largefiles is enabled anywhere you intend to push the new
82 82 repository.
83 83
84 84 Use --to-normal to convert largefiles back to normal files; after
85 85 this, the DEST repository can be used without largefiles at all.'''
86 86
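    # Typical invocations (illustrative):
    #   hg lfconvert --size 10 oldrepo newrepo        # size threshold only
    #   hg lfconvert oldrepo newrepo '*.iso' '*.zip'  # patterns plus configured size
    #   hg lfconvert --to-normal bigrepo plainrepo    # convert back to normal files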
87 87 opts = pycompat.byteskwargs(opts)
88 88 if opts[b'to_normal']:
89 89 tolfile = False
90 90 else:
91 91 tolfile = True
92 92 size = lfutil.getminsize(ui, True, opts.get(b'size'), default=None)
93 93
94 94 if not hg.islocal(src):
95 95 raise error.Abort(_(b'%s is not a local Mercurial repo') % src)
96 96 if not hg.islocal(dest):
97 97 raise error.Abort(_(b'%s is not a local Mercurial repo') % dest)
98 98
99 99 rsrc = hg.repository(ui, src)
100 100 ui.status(_(b'initializing destination %s\n') % dest)
101 101 rdst = hg.repository(ui, dest, create=True)
102 102
103 103 success = False
104 104 dstwlock = dstlock = None
105 105 try:
106 106 # Get a list of all changesets in the source. The easy way to do this
107 107 # is to simply walk the changelog, using changelog.nodesbetween().
108 108 # Take a look at mercurial/revlog.py:639 for more details.
109 109 # Use a generator instead of a list to decrease memory usage
110 110 ctxs = (
111 111 rsrc[ctx]
112 112 for ctx in rsrc.changelog.nodesbetween(None, rsrc.heads())[0]
113 113 )
114 114 revmap = {node.nullid: node.nullid}
115 115 if tolfile:
 116 116             # Lock the destination to prevent modification while we convert into it.
117 117 # Don't need to lock src because we are just reading from its
118 118 # history which can't change.
119 119 dstwlock = rdst.wlock()
120 120 dstlock = rdst.lock()
121 121
122 122 lfiles = set()
123 123 normalfiles = set()
124 124 if not pats:
125 125 pats = ui.configlist(lfutil.longname, b'patterns')
126 126 if pats:
127 127 matcher = matchmod.match(rsrc.root, b'', list(pats))
128 128 else:
129 129 matcher = None
130 130
131 131 lfiletohash = {}
132 132 with ui.makeprogress(
133 133 _(b'converting revisions'),
134 134 unit=_(b'revisions'),
135 135 total=rsrc[b'tip'].rev(),
136 136 ) as progress:
137 137 for ctx in ctxs:
138 138 progress.update(ctx.rev())
139 139 _lfconvert_addchangeset(
140 140 rsrc,
141 141 rdst,
142 142 ctx,
143 143 revmap,
144 144 lfiles,
145 145 normalfiles,
146 146 matcher,
147 147 size,
148 148 lfiletohash,
149 149 )
150 150
151 151 if rdst.wvfs.exists(lfutil.shortname):
152 152 rdst.wvfs.rmtree(lfutil.shortname)
153 153
154 154 for f in lfiletohash.keys():
155 155 if rdst.wvfs.isfile(f):
156 156 rdst.wvfs.unlink(f)
157 157 try:
158 158 rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
159 159 except OSError:
160 160 pass
161 161
162 162 # If there were any files converted to largefiles, add largefiles
163 163 # to the destination repository's requirements.
164 164 if lfiles:
165 165 rdst.requirements.add(b'largefiles')
166 166 rdst._writerequirements()
167 167 else:
168 168
169 169 class lfsource(filemap.filemap_source):
170 170 def __init__(self, ui, source):
171 171 super(lfsource, self).__init__(ui, source, None)
172 172 self.filemapper.rename[lfutil.shortname] = b'.'
173 173
174 174 def getfile(self, name, rev):
175 175 realname, realrev = rev
176 176 f = super(lfsource, self).getfile(name, rev)
177 177
178 178 if (
179 179 not realname.startswith(lfutil.shortnameslash)
180 180 or f[0] is None
181 181 ):
182 182 return f
183 183
184 184 # Substitute in the largefile data for the hash
185 185 hash = f[0].strip()
186 186 path = lfutil.findfile(rsrc, hash)
187 187
188 188 if path is None:
189 189 raise error.Abort(
190 190 _(b"missing largefile for '%s' in %s")
191 191 % (realname, realrev)
192 192 )
193 193 return util.readfile(path), f[1]
194 194
195 195 class converter(convcmd.converter):
196 196 def __init__(self, ui, source, dest, revmapfile, opts):
197 197 src = lfsource(ui, source)
198 198
199 199 super(converter, self).__init__(
200 200 ui, src, dest, revmapfile, opts
201 201 )
202 202
203 203 found, missing = downloadlfiles(ui, rsrc)
204 204 if missing != 0:
205 205 raise error.Abort(_(b"all largefiles must be present locally"))
206 206
207 207 orig = convcmd.converter
208 208 convcmd.converter = converter
209 209
210 210 try:
211 211 convcmd.convert(
212 212 ui, src, dest, source_type=b'hg', dest_type=b'hg'
213 213 )
214 214 finally:
215 215 convcmd.converter = orig
216 216 success = True
217 217 finally:
218 218 if tolfile:
219 219 rdst.dirstate.clear()
220 220 release(dstlock, dstwlock)
221 221 if not success:
222 222 # we failed, remove the new directory
223 223 shutil.rmtree(rdst.root)
224 224
225 225
226 226 def _lfconvert_addchangeset(
227 227 rsrc, rdst, ctx, revmap, lfiles, normalfiles, matcher, size, lfiletohash
228 228 ):
229 229 # Convert src parents to dst parents
230 230 parents = _convertparents(ctx, revmap)
231 231
232 232 # Generate list of changed files
233 233 files = _getchangedfiles(ctx, parents)
234 234
235 235 dstfiles = []
236 236 for f in files:
237 237 if f not in lfiles and f not in normalfiles:
238 238 islfile = _islfile(f, ctx, matcher, size)
239 239 # If this file was renamed or copied then copy
240 240 # the largefile-ness of its predecessor
241 241 if f in ctx.manifest():
242 242 fctx = ctx.filectx(f)
243 243 renamed = fctx.copysource()
244 244 if renamed is None:
245 245 # the code below assumes renamed to be a boolean or a list
246 246 # and won't quite work with the value None
247 247 renamed = False
248 248 renamedlfile = renamed and renamed in lfiles
249 249 islfile |= renamedlfile
250 250 if b'l' in fctx.flags():
251 251 if renamedlfile:
252 252 raise error.Abort(
253 253 _(b'renamed/copied largefile %s becomes symlink')
254 254 % f
255 255 )
256 256 islfile = False
257 257 if islfile:
258 258 lfiles.add(f)
259 259 else:
260 260 normalfiles.add(f)
261 261
262 262 if f in lfiles:
263 263 fstandin = lfutil.standin(f)
264 264 dstfiles.append(fstandin)
265 265 # largefile in manifest if it has not been removed/renamed
266 266 if f in ctx.manifest():
267 267 fctx = ctx.filectx(f)
268 268 if b'l' in fctx.flags():
269 269 renamed = fctx.copysource()
270 270 if renamed and renamed in lfiles:
271 271 raise error.Abort(
272 272 _(b'largefile %s becomes symlink') % f
273 273 )
274 274
275 275 # largefile was modified, update standins
276 m = hashlib.sha1(b'')
276 m = hashutil.sha1(b'')
277 277 m.update(ctx[f].data())
278 278 hash = node.hex(m.digest())
279 279 if f not in lfiletohash or lfiletohash[f] != hash:
280 280 rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
281 281 executable = b'x' in ctx[f].flags()
282 282 lfutil.writestandin(rdst, fstandin, hash, executable)
283 283 lfiletohash[f] = hash
284 284 else:
285 285 # normal file
286 286 dstfiles.append(f)
287 287
288 288 def getfilectx(repo, memctx, f):
289 289 srcfname = lfutil.splitstandin(f)
290 290 if srcfname is not None:
291 291 # if the file isn't in the manifest then it was removed
292 292 # or renamed, return None to indicate this
293 293 try:
294 294 fctx = ctx.filectx(srcfname)
295 295 except error.LookupError:
296 296 return None
297 297 renamed = fctx.copysource()
298 298 if renamed:
299 299 # standin is always a largefile because largefile-ness
300 300 # doesn't change after rename or copy
301 301 renamed = lfutil.standin(renamed)
302 302
303 303 return context.memfilectx(
304 304 repo,
305 305 memctx,
306 306 f,
307 307 lfiletohash[srcfname] + b'\n',
308 308 b'l' in fctx.flags(),
309 309 b'x' in fctx.flags(),
310 310 renamed,
311 311 )
312 312 else:
313 313 return _getnormalcontext(repo, ctx, f, revmap)
314 314
315 315 # Commit
316 316 _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
317 317
318 318
319 319 def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
320 320 mctx = context.memctx(
321 321 rdst,
322 322 parents,
323 323 ctx.description(),
324 324 dstfiles,
325 325 getfilectx,
326 326 ctx.user(),
327 327 ctx.date(),
328 328 ctx.extra(),
329 329 )
330 330 ret = rdst.commitctx(mctx)
331 331 lfutil.copyalltostore(rdst, ret)
332 332 rdst.setparents(ret)
333 333 revmap[ctx.node()] = rdst.changelog.tip()
334 334
335 335
336 336 # Generate list of changed files
337 337 def _getchangedfiles(ctx, parents):
338 338 files = set(ctx.files())
339 339 if node.nullid not in parents:
340 340 mc = ctx.manifest()
341 341 for pctx in ctx.parents():
342 342 for fn in pctx.manifest().diff(mc):
343 343 files.add(fn)
344 344 return files
345 345
346 346
347 347 # Convert src parents to dst parents
348 348 def _convertparents(ctx, revmap):
349 349 parents = []
350 350 for p in ctx.parents():
351 351 parents.append(revmap[p.node()])
352 352 while len(parents) < 2:
353 353 parents.append(node.nullid)
354 354 return parents
355 355
356 356
357 357 # Get memfilectx for a normal file
358 358 def _getnormalcontext(repo, ctx, f, revmap):
359 359 try:
360 360 fctx = ctx.filectx(f)
361 361 except error.LookupError:
362 362 return None
363 363 renamed = fctx.copysource()
364 364
365 365 data = fctx.data()
366 366 if f == b'.hgtags':
367 367 data = _converttags(repo.ui, revmap, data)
368 368 return context.memfilectx(
369 369 repo, ctx, f, data, b'l' in fctx.flags(), b'x' in fctx.flags(), renamed
370 370 )
371 371
372 372
373 373 # Remap tag data using a revision map
374 374 def _converttags(ui, revmap, data):
375 375 newdata = []
376 376 for line in data.splitlines():
377 377 try:
378 378 id, name = line.split(b' ', 1)
379 379 except ValueError:
380 380 ui.warn(_(b'skipping incorrectly formatted tag %s\n') % line)
381 381 continue
382 382 try:
383 383 newid = node.bin(id)
384 384 except TypeError:
385 385 ui.warn(_(b'skipping incorrectly formatted id %s\n') % id)
386 386 continue
387 387 try:
388 388 newdata.append(b'%s %s\n' % (node.hex(revmap[newid]), name))
389 389 except KeyError:
390 390 ui.warn(_(b'no mapping for id %s\n') % id)
391 391 continue
392 392 return b''.join(newdata)
393 393
394 394
395 395 def _islfile(file, ctx, matcher, size):
396 396 '''Return true if file should be considered a largefile, i.e.
397 397 matcher matches it or it is larger than size.'''
398 398 # never store special .hg* files as largefiles
399 399 if file == b'.hgtags' or file == b'.hgignore' or file == b'.hgsigs':
400 400 return False
401 401 if matcher and matcher(file):
402 402 return True
403 403 try:
404 404 return ctx.filectx(file).size() >= size * 1024 * 1024
405 405 except error.LookupError:
406 406 return False
407 407
408 408
409 409 def uploadlfiles(ui, rsrc, rdst, files):
410 410 '''upload largefiles to the central store'''
411 411
412 412 if not files:
413 413 return
414 414
415 415 store = storefactory.openstore(rsrc, rdst, put=True)
416 416
417 417 at = 0
418 418 ui.debug(b"sending statlfile command for %d largefiles\n" % len(files))
419 419 retval = store.exists(files)
420 420 files = [h for h in files if not retval[h]]
421 421 ui.debug(b"%d largefiles need to be uploaded\n" % len(files))
422 422
423 423 with ui.makeprogress(
424 424 _(b'uploading largefiles'), unit=_(b'files'), total=len(files)
425 425 ) as progress:
426 426 for hash in files:
427 427 progress.update(at)
428 428 source = lfutil.findfile(rsrc, hash)
429 429 if not source:
430 430 raise error.Abort(
431 431 _(
432 432 b'largefile %s missing from store'
433 433 b' (needs to be uploaded)'
434 434 )
435 435 % hash
436 436 )
437 437 # XXX check for errors here
438 438 store.put(source, hash)
439 439 at += 1
440 440
441 441
442 442 def verifylfiles(ui, repo, all=False, contents=False):
443 443 '''Verify that every largefile revision in the current changeset
444 444 exists in the central store. With --contents, also verify that
 445 445     the contents of each local largefile revision are correct (SHA-1 hash
446 446 matches the revision ID). With --all, check every changeset in
447 447 this repository.'''
448 448 if all:
449 449 revs = repo.revs(b'all()')
450 450 else:
451 451 revs = [b'.']
452 452
453 453 store = storefactory.openstore(repo)
454 454 return store.verify(revs, contents=contents)
455 455
456 456
457 457 def cachelfiles(ui, repo, node, filelist=None):
458 458 '''cachelfiles ensures that all largefiles needed by the specified revision
459 459 are present in the repository's largefile cache.
460 460
461 461 returns a tuple (cached, missing). cached is the list of files downloaded
462 462 by this operation; missing is the list of files that were needed but could
463 463 not be found.'''
464 464 lfiles = lfutil.listlfiles(repo, node)
465 465 if filelist:
466 466 lfiles = set(lfiles) & set(filelist)
467 467 toget = []
468 468
469 469 ctx = repo[node]
470 470 for lfile in lfiles:
471 471 try:
472 472 expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
473 473 except IOError as err:
474 474 if err.errno == errno.ENOENT:
475 475 continue # node must be None and standin wasn't found in wctx
476 476 raise
477 477 if not lfutil.findfile(repo, expectedhash):
478 478 toget.append((lfile, expectedhash))
479 479
480 480 if toget:
481 481 store = storefactory.openstore(repo)
482 482 ret = store.get(toget)
483 483 return ret
484 484
485 485 return ([], [])
486 486
487 487
488 488 def downloadlfiles(ui, repo, rev=None):
489 489 match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
490 490
491 491 def prepare(ctx, fns):
492 492 pass
493 493
494 494 totalsuccess = 0
495 495 totalmissing = 0
496 496 if rev != []: # walkchangerevs on empty list would return all revs
497 497 for ctx in cmdutil.walkchangerevs(repo, match, {b'rev': rev}, prepare):
498 498 success, missing = cachelfiles(ui, repo, ctx.node())
499 499 totalsuccess += len(success)
500 500 totalmissing += len(missing)
501 501 ui.status(_(b"%d additional largefiles cached\n") % totalsuccess)
502 502 if totalmissing > 0:
503 503 ui.status(_(b"%d largefiles failed to download\n") % totalmissing)
504 504 return totalsuccess, totalmissing
505 505
506 506
507 507 def updatelfiles(
508 508 ui, repo, filelist=None, printmessage=None, normallookup=False
509 509 ):
510 510 '''Update largefiles according to standins in the working directory
511 511
 512 512     If ``printmessage`` is not ``None``, it forces status messages to be
 513 513     printed (for a true value) or suppressed (for a false value).
514 514 '''
515 515 statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
516 516 with repo.wlock():
517 517 lfdirstate = lfutil.openlfdirstate(ui, repo)
518 518 lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
519 519
520 520 if filelist is not None:
521 521 filelist = set(filelist)
522 522 lfiles = [f for f in lfiles if f in filelist]
523 523
524 524 update = {}
525 525 dropped = set()
526 526 updated, removed = 0, 0
527 527 wvfs = repo.wvfs
528 528 wctx = repo[None]
529 529 for lfile in lfiles:
530 530 lfileorig = os.path.relpath(
531 531 scmutil.backuppath(ui, repo, lfile), start=repo.root
532 532 )
533 533 standin = lfutil.standin(lfile)
534 534 standinorig = os.path.relpath(
535 535 scmutil.backuppath(ui, repo, standin), start=repo.root
536 536 )
537 537 if wvfs.exists(standin):
538 538 if wvfs.exists(standinorig) and wvfs.exists(lfile):
539 539 shutil.copyfile(wvfs.join(lfile), wvfs.join(lfileorig))
540 540 wvfs.unlinkpath(standinorig)
541 541 expecthash = lfutil.readasstandin(wctx[standin])
542 542 if expecthash != b'':
543 543 if lfile not in wctx: # not switched to normal file
544 544 if repo.dirstate[standin] != b'?':
545 545 wvfs.unlinkpath(lfile, ignoremissing=True)
546 546 else:
547 547 dropped.add(lfile)
548 548
549 549 # use normallookup() to allocate an entry in largefiles
550 550 # dirstate to prevent lfilesrepo.status() from reporting
551 551 # missing files as removed.
552 552 lfdirstate.normallookup(lfile)
553 553 update[lfile] = expecthash
554 554 else:
555 555 # Remove lfiles for which the standin is deleted, unless the
556 556 # lfile is added to the repository again. This happens when a
557 557 # largefile is converted back to a normal file: the standin
558 558 # disappears, but a new (normal) file appears as the lfile.
559 559 if (
560 560 wvfs.exists(lfile)
561 561 and repo.dirstate.normalize(lfile) not in wctx
562 562 ):
563 563 wvfs.unlinkpath(lfile)
564 564 removed += 1
565 565
 566 566         # largefile processing might be slow and get interrupted - be prepared
567 567 lfdirstate.write()
568 568
569 569 if lfiles:
570 570 lfiles = [f for f in lfiles if f not in dropped]
571 571
572 572 for f in dropped:
573 573 repo.wvfs.unlinkpath(lfutil.standin(f))
574 574
575 575 # This needs to happen for dropped files, otherwise they stay in
576 576 # the M state.
577 577 lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)
578 578
579 579 statuswriter(_(b'getting changed largefiles\n'))
580 580 cachelfiles(ui, repo, None, lfiles)
581 581
582 582 for lfile in lfiles:
583 583 update1 = 0
584 584
585 585 expecthash = update.get(lfile)
586 586 if expecthash:
587 587 if not lfutil.copyfromcache(repo, expecthash, lfile):
588 588 # failed ... but already removed and set to normallookup
589 589 continue
590 590 # Synchronize largefile dirstate to the last modified
591 591 # time of the file
592 592 lfdirstate.normal(lfile)
593 593 update1 = 1
594 594
595 595 # copy the exec mode of largefile standin from the repository's
596 596 # dirstate to its state in the lfdirstate.
597 597 standin = lfutil.standin(lfile)
598 598 if wvfs.exists(standin):
 599 599                 # exec is decided by the user's permissions using mask 0o100
600 600 standinexec = wvfs.stat(standin).st_mode & 0o100
601 601 st = wvfs.stat(lfile)
602 602 mode = st.st_mode
603 603 if standinexec != mode & 0o100:
604 604 # first remove all X bits, then shift all R bits to X
605 605 mode &= ~0o111
606 606 if standinexec:
607 607 mode |= (mode >> 2) & 0o111 & ~util.umask
608 608 wvfs.chmod(lfile, mode)
609 609 update1 = 1
610 610
611 611 updated += update1
612 612
613 613 lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)
614 614
615 615 lfdirstate.write()
616 616 if lfiles:
617 617 statuswriter(
618 618 _(b'%d largefiles updated, %d removed\n') % (updated, removed)
619 619 )
620 620
621 621
622 622 @eh.command(
623 623 b'lfpull',
624 624 [(b'r', b'rev', [], _(b'pull largefiles for these revisions'))]
625 625 + cmdutil.remoteopts,
626 626 _(b'-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'),
627 627 )
628 628 def lfpull(ui, repo, source=b"default", **opts):
629 629 """pull largefiles for the specified revisions from the specified source
630 630
631 631 Pull largefiles that are referenced from local changesets but missing
632 632 locally, pulling from a remote repository to the local cache.
633 633
634 634 If SOURCE is omitted, the 'default' path will be used.
635 635 See :hg:`help urls` for more information.
636 636
637 637 .. container:: verbose
638 638
639 639 Some examples:
640 640
641 641 - pull largefiles for all branch heads::
642 642
643 643 hg lfpull -r "head() and not closed()"
644 644
645 645 - pull largefiles on the default branch::
646 646
647 647 hg lfpull -r "branch(default)"
648 648 """
649 649 repo.lfpullsource = source
650 650
651 651 revs = opts.get('rev', [])
652 652 if not revs:
653 653 raise error.Abort(_(b'no revisions specified'))
654 654 revs = scmutil.revrange(repo, revs)
655 655
656 656 numcached = 0
657 657 for rev in revs:
658 658 ui.note(_(b'pulling largefiles for revision %d\n') % rev)
659 659 (cached, missing) = cachelfiles(ui, repo, rev)
660 660 numcached += len(cached)
661 661 ui.status(_(b"%d largefiles cached\n") % numcached)
662 662
663 663
664 664 @eh.command(b'debuglfput', [] + cmdutil.remoteopts, _(b'FILE'))
665 665 def debuglfput(ui, repo, filepath, **kwargs):
666 666 hash = lfutil.hashfile(filepath)
667 667 storefactory.openstore(repo).put(filepath, hash)
668 668 ui.write(b'%s\n' % hash)
669 669 return 0
@@ -1,760 +1,760 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10 from __future__ import absolute_import
11 11
12 12 import contextlib
13 13 import copy
14 import hashlib
15 14 import os
16 15 import stat
17 16
18 17 from mercurial.i18n import _
19 18 from mercurial.node import hex
20 19 from mercurial.pycompat import open
21 20
22 21 from mercurial import (
23 22 dirstate,
24 23 encoding,
25 24 error,
26 25 httpconnection,
27 26 match as matchmod,
28 27 node,
29 28 pycompat,
30 29 scmutil,
31 30 sparse,
32 31 util,
33 32 vfs as vfsmod,
34 33 )
34 from mercurial.utils import hashutil
35 35
36 36 shortname = b'.hglf'
37 37 shortnameslash = shortname + b'/'
38 38 longname = b'largefiles'
39 39
40 40 # -- Private worker functions ------------------------------------------
41 41
42 42
43 43 @contextlib.contextmanager
44 44 def lfstatus(repo, value=True):
45 45 oldvalue = getattr(repo, 'lfstatus', False)
46 46 repo.lfstatus = value
47 47 try:
48 48 yield
49 49 finally:
50 50 repo.lfstatus = oldvalue
51 51
52 52
53 53 def getminsize(ui, assumelfiles, opt, default=10):
54 54 lfsize = opt
55 55 if not lfsize and assumelfiles:
56 56 lfsize = ui.config(longname, b'minsize', default=default)
57 57 if lfsize:
58 58 try:
59 59 lfsize = float(lfsize)
60 60 except ValueError:
61 61 raise error.Abort(
62 62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 63 )
64 64 if lfsize is None:
65 65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 66 return lfsize
67 67
68 68
69 69 def link(src, dest):
70 70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 71 util.makedirs(os.path.dirname(dest))
72 72 try:
73 73 util.oslink(src, dest)
74 74 except OSError:
75 75 # if hardlinks fail, fallback on atomic copy
76 76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 77 for chunk in util.filechunkiter(srcf):
78 78 dstf.write(chunk)
79 79 os.chmod(dest, os.stat(src).st_mode)
80 80
81 81
82 82 def usercachepath(ui, hash):
83 83 '''Return the correct location in the "global" largefiles cache for a file
84 84 with the given hash.
85 85 This cache is used for sharing of largefiles across repositories - both
86 86 to preserve download bandwidth and storage space.'''
87 87 return os.path.join(_usercachedir(ui), hash)
88 88
89 89
90 90 def _usercachedir(ui, name=longname):
91 91 '''Return the location of the "global" largefiles cache.'''
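    # With the default name b'largefiles', the branches below resolve to e.g.
    #   Windows: %LOCALAPPDATA%\largefiles (or %APPDATA%\largefiles)
    #   macOS:   $HOME/Library/Caches/largefiles
    #   POSIX:   $XDG_CACHE_HOME/largefiles, else $HOME/.cache/largefiles
    # unless [largefiles] usercache overrides the location.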
92 92 path = ui.configpath(name, b'usercache')
93 93 if path:
94 94 return path
95 95 if pycompat.iswindows:
96 96 appdata = encoding.environ.get(
97 97 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
98 98 )
99 99 if appdata:
100 100 return os.path.join(appdata, name)
101 101 elif pycompat.isdarwin:
102 102 home = encoding.environ.get(b'HOME')
103 103 if home:
104 104 return os.path.join(home, b'Library', b'Caches', name)
105 105 elif pycompat.isposix:
106 106 path = encoding.environ.get(b'XDG_CACHE_HOME')
107 107 if path:
108 108 return os.path.join(path, name)
109 109 home = encoding.environ.get(b'HOME')
110 110 if home:
111 111 return os.path.join(home, b'.cache', name)
112 112 else:
113 113 raise error.Abort(
114 114 _(b'unknown operating system: %s\n') % pycompat.osname
115 115 )
116 116 raise error.Abort(_(b'unknown %s usercache location') % name)
117 117
118 118
119 119 def inusercache(ui, hash):
120 120 path = usercachepath(ui, hash)
121 121 return os.path.exists(path)
122 122
123 123
124 124 def findfile(repo, hash):
125 125 '''Return store path of the largefile with the specified hash.
126 126 As a side effect, the file might be linked from user cache.
127 127 Return None if the file can't be found locally.'''
128 128 path, exists = findstorepath(repo, hash)
129 129 if exists:
130 130 repo.ui.note(_(b'found %s in store\n') % hash)
131 131 return path
132 132 elif inusercache(repo.ui, hash):
133 133 repo.ui.note(_(b'found %s in system cache\n') % hash)
134 134 path = storepath(repo, hash)
135 135 link(usercachepath(repo.ui, hash), path)
136 136 return path
137 137 return None
138 138
139 139
140 140 class largefilesdirstate(dirstate.dirstate):
141 141 def __getitem__(self, key):
142 142 return super(largefilesdirstate, self).__getitem__(unixpath(key))
143 143
144 144 def normal(self, f):
145 145 return super(largefilesdirstate, self).normal(unixpath(f))
146 146
147 147 def remove(self, f):
148 148 return super(largefilesdirstate, self).remove(unixpath(f))
149 149
150 150 def add(self, f):
151 151 return super(largefilesdirstate, self).add(unixpath(f))
152 152
153 153 def drop(self, f):
154 154 return super(largefilesdirstate, self).drop(unixpath(f))
155 155
156 156 def forget(self, f):
157 157 return super(largefilesdirstate, self).forget(unixpath(f))
158 158
159 159 def normallookup(self, f):
160 160 return super(largefilesdirstate, self).normallookup(unixpath(f))
161 161
162 162 def _ignore(self, f):
163 163 return False
164 164
165 165 def write(self, tr=False):
166 166 # (1) disable PENDING mode always
167 167 # (lfdirstate isn't yet managed as a part of the transaction)
168 168 # (2) avoid develwarn 'use dirstate.write with ....'
169 169 super(largefilesdirstate, self).write(None)
170 170
171 171
172 172 def openlfdirstate(ui, repo, create=True):
173 173 '''
174 174 Return a dirstate object that tracks largefiles: i.e. its root is
175 175 the repo root, but it is saved in .hg/largefiles/dirstate.
176 176 '''
177 177 vfs = repo.vfs
178 178 lfstoredir = longname
179 179 opener = vfsmod.vfs(vfs.join(lfstoredir))
180 180 lfdirstate = largefilesdirstate(
181 181 opener,
182 182 ui,
183 183 repo.root,
184 184 repo.dirstate._validate,
185 185 lambda: sparse.matcher(repo),
186 186 )
187 187
188 188 # If the largefiles dirstate does not exist, populate and create
189 189 # it. This ensures that we create it on the first meaningful
190 190 # largefiles operation in a new clone.
191 191 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
192 192 matcher = getstandinmatcher(repo)
193 193 standins = repo.dirstate.walk(
194 194 matcher, subrepos=[], unknown=False, ignored=False
195 195 )
196 196
197 197 if len(standins) > 0:
198 198 vfs.makedirs(lfstoredir)
199 199
200 200 for standin in standins:
201 201 lfile = splitstandin(standin)
202 202 lfdirstate.normallookup(lfile)
203 203 return lfdirstate
204 204
205 205
206 206 def lfdirstatestatus(lfdirstate, repo):
207 207 pctx = repo[b'.']
208 208 match = matchmod.always()
209 209 unsure, s = lfdirstate.status(
210 210 match, subrepos=[], ignored=False, clean=False, unknown=False
211 211 )
212 212 modified, clean = s.modified, s.clean
213 213 for lfile in unsure:
214 214 try:
215 215 fctx = pctx[standin(lfile)]
216 216 except LookupError:
217 217 fctx = None
218 218 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
219 219 modified.append(lfile)
220 220 else:
221 221 clean.append(lfile)
222 222 lfdirstate.normal(lfile)
223 223 return s
224 224
225 225
226 226 def listlfiles(repo, rev=None, matcher=None):
227 227 '''return a list of largefiles in the working copy or the
228 228 specified changeset'''
229 229
230 230 if matcher is None:
231 231 matcher = getstandinmatcher(repo)
232 232
233 233 # ignore unknown files in working directory
234 234 return [
235 235 splitstandin(f)
236 236 for f in repo[rev].walk(matcher)
237 237 if rev is not None or repo.dirstate[f] != b'?'
238 238 ]
239 239
240 240
241 241 def instore(repo, hash, forcelocal=False):
242 242 '''Return true if a largefile with the given hash exists in the store'''
243 243 return os.path.exists(storepath(repo, hash, forcelocal))
244 244
245 245
246 246 def storepath(repo, hash, forcelocal=False):
247 247 '''Return the correct location in the repository largefiles store for a
248 248 file with the given hash.'''
249 249 if not forcelocal and repo.shared():
250 250 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
251 251 return repo.vfs.join(longname, hash)
252 252
253 253
254 254 def findstorepath(repo, hash):
255 255 '''Search through the local store path(s) to find the file for the given
256 256 hash. If the file is not found, its path in the primary store is returned.
257 257 The return value is a tuple of (path, exists(path)).
258 258 '''
259 259 # For shared repos, the primary store is in the share source. But for
260 260 # backward compatibility, force a lookup in the local store if it wasn't
261 261 # found in the share source.
262 262 path = storepath(repo, hash, False)
263 263
264 264 if instore(repo, hash):
265 265 return (path, True)
266 266 elif repo.shared() and instore(repo, hash, True):
267 267 return storepath(repo, hash, True), True
268 268
269 269 return (path, False)
270 270
271 271
272 272 def copyfromcache(repo, hash, filename):
273 273 '''Copy the specified largefile from the repo or system cache to
274 274 filename in the repository. Return true on success or false if the
 275 275     file was not found in either cache (which should not happen:
276 276 this is meant to be called only after ensuring that the needed
277 277 largefile exists in the cache).'''
278 278 wvfs = repo.wvfs
279 279 path = findfile(repo, hash)
280 280 if path is None:
281 281 return False
282 282 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
283 283 # The write may fail before the file is fully written, but we
284 284 # don't use atomic writes in the working copy.
285 285 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
286 286 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
287 287 if gothash != hash:
288 288 repo.ui.warn(
289 289 _(b'%s: data corruption in %s with hash %s\n')
290 290 % (filename, path, gothash)
291 291 )
292 292 wvfs.unlink(filename)
293 293 return False
294 294 return True
295 295
296 296
297 297 def copytostore(repo, ctx, file, fstandin):
298 298 wvfs = repo.wvfs
299 299 hash = readasstandin(ctx[fstandin])
300 300 if instore(repo, hash):
301 301 return
302 302 if wvfs.exists(file):
303 303 copytostoreabsolute(repo, wvfs.join(file), hash)
304 304 else:
305 305 repo.ui.warn(
306 306 _(b"%s: largefile %s not available from local store\n")
307 307 % (file, hash)
308 308 )
309 309
310 310
311 311 def copyalltostore(repo, node):
312 312 '''Copy all largefiles in a given revision to the store'''
313 313
314 314 ctx = repo[node]
315 315 for filename in ctx.files():
316 316 realfile = splitstandin(filename)
317 317 if realfile is not None and filename in ctx.manifest():
318 318 copytostore(repo, ctx, realfile, filename)
319 319
320 320
321 321 def copytostoreabsolute(repo, file, hash):
322 322 if inusercache(repo.ui, hash):
323 323 link(usercachepath(repo.ui, hash), storepath(repo, hash))
324 324 else:
325 325 util.makedirs(os.path.dirname(storepath(repo, hash)))
326 326 with open(file, b'rb') as srcf:
327 327 with util.atomictempfile(
328 328 storepath(repo, hash), createmode=repo.store.createmode
329 329 ) as dstf:
330 330 for chunk in util.filechunkiter(srcf):
331 331 dstf.write(chunk)
332 332 linktousercache(repo, hash)
333 333
334 334
335 335 def linktousercache(repo, hash):
336 336 '''Link / copy the largefile with the specified hash from the store
337 337 to the cache.'''
338 338 path = usercachepath(repo.ui, hash)
339 339 link(storepath(repo, hash), path)
340 340
341 341
342 342 def getstandinmatcher(repo, rmatcher=None):
343 343 '''Return a match object that applies rmatcher to the standin directory'''
344 344 wvfs = repo.wvfs
345 345 standindir = shortname
346 346
347 347 # no warnings about missing files or directories
348 348 badfn = lambda f, msg: None
349 349
350 350 if rmatcher and not rmatcher.always():
351 351 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
352 352 if not pats:
353 353 pats = [wvfs.join(standindir)]
354 354 match = scmutil.match(repo[None], pats, badfn=badfn)
355 355 else:
356 356 # no patterns: relative to repo root
357 357 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
358 358 return match
359 359
360 360
361 361 def composestandinmatcher(repo, rmatcher):
362 362 '''Return a matcher that accepts standins corresponding to the
363 363 files accepted by rmatcher. Pass the list of files in the matcher
364 364 as the paths specified by the user.'''
365 365 smatcher = getstandinmatcher(repo, rmatcher)
366 366 isstandin = smatcher.matchfn
367 367
368 368 def composedmatchfn(f):
369 369 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
370 370
371 371 smatcher.matchfn = composedmatchfn
372 372
373 373 return smatcher
374 374
375 375
376 376 def standin(filename):
377 377 '''Return the repo-relative path to the standin for the specified big
378 378 file.'''
379 379 # Notes:
380 380 # 1) Some callers want an absolute path, but for instance addlargefiles
381 381 # needs it repo-relative so it can be passed to repo[None].add(). So
382 382 # leave it up to the caller to use repo.wjoin() to get an absolute path.
383 383 # 2) Join with '/' because that's what dirstate always uses, even on
384 384 # Windows. Change existing separator to '/' first in case we are
385 385 # passed filenames from an external source (like the command line).
386 386 return shortnameslash + util.pconvert(filename)
387 387
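# For example, standin(b'sub/big.bin') yields b'.hglf/sub/big.bin';
# splitstandin() below performs the inverse mapping and returns None for
# paths outside .hglf/.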
388 388
389 389 def isstandin(filename):
390 390 '''Return true if filename is a big file standin. filename must be
391 391 in Mercurial's internal form (slash-separated).'''
392 392 return filename.startswith(shortnameslash)
393 393
394 394
395 395 def splitstandin(filename):
396 396 # Split on / because that's what dirstate always uses, even on Windows.
397 397 # Change local separator to / first just in case we are passed filenames
398 398 # from an external source (like the command line).
399 399 bits = util.pconvert(filename).split(b'/', 1)
400 400 if len(bits) == 2 and bits[0] == shortname:
401 401 return bits[1]
402 402 else:
403 403 return None
404 404
405 405
406 406 def updatestandin(repo, lfile, standin):
407 407 """Re-calculate hash value of lfile and write it into standin
408 408
409 409 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
410 410 """
411 411 file = repo.wjoin(lfile)
412 412 if repo.wvfs.exists(lfile):
413 413 hash = hashfile(file)
414 414 executable = getexecutable(file)
415 415 writestandin(repo, standin, hash, executable)
416 416 else:
417 417 raise error.Abort(_(b'%s: file not found!') % lfile)
418 418
419 419
420 420 def readasstandin(fctx):
421 421 '''read hex hash from given filectx of standin file
422 422
 423 423     This encapsulates how "standin" data is stored into the storage layer.'''
424 424 return fctx.data().strip()
425 425
426 426
427 427 def writestandin(repo, standin, hash, executable):
428 428 '''write hash to <repo.root>/<standin>'''
429 429 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
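# The standin is thus a small tracked file whose entire content is the
# 40-character hex SHA-1 of the largefile followed by a newline;
# readasstandin() above strips that newline off again.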
430 430
431 431
432 432 def copyandhash(instream, outfile):
433 433 '''Read bytes from instream (iterable) and write them to outfile,
434 434 computing the SHA-1 hash of the data along the way. Return the hash.'''
435 hasher = hashlib.sha1(b'')
435 hasher = hashutil.sha1(b'')
436 436 for data in instream:
437 437 hasher.update(data)
438 438 outfile.write(data)
439 439 return hex(hasher.digest())
440 440
441 441
442 442 def hashfile(file):
443 443 if not os.path.exists(file):
444 444 return b''
445 445 with open(file, b'rb') as fd:
446 446 return hexsha1(fd)
447 447
448 448
449 449 def getexecutable(filename):
450 450 mode = os.stat(filename).st_mode
451 451 return (
452 452 (mode & stat.S_IXUSR)
453 453 and (mode & stat.S_IXGRP)
454 454 and (mode & stat.S_IXOTH)
455 455 )
456 456
457 457
458 458 def urljoin(first, second, *arg):
459 459 def join(left, right):
460 460 if not left.endswith(b'/'):
461 461 left += b'/'
462 462 if right.startswith(b'/'):
463 463 right = right[1:]
464 464 return left + right
465 465
466 466 url = join(first, second)
467 467 for a in arg:
468 468 url = join(url, a)
469 469 return url
470 470
471 471
472 472 def hexsha1(fileobj):
473 473 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
 474 474     object"""
475 h = hashlib.sha1()
475 h = hashutil.sha1()
476 476 for chunk in util.filechunkiter(fileobj):
477 477 h.update(chunk)
478 478 return hex(h.digest())
479 479
480 480
481 481 def httpsendfile(ui, filename):
482 482 return httpconnection.httpsendfile(ui, filename, b'rb')
483 483
484 484
485 485 def unixpath(path):
486 486 '''Return a version of path normalized for use with the lfdirstate.'''
487 487 return util.pconvert(os.path.normpath(path))
488 488
489 489
490 490 def islfilesrepo(repo):
491 491 '''Return true if the repo is a largefile repo.'''
492 492 if b'largefiles' in repo.requirements and any(
493 493 shortnameslash in f[0] for f in repo.store.datafiles()
494 494 ):
495 495 return True
496 496
497 497 return any(openlfdirstate(repo.ui, repo, False))
498 498
499 499
500 500 class storeprotonotcapable(Exception):
501 501 def __init__(self, storetypes):
502 502 self.storetypes = storetypes
503 503
504 504
505 505 def getstandinsstate(repo):
506 506 standins = []
507 507 matcher = getstandinmatcher(repo)
508 508 wctx = repo[None]
509 509 for standin in repo.dirstate.walk(
510 510 matcher, subrepos=[], unknown=False, ignored=False
511 511 ):
512 512 lfile = splitstandin(standin)
513 513 try:
514 514 hash = readasstandin(wctx[standin])
515 515 except IOError:
516 516 hash = None
517 517 standins.append((lfile, hash))
518 518 return standins
519 519
520 520
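# The mapping below mirrors a standin's dirstate state into the largefiles
# dirstate: 'n' (normal), 'm' (merged), 'r' (removed), 'a' (added) and '?'
# (untracked); an 'n' entry that cannot be trusted to be clean falls back to
# normallookup().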
521 521 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
522 522 lfstandin = standin(lfile)
523 523 if lfstandin in repo.dirstate:
524 524 stat = repo.dirstate._map[lfstandin]
525 525 state, mtime = stat[0], stat[3]
526 526 else:
527 527 state, mtime = b'?', -1
528 528 if state == b'n':
529 529 if normallookup or mtime < 0 or not repo.wvfs.exists(lfile):
530 530 # state 'n' doesn't ensure 'clean' in this case
531 531 lfdirstate.normallookup(lfile)
532 532 else:
533 533 lfdirstate.normal(lfile)
534 534 elif state == b'm':
535 535 lfdirstate.normallookup(lfile)
536 536 elif state == b'r':
537 537 lfdirstate.remove(lfile)
538 538 elif state == b'a':
539 539 lfdirstate.add(lfile)
540 540 elif state == b'?':
541 541 lfdirstate.drop(lfile)
542 542
543 543
544 544 def markcommitted(orig, ctx, node):
545 545 repo = ctx.repo()
546 546
547 547 orig(node)
548 548
549 549 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
550 550 # because files coming from the 2nd parent are omitted in the latter.
551 551 #
552 552 # The former should be used to get targets of "synclfdirstate",
553 553 # because such files:
554 554 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
555 555 # - have to be marked as "n" after commit, but
556 556 # - aren't listed in "repo[node].files()"
557 557
558 558 lfdirstate = openlfdirstate(repo.ui, repo)
559 559 for f in ctx.files():
560 560 lfile = splitstandin(f)
561 561 if lfile is not None:
562 562 synclfdirstate(repo, lfdirstate, lfile, False)
563 563 lfdirstate.write()
564 564
565 565 # As part of committing, copy all of the largefiles into the cache.
566 566 #
 567 567     # Using "node" instead of "ctx" implies an additional "repo[node]"
 568 568     # lookup in copyalltostore(), but lets us omit the redundant check for
 569 569     # files coming from the 2nd parent, which should already exist in the
 570 570     # store after a merge.
571 571 copyalltostore(repo, node)
572 572
573 573
574 574 def getlfilestoupdate(oldstandins, newstandins):
575 575 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
576 576 filelist = []
577 577 for f in changedstandins:
578 578 if f[0] not in filelist:
579 579 filelist.append(f[0])
580 580 return filelist
581 581
582 582
583 583 def getlfilestoupload(repo, missing, addfunc):
584 584 makeprogress = repo.ui.makeprogress
585 585 with makeprogress(
586 586 _(b'finding outgoing largefiles'),
587 587 unit=_(b'revisions'),
588 588 total=len(missing),
589 589 ) as progress:
590 590 for i, n in enumerate(missing):
591 591 progress.update(i)
592 592 parents = [p for p in repo[n].parents() if p != node.nullid]
593 593
594 594 with lfstatus(repo, value=False):
595 595 ctx = repo[n]
596 596
597 597 files = set(ctx.files())
598 598 if len(parents) == 2:
599 599 mc = ctx.manifest()
600 600 mp1 = ctx.p1().manifest()
601 601 mp2 = ctx.p2().manifest()
602 602 for f in mp1:
603 603 if f not in mc:
604 604 files.add(f)
605 605 for f in mp2:
606 606 if f not in mc:
607 607 files.add(f)
608 608 for f in mc:
609 609 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
610 610 files.add(f)
611 611 for fn in files:
612 612 if isstandin(fn) and fn in ctx:
613 613 addfunc(fn, readasstandin(ctx[fn]))
614 614
615 615
616 616 def updatestandinsbymatch(repo, match):
617 617 '''Update standins in the working directory according to specified match
618 618
619 619 This returns (possibly modified) ``match`` object to be used for
620 620 subsequent commit process.
621 621 '''
622 622
623 623 ui = repo.ui
624 624
625 625 # Case 1: user calls commit with no specific files or
626 626 # include/exclude patterns: refresh and commit all files that
627 627 # are "dirty".
628 628 if match is None or match.always():
629 629 # Spend a bit of time here to get a list of files we know
630 630 # are modified so we can compare only against those.
631 631 # It can cost a lot of time (several seconds)
632 632 # otherwise to update all standins if the largefiles are
633 633 # large.
634 634 lfdirstate = openlfdirstate(ui, repo)
635 635 dirtymatch = matchmod.always()
636 636 unsure, s = lfdirstate.status(
637 637 dirtymatch, subrepos=[], ignored=False, clean=False, unknown=False
638 638 )
639 639 modifiedfiles = unsure + s.modified + s.added + s.removed
640 640 lfiles = listlfiles(repo)
641 641 # this only loops through largefiles that exist (not
642 642 # removed/renamed)
643 643 for lfile in lfiles:
644 644 if lfile in modifiedfiles:
645 645 fstandin = standin(lfile)
646 646 if repo.wvfs.exists(fstandin):
647 647 # this handles the case where a rebase is being
648 648 # performed and the working copy is not updated
649 649 # yet.
650 650 if repo.wvfs.exists(lfile):
651 651 updatestandin(repo, lfile, fstandin)
652 652
653 653 return match
654 654
655 655 lfiles = listlfiles(repo)
656 656 match._files = repo._subdirlfs(match.files(), lfiles)
657 657
658 658 # Case 2: user calls commit with specified patterns: refresh
659 659 # any matching big files.
660 660 smatcher = composestandinmatcher(repo, match)
661 661 standins = repo.dirstate.walk(
662 662 smatcher, subrepos=[], unknown=False, ignored=False
663 663 )
664 664
665 665 # No matching big files: get out of the way and pass control to
666 666 # the usual commit() method.
667 667 if not standins:
668 668 return match
669 669
670 670 # Refresh all matching big files. It's possible that the
671 671 # commit will end up failing, in which case the big files will
672 672 # stay refreshed. No harm done: the user modified them and
673 673 # asked to commit them, so sooner or later we're going to
674 674 # refresh the standins. Might as well leave them refreshed.
675 675 lfdirstate = openlfdirstate(ui, repo)
676 676 for fstandin in standins:
677 677 lfile = splitstandin(fstandin)
678 678 if lfdirstate[lfile] != b'r':
679 679 updatestandin(repo, lfile, fstandin)
680 680
681 681 # Cook up a new matcher that only matches regular files or
682 682 # standins corresponding to the big files requested by the
683 683 # user. Have to modify _files to prevent commit() from
684 684 # complaining "not tracked" for big files.
685 685 match = copy.copy(match)
686 686 origmatchfn = match.matchfn
687 687
688 688 # Check both the list of largefiles and the list of
689 689 # standins because if a largefile was removed, it
690 690 # won't be in the list of largefiles at this point
691 691 match._files += sorted(standins)
692 692
693 693 actualfiles = []
694 694 for f in match._files:
695 695 fstandin = standin(f)
696 696
697 697 # For largefiles, only one of the normal and standin should be
698 698 # committed (except if one of them is a remove). In the case of a
699 699 # standin removal, drop the normal file if it is unknown to dirstate.
700 700 # Thus, skip plain largefile names but keep the standin.
701 701 if f in lfiles or fstandin in standins:
702 702 if repo.dirstate[fstandin] != b'r':
703 703 if repo.dirstate[f] != b'r':
704 704 continue
705 705 elif repo.dirstate[f] == b'?':
706 706 continue
707 707
708 708 actualfiles.append(f)
709 709 match._files = actualfiles
710 710
711 711 def matchfn(f):
712 712 if origmatchfn(f):
713 713 return f not in lfiles
714 714 else:
715 715 return f in standins
716 716
717 717 match.matchfn = matchfn
718 718
719 719 return match
720 720
721 721
722 722 class automatedcommithook(object):
723 723     '''Stateful hook to update standins at the first commit after resuming
724 724 
725 725     For efficiency, updating standins in the working directory should
726 726     be avoided during automated committing (like rebase, transplant and
727 727     so on), because they should already be up to date before committing.
728 728 
729 729     But the first commit after resuming automated committing (e.g. ``rebase
730 730     --continue``) should update them, because largefiles may have been
731 731     modified manually.
732 732 '''
733 733
734 734 def __init__(self, resuming):
735 735 self.resuming = resuming
736 736
737 737 def __call__(self, repo, match):
738 738 if self.resuming:
739 739 self.resuming = False # avoids updating at subsequent commits
740 740 return updatestandinsbymatch(repo, match)
741 741 else:
742 742 return match
743 743
744 744
745 745 def getstatuswriter(ui, repo, forcibly=None):
746 746     '''Return the function to write out largefiles-specific status
747 747 
748 748     If ``forcibly`` is ``None``, this returns the last element of
749 749     ``repo._lfstatuswriters`` as the "default" writer function.
750 750
751 751 Otherwise, this returns the function to always write out (or
752 752 ignore if ``not forcibly``) status.
753 753 '''
754 754 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
755 755 return repo._lfstatuswriters[-1]
756 756 else:
757 757 if forcibly:
758 758 return ui.status # forcibly WRITE OUT
759 759 else:
760 760 return lambda *msg, **opts: None # forcibly IGNORE
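
As a rough usage sketch (the caller below is hypothetical, not part of this module), code that must always emit largefiles status output can request the writer with ``forcibly=True``:

    statuswriter = getstatuswriter(ui, repo, forcibly=True)
    statuswriter(b'getting changed largefiles\n')

With ``forcibly=None`` on a largefiles-enabled repo, the last registered writer in ``repo._lfstatuswriters`` is used instead; ``forcibly=False`` silently drops the messages.
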
@@ -1,561 +1,561 b''
1 1 from __future__ import absolute_import
2 2
3 3 import collections
4 4 import errno
5 import hashlib
6 5 import mmap
7 6 import os
8 7 import struct
9 8 import time
10 9
11 10 from mercurial.i18n import _
12 11 from mercurial.pycompat import (
13 12 getattr,
14 13 open,
15 14 )
16 15 from mercurial import (
17 16 node as nodemod,
18 17 policy,
19 18 pycompat,
20 19 util,
21 20 vfs as vfsmod,
22 21 )
22 from mercurial.utils import hashutil
23 23 from . import shallowutil
24 24
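
The ``hashutil.sha1`` calls below replace what used to be ``hashlib.sha1``. The helper module itself is not shown in this hunk; a minimal sketch of the assumed wrapper, which prefers a collision-detecting SHA-1 implementation when one is available and otherwise falls back to hashlib, could look like:

    # sketch of mercurial/utils/hashutil.py (assumed interface, not in this diff)
    import hashlib

    try:
        from mercurial.thirdparty import sha1dc
        sha1 = sha1dc.sha1  # drop-in replacement for hashlib.sha1
    except (ImportError, AttributeError):
        sha1 = hashlib.sha1
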
25 25 osutil = policy.importmod('osutil')
26 26
27 27 # The pack version supported by this implementation. This will need to be
28 28 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
29 29 # changing any of the int sizes, changing the delta algorithm, etc.
30 30 PACKVERSIONSIZE = 1
31 31 INDEXVERSIONSIZE = 2
32 32
33 33 FANOUTSTART = INDEXVERSIONSIZE
34 34
35 35 # Constant that indicates a fanout table entry hasn't been filled in. (This does
36 36 # not get serialized)
37 37 EMPTYFANOUT = -1
38 38
39 39 # The fanout prefix is the number of bytes that can be addressed by the fanout
40 40 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
41 41 # look in the fanout table (which will be 2^8 entries long).
42 42 SMALLFANOUTPREFIX = 1
43 43 LARGEFANOUTPREFIX = 2
44 44
45 45 # The number of entries in the index at which point we switch to a large fanout.
46 46 # It is chosen to balance the linear scan through a sparse fanout, with the
47 47 # size of the bisect in the actual index.
48 48 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
49 49 # bisect) with (8 step fanout scan + 1 step bisect)
50 50 # 5 step bisect = log(2^16 / 8 / 255) # fanout
51 51 # 8 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
52 52 SMALLFANOUTCUTOFF = 2 ** 16 // 8
53 53
54 54 # The amount of time to wait between checking for new packs. This prevents an
55 55 # exception when data is moved to a new pack after the process has already
56 56 # loaded the pack list.
57 57 REFRESHRATE = 0.1
58 58
59 59 if pycompat.isposix and not pycompat.ispy3:
60 60 # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
61 61 # The 'e' flag will be ignored on older versions of glibc.
62 62 # Python 3 can't handle the 'e' flag.
63 63 PACKOPENMODE = b'rbe'
64 64 else:
65 65 PACKOPENMODE = b'rb'
66 66
67 67
68 68 class _cachebackedpacks(object):
69 69 def __init__(self, packs, cachesize):
70 70 self._packs = set(packs)
71 71 self._lrucache = util.lrucachedict(cachesize)
72 72 self._lastpack = None
73 73
74 74 # Avoid cold start of the cache by populating the most recent packs
75 75 # in the cache.
76 76 for i in reversed(range(min(cachesize, len(packs)))):
77 77 self._movetofront(packs[i])
78 78
79 79 def _movetofront(self, pack):
80 80 # This effectively makes pack the first entry in the cache.
81 81 self._lrucache[pack] = True
82 82
83 83 def _registerlastpackusage(self):
84 84 if self._lastpack is not None:
85 85 self._movetofront(self._lastpack)
86 86 self._lastpack = None
87 87
88 88 def add(self, pack):
89 89 self._registerlastpackusage()
90 90
91 91 # This method will mostly be called when packs are not in cache.
92 92         # Therefore, add the pack to the cache.
93 93 self._movetofront(pack)
94 94 self._packs.add(pack)
95 95
96 96 def __iter__(self):
97 97 self._registerlastpackusage()
98 98
99 99 # Cache iteration is based on LRU.
100 100 for pack in self._lrucache:
101 101 self._lastpack = pack
102 102 yield pack
103 103
104 104 cachedpacks = set(pack for pack in self._lrucache)
105 105 # Yield for paths not in the cache.
106 106 for pack in self._packs - cachedpacks:
107 107 self._lastpack = pack
108 108 yield pack
109 109
110 110 # Data not found in any pack.
111 111 self._lastpack = None
112 112
113 113
114 114 class basepackstore(object):
115 115 # Default cache size limit for the pack files.
116 116 DEFAULTCACHESIZE = 100
117 117
118 118 def __init__(self, ui, path):
119 119 self.ui = ui
120 120 self.path = path
121 121
122 122         # lastrefresh is 0 so we'll immediately check for new packs on the first
123 123 # failure.
124 124 self.lastrefresh = 0
125 125
126 126 packs = []
127 127 for filepath, __, __ in self._getavailablepackfilessorted():
128 128 try:
129 129 pack = self.getpack(filepath)
130 130 except Exception as ex:
131 131 # An exception may be thrown if the pack file is corrupted
132 132 # somehow. Log a warning but keep going in this case, just
133 133 # skipping this pack file.
134 134 #
135 135 # If this is an ENOENT error then don't even bother logging.
136 136 # Someone could have removed the file since we retrieved the
137 137 # list of paths.
138 138 if getattr(ex, 'errno', None) != errno.ENOENT:
139 139 ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex))
140 140 continue
141 141 packs.append(pack)
142 142
143 143 self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)
144 144
145 145 def _getavailablepackfiles(self):
146 146         """For each pack file (an index/data file combo), yields:
147 147 (full path without extension, mtime, size)
148 148
149 149 mtime will be the mtime of the index/data file (whichever is newer)
150 150 size is the combined size of index/data file
151 151 """
152 152 indexsuffixlen = len(self.INDEXSUFFIX)
153 153 packsuffixlen = len(self.PACKSUFFIX)
154 154
155 155 ids = set()
156 156 sizes = collections.defaultdict(lambda: 0)
157 157 mtimes = collections.defaultdict(lambda: [])
158 158 try:
159 159 for filename, type, stat in osutil.listdir(self.path, stat=True):
160 160 id = None
161 161 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
162 162 id = filename[:-indexsuffixlen]
163 163 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
164 164 id = filename[:-packsuffixlen]
165 165
166 166 # Since we expect to have two files corresponding to each ID
167 167 # (the index file and the pack file), we can yield once we see
168 168 # it twice.
169 169 if id:
170 170 sizes[id] += stat.st_size # Sum both files' sizes together
171 171 mtimes[id].append(stat.st_mtime)
172 172 if id in ids:
173 173 yield (
174 174 os.path.join(self.path, id),
175 175 max(mtimes[id]),
176 176 sizes[id],
177 177 )
178 178 else:
179 179 ids.add(id)
180 180 except OSError as ex:
181 181 if ex.errno != errno.ENOENT:
182 182 raise
183 183
184 184 def _getavailablepackfilessorted(self):
185 185 """Like `_getavailablepackfiles`, but also sorts the files by mtime,
186 186 yielding newest files first.
187 187
188 188         This is desirable, since newer packfiles are more likely to contain
189 189         the data we want.
190 190 """
191 191 files = []
192 192 for path, mtime, size in self._getavailablepackfiles():
193 193 files.append((mtime, size, path))
194 194 files = sorted(files, reverse=True)
195 195 for mtime, size, path in files:
196 196 yield path, mtime, size
197 197
198 198 def gettotalsizeandcount(self):
199 199 """Returns the total disk size (in bytes) of all the pack files in
200 200 this store, and the count of pack files.
201 201
202 202 (This might be smaller than the total size of the ``self.path``
203 203         directory, since this only considers fully-written pack files, and not
204 204         temporary files or other detritus in the directory.)
205 205 """
206 206 totalsize = 0
207 207 count = 0
208 208 for __, __, size in self._getavailablepackfiles():
209 209 totalsize += size
210 210 count += 1
211 211 return totalsize, count
212 212
213 213 def getmetrics(self):
214 214 """Returns metrics on the state of this store."""
215 215 size, count = self.gettotalsizeandcount()
216 216 return {
217 217 b'numpacks': count,
218 218 b'totalpacksize': size,
219 219 }
220 220
221 221 def getpack(self, path):
222 222 raise NotImplementedError()
223 223
224 224 def getmissing(self, keys):
225 225 missing = keys
226 226 for pack in self.packs:
227 227 missing = pack.getmissing(missing)
228 228
229 229 # Ensures better performance of the cache by keeping the most
230 230 # recently accessed pack at the beginning in subsequent iterations.
231 231 if not missing:
232 232 return missing
233 233
234 234 if missing:
235 235 for pack in self.refresh():
236 236 missing = pack.getmissing(missing)
237 237
238 238 return missing
239 239
240 240 def markledger(self, ledger, options=None):
241 241 for pack in self.packs:
242 242 pack.markledger(ledger)
243 243
244 244 def markforrefresh(self):
245 245 """Tells the store that there may be new pack files, so the next time it
246 246 has a lookup miss it should check for new files."""
247 247 self.lastrefresh = 0
248 248
249 249 def refresh(self):
250 250 """Checks for any new packs on disk, adds them to the main pack list,
251 251 and returns a list of just the new packs."""
252 252 now = time.time()
253 253
254 254 # If we experience a lot of misses (like in the case of getmissing() on
255 255 # new objects), let's only actually check disk for new stuff every once
256 256 # in a while. Generally this code path should only ever matter when a
257 257         # repack is going on in the background, and it should be pretty rare
258 258         # for that to happen twice in quick succession.
259 259 newpacks = []
260 260 if now > self.lastrefresh + REFRESHRATE:
261 261 self.lastrefresh = now
262 262 previous = set(p.path for p in self.packs)
263 263 for filepath, __, __ in self._getavailablepackfilessorted():
264 264 if filepath not in previous:
265 265 newpack = self.getpack(filepath)
266 266 newpacks.append(newpack)
267 267 self.packs.add(newpack)
268 268
269 269 return newpacks
270 270
271 271
272 272 class versionmixin(object):
273 273 # Mix-in for classes with multiple supported versions
274 274 VERSION = None
275 275 SUPPORTED_VERSIONS = [2]
276 276
277 277 def _checkversion(self, version):
278 278 if version in self.SUPPORTED_VERSIONS:
279 279 if self.VERSION is None:
280 280 # only affect this instance
281 281 self.VERSION = version
282 282 elif self.VERSION != version:
283 283 raise RuntimeError(b'inconsistent version: %d' % version)
284 284 else:
285 285 raise RuntimeError(b'unsupported version: %d' % version)
286 286
287 287
288 288 class basepack(versionmixin):
289 289     # The maximum amount we should read via mmap before remapping so the old
290 290 # pages can be released (100MB)
291 291 MAXPAGEDIN = 100 * 1024 ** 2
292 292
293 293 SUPPORTED_VERSIONS = [2]
294 294
295 295 def __init__(self, path):
296 296 self.path = path
297 297 self.packpath = path + self.PACKSUFFIX
298 298 self.indexpath = path + self.INDEXSUFFIX
299 299
300 300 self.indexsize = os.stat(self.indexpath).st_size
301 301 self.datasize = os.stat(self.packpath).st_size
302 302
303 303 self._index = None
304 304 self._data = None
305 305 self.freememory() # initialize the mmap
306 306
307 307 version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0]
308 308 self._checkversion(version)
309 309
310 310 version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE])
311 311 self._checkversion(version)
312 312
313 313 if 0b10000000 & config:
314 314 self.params = indexparams(LARGEFANOUTPREFIX, version)
315 315 else:
316 316 self.params = indexparams(SMALLFANOUTPREFIX, version)
317 317
318 318 @util.propertycache
319 319 def _fanouttable(self):
320 320 params = self.params
321 321 rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize]
322 322 fanouttable = []
323 323 for i in pycompat.xrange(0, params.fanoutcount):
324 324 loc = i * 4
325 325 fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0]
326 326 fanouttable.append(fanoutentry)
327 327 return fanouttable
328 328
329 329 @util.propertycache
330 330 def _indexend(self):
331 331 nodecount = struct.unpack_from(
332 332 b'!Q', self._index, self.params.indexstart - 8
333 333 )[0]
334 334 return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH
335 335
336 336 def freememory(self):
337 337 """Unmap and remap the memory to free it up after known expensive
338 338 operations. Return True if self._data and self._index were reloaded.
339 339 """
340 340 if self._index:
341 341 if self._pagedin < self.MAXPAGEDIN:
342 342 return False
343 343
344 344 self._index.close()
345 345 self._data.close()
346 346
347 347 # TODO: use an opener/vfs to access these paths
348 348 with open(self.indexpath, PACKOPENMODE) as indexfp:
349 349 # memory-map the file, size 0 means whole file
350 350 self._index = mmap.mmap(
351 351 indexfp.fileno(), 0, access=mmap.ACCESS_READ
352 352 )
353 353 with open(self.packpath, PACKOPENMODE) as datafp:
354 354 self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)
355 355
356 356 self._pagedin = 0
357 357 return True
358 358
359 359 def getmissing(self, keys):
360 360 raise NotImplementedError()
361 361
362 362 def markledger(self, ledger, options=None):
363 363 raise NotImplementedError()
364 364
365 365 def cleanup(self, ledger):
366 366 raise NotImplementedError()
367 367
368 368 def __iter__(self):
369 369 raise NotImplementedError()
370 370
371 371 def iterentries(self):
372 372 raise NotImplementedError()
373 373
374 374
375 375 class mutablebasepack(versionmixin):
376 376 def __init__(self, ui, packdir, version=2):
377 377 self._checkversion(version)
378 378 # TODO(augie): make this configurable
379 379 self._compressor = b'GZ'
380 380 opener = vfsmod.vfs(packdir)
381 381 opener.createmode = 0o444
382 382 self.opener = opener
383 383
384 384 self.entries = {}
385 385
386 386 shallowutil.mkstickygroupdir(ui, packdir)
387 387 self.packfp, self.packpath = opener.mkstemp(
388 388 suffix=self.PACKSUFFIX + b'-tmp'
389 389 )
390 390 self.idxfp, self.idxpath = opener.mkstemp(
391 391 suffix=self.INDEXSUFFIX + b'-tmp'
392 392 )
393 393 self.packfp = os.fdopen(self.packfp, 'wb+')
394 394 self.idxfp = os.fdopen(self.idxfp, 'wb+')
395 self.sha = hashlib.sha1()
395 self.sha = hashutil.sha1()
396 396 self._closed = False
397 397
398 398 # The opener provides no way of doing permission fixup on files created
399 399 # via mkstemp, so we must fix it ourselves. We can probably fix this
400 400 # upstream in vfs.mkstemp so we don't need to use the private method.
401 401 opener._fixfilemode(opener.join(self.packpath))
402 402 opener._fixfilemode(opener.join(self.idxpath))
403 403
404 404 # Write header
405 405 # TODO: make it extensible (ex: allow specifying compression algorithm,
406 406 # a flexible key/value header, delta algorithm, fanout size, etc)
407 407 versionbuf = struct.pack(b'!B', self.VERSION) # unsigned 1 byte int
408 408 self.writeraw(versionbuf)
409 409
410 410 def __enter__(self):
411 411 return self
412 412
413 413 def __exit__(self, exc_type, exc_value, traceback):
414 414 if exc_type is None:
415 415 self.close()
416 416 else:
417 417 self.abort()
418 418
419 419 def abort(self):
420 420 # Unclean exit
421 421 self._cleantemppacks()
422 422
423 423 def writeraw(self, data):
424 424 self.packfp.write(data)
425 425 self.sha.update(data)
426 426
427 427 def close(self, ledger=None):
428 428 if self._closed:
429 429 return
430 430
431 431 try:
432 432 sha = nodemod.hex(self.sha.digest())
433 433 self.packfp.close()
434 434 self.writeindex()
435 435
436 436 if len(self.entries) == 0:
437 437 # Empty pack
438 438 self._cleantemppacks()
439 439 self._closed = True
440 440 return None
441 441
442 442 self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
443 443 try:
444 444 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
445 445 except Exception as ex:
446 446 try:
447 447 self.opener.unlink(sha + self.PACKSUFFIX)
448 448 except Exception:
449 449 pass
450 450 # Throw exception 'ex' explicitly since a normal 'raise' would
451 451 # potentially throw an exception from the unlink cleanup.
452 452 raise ex
453 453 except Exception:
454 454 # Clean up temp packs in all exception cases
455 455 self._cleantemppacks()
456 456 raise
457 457
458 458 self._closed = True
459 459 result = self.opener.join(sha)
460 460 if ledger:
461 461 ledger.addcreated(result)
462 462 return result
463 463
464 464 def _cleantemppacks(self):
465 465 try:
466 466 self.opener.unlink(self.packpath)
467 467 except Exception:
468 468 pass
469 469 try:
470 470 self.opener.unlink(self.idxpath)
471 471 except Exception:
472 472 pass
473 473
474 474 def writeindex(self):
475 475 largefanout = len(self.entries) > SMALLFANOUTCUTOFF
476 476 if largefanout:
477 477 params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
478 478 else:
479 479 params = indexparams(SMALLFANOUTPREFIX, self.VERSION)
480 480
481 481 fanouttable = [EMPTYFANOUT] * params.fanoutcount
482 482
483 483 # Precompute the location of each entry
484 484 locations = {}
485 485 count = 0
486 486 for node in sorted(self.entries):
487 487 location = count * self.INDEXENTRYLENGTH
488 488 locations[node] = location
489 489 count += 1
490 490
491 491 # Must use [0] on the unpack result since it's always a tuple.
492 492 fanoutkey = struct.unpack(
493 493 params.fanoutstruct, node[: params.fanoutprefix]
494 494 )[0]
495 495 if fanouttable[fanoutkey] == EMPTYFANOUT:
496 496 fanouttable[fanoutkey] = location
497 497
498 498 rawfanouttable = b''
499 499 last = 0
500 500 for offset in fanouttable:
501 501 offset = offset if offset != EMPTYFANOUT else last
502 502 last = offset
503 503 rawfanouttable += struct.pack(b'!I', offset)
504 504
505 505 rawentrieslength = struct.pack(b'!Q', len(self.entries))
506 506
507 507         # The index offset is its location in the file, i.e. right after the
508 508         # 2 byte header and the fanouttable.
509 509 rawindex = self.createindex(locations, 2 + len(rawfanouttable))
510 510
511 511 self._writeheader(params)
512 512 self.idxfp.write(rawfanouttable)
513 513 self.idxfp.write(rawentrieslength)
514 514 self.idxfp.write(rawindex)
515 515 self.idxfp.close()
516 516
517 517 def createindex(self, nodelocations):
518 518 raise NotImplementedError()
519 519
520 520 def _writeheader(self, indexparams):
521 521 # Index header
522 522 # <version: 1 byte>
523 523 # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
524 524 # <unused: 7 bit> # future use (compression, delta format, etc)
525 525 config = 0
526 526 if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
527 527 config = 0b10000000
528 528 self.idxfp.write(struct.pack(b'!BB', self.VERSION, config))
529 529
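
For illustration, these are the two header byte sequences ``_writeheader`` above can produce with version 2 (the values follow from the ``struct.pack`` call; the asserts themselves are an added example):

    import struct

    assert struct.pack(b'!BB', 2, 0b10000000) == b'\x02\x80'  # large fanout
    assert struct.pack(b'!BB', 2, 0) == b'\x02\x00'           # small fanout
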
530 530
531 531 class indexparams(object):
532 532 __slots__ = (
533 533 'fanoutprefix',
534 534 'fanoutstruct',
535 535 'fanoutcount',
536 536 'fanoutsize',
537 537 'indexstart',
538 538 )
539 539
540 540 def __init__(self, prefixsize, version):
541 541 self.fanoutprefix = prefixsize
542 542
543 543 # The struct pack format for fanout table location (i.e. the format that
544 544 # converts the node prefix into an integer location in the fanout
545 545 # table).
546 546 if prefixsize == SMALLFANOUTPREFIX:
547 547 self.fanoutstruct = b'!B'
548 548 elif prefixsize == LARGEFANOUTPREFIX:
549 549 self.fanoutstruct = b'!H'
550 550 else:
551 551 raise ValueError(b"invalid fanout prefix size: %s" % prefixsize)
552 552
553 553 # The number of fanout table entries
554 554 self.fanoutcount = 2 ** (prefixsize * 8)
555 555
556 556 # The total bytes used by the fanout table
557 557 self.fanoutsize = self.fanoutcount * 4
558 558
559 559 self.indexstart = FANOUTSTART + self.fanoutsize
560 560 # Skip the index length
561 561 self.indexstart += 8
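
A worked example of the table sizes ``indexparams`` computes (the numbers follow from the arithmetic above; the snippet is an illustration, not part of the original module):

    small = indexparams(SMALLFANOUTPREFIX, 2)
    assert small.fanoutcount == 256       # 2 ** 8 buckets for a 1-byte prefix
    assert small.fanoutsize == 1024       # 256 entries * 4 bytes

    large = indexparams(LARGEFANOUTPREFIX, 2)
    assert large.fanoutcount == 65536     # 2 ** 16 buckets for a 2-byte prefix
    assert large.fanoutsize == 262144     # 65536 entries * 4 bytes (256 KiB)

``writeindex`` switches from the small to the large table once a pack holds more than SMALLFANOUTCUTOFF (8192) entries.
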
@@ -1,461 +1,461 b''
1 1 from __future__ import absolute_import
2 2
3 3 import errno
4 import hashlib
5 4 import os
6 5 import shutil
7 6 import stat
8 7 import time
9 8
10 9 from mercurial.i18n import _
11 10 from mercurial.node import bin, hex
12 11 from mercurial.pycompat import open
13 12 from mercurial import (
14 13 error,
15 14 pycompat,
16 15 util,
17 16 )
17 from mercurial.utils import hashutil
18 18 from . import (
19 19 constants,
20 20 shallowutil,
21 21 )
22 22
23 23
24 24 class basestore(object):
25 25 def __init__(self, repo, path, reponame, shared=False):
26 26 """Creates a remotefilelog store object for the given repo name.
27 27
28 28 `path` - The file path where this store keeps its data
29 29 `reponame` - The name of the repo. This is used to partition data from
30 30 many repos.
31 31 `shared` - True if this store is a shared cache of data from the central
32 32 server, for many repos on this machine. False means this store is for
33 33 the local data for one repo.
34 34 """
35 35 self.repo = repo
36 36 self.ui = repo.ui
37 37 self._path = path
38 38 self._reponame = reponame
39 39 self._shared = shared
40 40 self._uid = os.getuid() if not pycompat.iswindows else None
41 41
42 42 self._validatecachelog = self.ui.config(
43 43 b"remotefilelog", b"validatecachelog"
44 44 )
45 45 self._validatecache = self.ui.config(
46 46 b"remotefilelog", b"validatecache", b'on'
47 47 )
48 48 if self._validatecache not in (b'on', b'strict', b'off'):
49 49 self._validatecache = b'on'
50 50 if self._validatecache == b'off':
51 51 self._validatecache = False
52 52
53 53 if shared:
54 54 shallowutil.mkstickygroupdir(self.ui, path)
55 55
56 56 def getmissing(self, keys):
57 57 missing = []
58 58 for name, node in keys:
59 59 filepath = self._getfilepath(name, node)
60 60 exists = os.path.exists(filepath)
61 61 if (
62 62 exists
63 63 and self._validatecache == b'strict'
64 64 and not self._validatekey(filepath, b'contains')
65 65 ):
66 66 exists = False
67 67 if not exists:
68 68 missing.append((name, node))
69 69
70 70 return missing
71 71
72 72 # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE
73 73
74 74 def markledger(self, ledger, options=None):
75 75 if options and options.get(constants.OPTION_PACKSONLY):
76 76 return
77 77 if self._shared:
78 78 for filename, nodes in self._getfiles():
79 79 for node in nodes:
80 80 ledger.markdataentry(self, filename, node)
81 81 ledger.markhistoryentry(self, filename, node)
82 82
83 83 def cleanup(self, ledger):
84 84 ui = self.ui
85 85 entries = ledger.sources.get(self, [])
86 86 count = 0
87 87 progress = ui.makeprogress(
88 88 _(b"cleaning up"), unit=b"files", total=len(entries)
89 89 )
90 90 for entry in entries:
91 91 if entry.gced or (entry.datarepacked and entry.historyrepacked):
92 92 progress.update(count)
93 93 path = self._getfilepath(entry.filename, entry.node)
94 94 util.tryunlink(path)
95 95 count += 1
96 96 progress.complete()
97 97
98 98 # Clean up the repo cache directory.
99 99 self._cleanupdirectory(self._getrepocachepath())
100 100
101 101 # BELOW THIS ARE NON-STANDARD APIS
102 102
103 103 def _cleanupdirectory(self, rootdir):
104 104 """Removes the empty directories and unnecessary files within the root
105 105 directory recursively. Note that this method does not remove the root
106 106 directory itself. """
107 107
108 108 oldfiles = set()
109 109 otherfiles = set()
110 110 # osutil.listdir returns stat information which saves some rmdir/listdir
111 111 # syscalls.
112 112 for name, mode in util.osutil.listdir(rootdir):
113 113 if stat.S_ISDIR(mode):
114 114 dirpath = os.path.join(rootdir, name)
115 115 self._cleanupdirectory(dirpath)
116 116
117 117 # Now that the directory specified by dirpath is potentially
118 118 # empty, try and remove it.
119 119 try:
120 120 os.rmdir(dirpath)
121 121 except OSError:
122 122 pass
123 123
124 124 elif stat.S_ISREG(mode):
125 125 if name.endswith(b'_old'):
126 126 oldfiles.add(name[:-4])
127 127 else:
128 128 otherfiles.add(name)
129 129
130 130 # Remove the files which end with suffix '_old' and have no
131 131 # corresponding file without the suffix '_old'. See addremotefilelognode
132 132 # method for the generation/purpose of files with '_old' suffix.
133 133 for filename in oldfiles - otherfiles:
134 134 filepath = os.path.join(rootdir, filename + b'_old')
135 135 util.tryunlink(filepath)
136 136
137 137 def _getfiles(self):
138 138 """Return a list of (filename, [node,...]) for all the revisions that
139 139 exist in the store.
140 140
141 141 This is useful for obtaining a list of all the contents of the store
142 142 when performing a repack to another store, since the store API requires
143 143 name+node keys and not namehash+node keys.
144 144 """
145 145 existing = {}
146 146 for filenamehash, node in self._listkeys():
147 147 existing.setdefault(filenamehash, []).append(node)
148 148
149 149 filenamemap = self._resolvefilenames(existing.keys())
150 150
151 151 for filename, sha in pycompat.iteritems(filenamemap):
152 152 yield (filename, existing[sha])
153 153
154 154 def _resolvefilenames(self, hashes):
155 155 """Given a list of filename hashes that are present in the
156 156 remotefilelog store, return a mapping from filename->hash.
157 157
158 158 This is useful when converting remotefilelog blobs into other storage
159 159 formats.
160 160 """
161 161 if not hashes:
162 162 return {}
163 163
164 164 filenames = {}
165 165 missingfilename = set(hashes)
166 166
167 167 # Start with a full manifest, since it'll cover the majority of files
168 168 for filename in self.repo[b'tip'].manifest():
169 sha = hashlib.sha1(filename).digest()
169 sha = hashutil.sha1(filename).digest()
170 170 if sha in missingfilename:
171 171 filenames[filename] = sha
172 172 missingfilename.discard(sha)
173 173
174 174 # Scan the changelog until we've found every file name
175 175 cl = self.repo.unfiltered().changelog
176 176 for rev in pycompat.xrange(len(cl) - 1, -1, -1):
177 177 if not missingfilename:
178 178 break
179 179 files = cl.readfiles(cl.node(rev))
180 180 for filename in files:
181 sha = hashlib.sha1(filename).digest()
181 sha = hashutil.sha1(filename).digest()
182 182 if sha in missingfilename:
183 183 filenames[filename] = sha
184 184 missingfilename.discard(sha)
185 185
186 186 return filenames
187 187
188 188 def _getrepocachepath(self):
189 189 return (
190 190 os.path.join(self._path, self._reponame)
191 191 if self._shared
192 192 else self._path
193 193 )
194 194
195 195 def _listkeys(self):
196 196 """List all the remotefilelog keys that exist in the store.
197 197
198 198 Returns a iterator of (filename hash, filecontent hash) tuples.
199 199 """
200 200
201 201 for root, dirs, files in os.walk(self._getrepocachepath()):
202 202 for filename in files:
203 203 if len(filename) != 40:
204 204 continue
205 205 node = filename
206 206 if self._shared:
207 207 # .../1a/85ffda..be21
208 208 filenamehash = root[-41:-39] + root[-38:]
209 209 else:
210 210 filenamehash = root[-40:]
211 211 yield (bin(filenamehash), bin(node))
212 212
213 213 def _getfilepath(self, name, node):
214 214 node = hex(node)
215 215 if self._shared:
216 216 key = shallowutil.getcachekey(self._reponame, name, node)
217 217 else:
218 218 key = shallowutil.getlocalkey(name, node)
219 219
220 220 return os.path.join(self._path, key)
221 221
222 222 def _getdata(self, name, node):
223 223 filepath = self._getfilepath(name, node)
224 224 try:
225 225 data = shallowutil.readfile(filepath)
226 226 if self._validatecache and not self._validatedata(data, filepath):
227 227 if self._validatecachelog:
228 228 with open(self._validatecachelog, b'a+') as f:
229 229 f.write(b"corrupt %s during read\n" % filepath)
230 230 os.rename(filepath, filepath + b".corrupt")
231 231 raise KeyError(b"corrupt local cache file %s" % filepath)
232 232 except IOError:
233 233 raise KeyError(
234 234 b"no file found at %s for %s:%s" % (filepath, name, hex(node))
235 235 )
236 236
237 237 return data
238 238
239 239 def addremotefilelognode(self, name, node, data):
240 240 filepath = self._getfilepath(name, node)
241 241
242 242 oldumask = os.umask(0o002)
243 243 try:
244 244 # if this node already exists, save the old version for
245 245 # recovery/debugging purposes.
246 246 if os.path.exists(filepath):
247 247 newfilename = filepath + b'_old'
248 248 # newfilename can be read-only and shutil.copy will fail.
249 249 # Delete newfilename to avoid it
250 250 if os.path.exists(newfilename):
251 251 shallowutil.unlinkfile(newfilename)
252 252 shutil.copy(filepath, newfilename)
253 253
254 254 shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
255 255 shallowutil.writefile(filepath, data, readonly=True)
256 256
257 257 if self._validatecache:
258 258 if not self._validatekey(filepath, b'write'):
259 259 raise error.Abort(
260 260 _(b"local cache write was corrupted %s") % filepath
261 261 )
262 262 finally:
263 263 os.umask(oldumask)
264 264
265 265 def markrepo(self, path):
266 266 """Call this to add the given repo path to the store's list of
267 267 repositories that are using it. This is useful later when doing garbage
268 268         collection, since it allows us to inspect the repos to see what nodes
269 269 they want to be kept alive in the store.
270 270 """
271 271 repospath = os.path.join(self._path, b"repos")
272 272 with open(repospath, b'ab') as reposfile:
273 273 reposfile.write(os.path.dirname(path) + b"\n")
274 274
275 275 repospathstat = os.stat(repospath)
276 276 if repospathstat.st_uid == self._uid:
277 277 os.chmod(repospath, 0o0664)
278 278
279 279 def _validatekey(self, path, action):
280 280 with open(path, b'rb') as f:
281 281 data = f.read()
282 282
283 283 if self._validatedata(data, path):
284 284 return True
285 285
286 286 if self._validatecachelog:
287 287 with open(self._validatecachelog, b'ab+') as f:
288 288 f.write(b"corrupt %s during %s\n" % (path, action))
289 289
290 290 os.rename(path, path + b".corrupt")
291 291 return False
292 292
293 293 def _validatedata(self, data, path):
294 294 try:
295 295 if len(data) > 0:
296 296 # see remotefilelogserver.createfileblob for the format
297 297 offset, size, flags = shallowutil.parsesizeflags(data)
298 298 if len(data) <= size:
299 299 # it is truncated
300 300 return False
301 301
302 302 # extract the node from the metadata
303 303 offset += size
304 304 datanode = data[offset : offset + 20]
305 305
306 306 # and compare against the path
307 307 if os.path.basename(path) == hex(datanode):
308 308 # Content matches the intended path
309 309 return True
310 310 return False
311 311 except (ValueError, RuntimeError):
312 312 pass
313 313
314 314 return False
315 315
316 316 def gc(self, keepkeys):
317 317 ui = self.ui
318 318 cachepath = self._path
319 319
320 320 # prune cache
321 321 queue = pycompat.queue.PriorityQueue()
322 322 originalsize = 0
323 323 size = 0
324 324 count = 0
325 325 removed = 0
326 326
327 327 # keep files newer than a day even if they aren't needed
328 328 limit = time.time() - (60 * 60 * 24)
329 329
330 330 progress = ui.makeprogress(
331 331 _(b"removing unnecessary files"), unit=b"files"
332 332 )
333 333 progress.update(0)
334 334 for root, dirs, files in os.walk(cachepath):
335 335 for file in files:
336 336 if file == b'repos':
337 337 continue
338 338
339 339 # Don't delete pack files
340 340 if b'/packs/' in root:
341 341 continue
342 342
343 343 progress.update(count)
344 344 path = os.path.join(root, file)
345 345 key = os.path.relpath(path, cachepath)
346 346 count += 1
347 347 try:
348 348 pathstat = os.stat(path)
349 349 except OSError as e:
350 350 # errno.ENOENT = no such file or directory
351 351 if e.errno != errno.ENOENT:
352 352 raise
353 353 msg = _(
354 354 b"warning: file %s was removed by another process\n"
355 355 )
356 356 ui.warn(msg % path)
357 357 continue
358 358
359 359 originalsize += pathstat.st_size
360 360
361 361 if key in keepkeys or pathstat.st_atime > limit:
362 362 queue.put((pathstat.st_atime, path, pathstat))
363 363 size += pathstat.st_size
364 364 else:
365 365 try:
366 366 shallowutil.unlinkfile(path)
367 367 except OSError as e:
368 368 # errno.ENOENT = no such file or directory
369 369 if e.errno != errno.ENOENT:
370 370 raise
371 371 msg = _(
372 372 b"warning: file %s was removed by another "
373 373 b"process\n"
374 374 )
375 375 ui.warn(msg % path)
376 376 continue
377 377 removed += 1
378 378 progress.complete()
379 379
380 380 # remove oldest files until under limit
381 381 limit = ui.configbytes(b"remotefilelog", b"cachelimit")
382 382 if size > limit:
383 383 excess = size - limit
384 384 progress = ui.makeprogress(
385 385 _(b"enforcing cache limit"), unit=b"bytes", total=excess
386 386 )
387 387 removedexcess = 0
388 388 while queue and size > limit and size > 0:
389 389 progress.update(removedexcess)
390 390 atime, oldpath, oldpathstat = queue.get()
391 391 try:
392 392 shallowutil.unlinkfile(oldpath)
393 393 except OSError as e:
394 394 # errno.ENOENT = no such file or directory
395 395 if e.errno != errno.ENOENT:
396 396 raise
397 397 msg = _(
398 398 b"warning: file %s was removed by another process\n"
399 399 )
400 400 ui.warn(msg % oldpath)
401 401 size -= oldpathstat.st_size
402 402 removed += 1
403 403 removedexcess += oldpathstat.st_size
404 404 progress.complete()
405 405
406 406 ui.status(
407 407 _(b"finished: removed %d of %d files (%0.2f GB to %0.2f GB)\n")
408 408 % (
409 409 removed,
410 410 count,
411 411 float(originalsize) / 1024.0 / 1024.0 / 1024.0,
412 412 float(size) / 1024.0 / 1024.0 / 1024.0,
413 413 )
414 414 )
415 415
416 416
417 417 class baseunionstore(object):
418 418 def __init__(self, *args, **kwargs):
419 419 # If one of the functions that iterates all of the stores is about to
420 420 # throw a KeyError, try this many times with a full refresh between
421 421 # attempts. A repack operation may have moved data from one store to
422 422 # another while we were running.
423 423 self.numattempts = kwargs.get('numretries', 0) + 1
424 424         # If not None, call this function on every retry, and once more when
425 425         # the attempts are exhausted.
426 426 self.retrylog = kwargs.get('retrylog', None)
427 427
428 428 def markforrefresh(self):
429 429 for store in self.stores:
430 430 if util.safehasattr(store, b'markforrefresh'):
431 431 store.markforrefresh()
432 432
433 433 @staticmethod
434 434 def retriable(fn):
435 435 def noop(*args):
436 436 pass
437 437
438 438 def wrapped(self, *args, **kwargs):
439 439 retrylog = self.retrylog or noop
440 440 funcname = fn.__name__
441 441 i = 0
442 442 while i < self.numattempts:
443 443 if i > 0:
444 444 retrylog(
445 445 b're-attempting (n=%d) %s\n'
446 446 % (i, pycompat.sysbytes(funcname))
447 447 )
448 448 self.markforrefresh()
449 449 i += 1
450 450 try:
451 451 return fn(self, *args, **kwargs)
452 452 except KeyError:
453 453 if i == self.numattempts:
454 454 # retries exhausted
455 455 retrylog(
456 456 b'retries exhausted in %s, raising KeyError\n'
457 457 % pycompat.sysbytes(funcname)
458 458 )
459 459 raise
460 460
461 461 return wrapped
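
A rough sketch of how a union store is expected to apply the decorator (the subclass below is hypothetical and simplified; the real union stores live in other remotefilelog modules):

    class exampleunionstore(baseunionstore):
        def __init__(self, *stores, **kwargs):
            super(exampleunionstore, self).__init__(**kwargs)
            self.stores = stores

        @baseunionstore.retriable
        def get(self, name, node):
            for store in self.stores:
                try:
                    return store.get(name, node)
                except KeyError:
                    continue
            # a KeyError here makes the wrapper call markforrefresh() and
            # retry up to `numretries` more times before re-raising
            raise KeyError((name, node))
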
@@ -1,477 +1,477 b''
1 1 # debugcommands.py - debug logic for remotefilelog
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 import hashlib
10 9 import os
11 10 import zlib
12 11
13 12 from mercurial.node import bin, hex, nullid, short
14 13 from mercurial.i18n import _
15 14 from mercurial.pycompat import open
16 15 from mercurial import (
17 16 error,
18 17 filelog,
19 18 lock as lockmod,
20 19 node as nodemod,
21 20 pycompat,
22 21 revlog,
23 22 )
23 from mercurial.utils import hashutil
24 24 from . import (
25 25 constants,
26 26 datapack,
27 27 fileserverclient,
28 28 historypack,
29 29 repack,
30 30 shallowutil,
31 31 )
32 32
33 33
34 34 def debugremotefilelog(ui, path, **opts):
35 35 decompress = opts.get('decompress')
36 36
37 37 size, firstnode, mapping = parsefileblob(path, decompress)
38 38
39 39 ui.status(_(b"size: %d bytes\n") % size)
40 40 ui.status(_(b"path: %s \n") % path)
41 41 ui.status(_(b"key: %s \n") % (short(firstnode)))
42 42 ui.status(_(b"\n"))
43 43 ui.status(
44 44 _(b"%12s => %12s %13s %13s %12s\n")
45 45 % (b"node", b"p1", b"p2", b"linknode", b"copyfrom")
46 46 )
47 47
48 48 queue = [firstnode]
49 49 while queue:
50 50 node = queue.pop(0)
51 51 p1, p2, linknode, copyfrom = mapping[node]
52 52 ui.status(
53 53 _(b"%s => %s %s %s %s\n")
54 54 % (short(node), short(p1), short(p2), short(linknode), copyfrom)
55 55 )
56 56 if p1 != nullid:
57 57 queue.append(p1)
58 58 if p2 != nullid:
59 59 queue.append(p2)
60 60
61 61
62 62 def buildtemprevlog(repo, file):
63 63 # get filename key
64 filekey = nodemod.hex(hashlib.sha1(file).digest())
64 filekey = nodemod.hex(hashutil.sha1(file).digest())
65 65 filedir = os.path.join(repo.path, b'store/data', filekey)
66 66
67 67 # sort all entries based on linkrev
68 68 fctxs = []
69 69 for filenode in os.listdir(filedir):
70 70 if b'_old' not in filenode:
71 71 fctxs.append(repo.filectx(file, fileid=bin(filenode)))
72 72
73 73 fctxs = sorted(fctxs, key=lambda x: x.linkrev())
74 74
75 75 # add to revlog
76 76 temppath = repo.sjoin(b'data/temprevlog.i')
77 77 if os.path.exists(temppath):
78 78 os.remove(temppath)
79 79 r = filelog.filelog(repo.svfs, b'temprevlog')
80 80
81 81 class faket(object):
82 82 def add(self, a, b, c):
83 83 pass
84 84
85 85 t = faket()
86 86 for fctx in fctxs:
87 87 if fctx.node() not in repo:
88 88 continue
89 89
90 90 p = fctx.filelog().parents(fctx.filenode())
91 91 meta = {}
92 92 if fctx.renamed():
93 93 meta[b'copy'] = fctx.renamed()[0]
94 94 meta[b'copyrev'] = hex(fctx.renamed()[1])
95 95
96 96 r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
97 97
98 98 return r
99 99
100 100
101 101 def debugindex(orig, ui, repo, file_=None, **opts):
102 102 """dump the contents of an index file"""
103 103 if (
104 104 opts.get('changelog')
105 105 or opts.get('manifest')
106 106 or opts.get('dir')
107 107 or not shallowutil.isenabled(repo)
108 108 or not repo.shallowmatch(file_)
109 109 ):
110 110 return orig(ui, repo, file_, **opts)
111 111
112 112 r = buildtemprevlog(repo, file_)
113 113
114 114 # debugindex like normal
115 115 format = opts.get(b'format', 0)
116 116 if format not in (0, 1):
117 117 raise error.Abort(_(b"unknown format %d") % format)
118 118
119 119 generaldelta = r.version & revlog.FLAG_GENERALDELTA
120 120 if generaldelta:
121 121 basehdr = b' delta'
122 122 else:
123 123 basehdr = b' base'
124 124
125 125 if format == 0:
126 126 ui.write(
127 127 (
128 128 b" rev offset length " + basehdr + b" linkrev"
129 129 b" nodeid p1 p2\n"
130 130 )
131 131 )
132 132 elif format == 1:
133 133 ui.write(
134 134 (
135 135 b" rev flag offset length"
136 136 b" size " + basehdr + b" link p1 p2"
137 137 b" nodeid\n"
138 138 )
139 139 )
140 140
141 141 for i in r:
142 142 node = r.node(i)
143 143 if generaldelta:
144 144 base = r.deltaparent(i)
145 145 else:
146 146 base = r.chainbase(i)
147 147 if format == 0:
148 148 try:
149 149 pp = r.parents(node)
150 150 except Exception:
151 151 pp = [nullid, nullid]
152 152 ui.write(
153 153 b"% 6d % 9d % 7d % 6d % 7d %s %s %s\n"
154 154 % (
155 155 i,
156 156 r.start(i),
157 157 r.length(i),
158 158 base,
159 159 r.linkrev(i),
160 160 short(node),
161 161 short(pp[0]),
162 162 short(pp[1]),
163 163 )
164 164 )
165 165 elif format == 1:
166 166 pr = r.parentrevs(i)
167 167 ui.write(
168 168 b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n"
169 169 % (
170 170 i,
171 171 r.flags(i),
172 172 r.start(i),
173 173 r.length(i),
174 174 r.rawsize(i),
175 175 base,
176 176 r.linkrev(i),
177 177 pr[0],
178 178 pr[1],
179 179 short(node),
180 180 )
181 181 )
182 182
183 183
184 184 def debugindexdot(orig, ui, repo, file_):
185 185 """dump an index DAG as a graphviz dot file"""
186 186 if not shallowutil.isenabled(repo):
187 187 return orig(ui, repo, file_)
188 188
189 189 r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
190 190
191 191 ui.writenoi18n(b"digraph G {\n")
192 192 for i in r:
193 193 node = r.node(i)
194 194 pp = r.parents(node)
195 195 ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
196 196 if pp[1] != nullid:
197 197 ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
198 198 ui.write(b"}\n")
199 199
200 200
201 201 def verifyremotefilelog(ui, path, **opts):
202 202 decompress = opts.get('decompress')
203 203
204 204 for root, dirs, files in os.walk(path):
205 205 for file in files:
206 206 if file == b"repos":
207 207 continue
208 208 filepath = os.path.join(root, file)
209 209 size, firstnode, mapping = parsefileblob(filepath, decompress)
210 210 for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping):
211 211 if linknode == nullid:
212 212 actualpath = os.path.relpath(root, path)
213 213 key = fileserverclient.getcachekey(
214 214 b"reponame", actualpath, file
215 215 )
216 216 ui.status(
217 217 b"%s %s\n" % (key, os.path.relpath(filepath, path))
218 218 )
219 219
220 220
221 221 def _decompressblob(raw):
222 222 return zlib.decompress(raw)
223 223
224 224
225 225 def parsefileblob(path, decompress):
226 226 f = open(path, b"rb")
227 227 try:
228 228 raw = f.read()
229 229 finally:
230 230 f.close()
231 231
232 232 if decompress:
233 233 raw = _decompressblob(raw)
234 234
235 235 offset, size, flags = shallowutil.parsesizeflags(raw)
236 236 start = offset + size
237 237
238 238 firstnode = None
239 239
240 240 mapping = {}
241 241 while start < len(raw):
242 242 divider = raw.index(b'\0', start + 80)
243 243
244 244 currentnode = raw[start : (start + 20)]
245 245 if not firstnode:
246 246 firstnode = currentnode
247 247
248 248 p1 = raw[(start + 20) : (start + 40)]
249 249 p2 = raw[(start + 40) : (start + 60)]
250 250 linknode = raw[(start + 60) : (start + 80)]
251 251 copyfrom = raw[(start + 80) : divider]
252 252
253 253 mapping[currentnode] = (p1, p2, linknode, copyfrom)
254 254 start = divider + 1
255 255
256 256 return size, firstnode, mapping
257 257
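
The blob layout that ``parsefileblob`` walks, summarized from the code above:

    # <size/flags header>        parsed by shallowutil.parsesizeflags()
    # <file text, `size` bytes>
    # then one ancestor record per node, repeated until the end of the blob:
    #   node       20 bytes
    #   p1         20 bytes
    #   p2         20 bytes
    #   linknode   20 bytes
    #   copyfrom   variable length, terminated by b'\0'
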
258 258
259 259 def debugdatapack(ui, *paths, **opts):
260 260 for path in paths:
261 261 if b'.data' in path:
262 262 path = path[: path.index(b'.data')]
263 263 ui.write(b"%s:\n" % path)
264 264 dpack = datapack.datapack(path)
265 265 node = opts.get('node')
266 266 if node:
267 267 deltachain = dpack.getdeltachain(b'', bin(node))
268 268 dumpdeltachain(ui, deltachain, **opts)
269 269 return
270 270
271 271 if opts.get('long'):
272 272 hashformatter = hex
273 273 hashlen = 42
274 274 else:
275 275 hashformatter = short
276 276 hashlen = 14
277 277
278 278 lastfilename = None
279 279 totaldeltasize = 0
280 280 totalblobsize = 0
281 281
282 282 def printtotals():
283 283 if lastfilename is not None:
284 284 ui.write(b"\n")
285 285 if not totaldeltasize or not totalblobsize:
286 286 return
287 287 difference = totalblobsize - totaldeltasize
288 288 deltastr = b"%0.1f%% %s" % (
289 289 (100.0 * abs(difference) / totalblobsize),
290 290 (b"smaller" if difference > 0 else b"bigger"),
291 291 )
292 292
293 293 ui.writenoi18n(
294 294 b"Total:%s%s %s (%s)\n"
295 295 % (
296 296 b"".ljust(2 * hashlen - len(b"Total:")),
297 297 (b'%d' % totaldeltasize).ljust(12),
298 298 (b'%d' % totalblobsize).ljust(9),
299 299 deltastr,
300 300 )
301 301 )
302 302
303 303 bases = {}
304 304 nodes = set()
305 305 failures = 0
306 306 for filename, node, deltabase, deltalen in dpack.iterentries():
307 307 bases[node] = deltabase
308 308 if node in nodes:
309 309 ui.write((b"Bad entry: %s appears twice\n" % short(node)))
310 310 failures += 1
311 311 nodes.add(node)
312 312 if filename != lastfilename:
313 313 printtotals()
314 314 name = b'(empty name)' if filename == b'' else filename
315 315 ui.write(b"%s:\n" % name)
316 316 ui.write(
317 317 b"%s%s%s%s\n"
318 318 % (
319 319 b"Node".ljust(hashlen),
320 320 b"Delta Base".ljust(hashlen),
321 321 b"Delta Length".ljust(14),
322 322 b"Blob Size".ljust(9),
323 323 )
324 324 )
325 325 lastfilename = filename
326 326 totalblobsize = 0
327 327 totaldeltasize = 0
328 328
329 329 # Metadata could be missing, in which case it will be an empty dict.
330 330 meta = dpack.getmeta(filename, node)
331 331 if constants.METAKEYSIZE in meta:
332 332 blobsize = meta[constants.METAKEYSIZE]
333 333 totaldeltasize += deltalen
334 334 totalblobsize += blobsize
335 335 else:
336 336 blobsize = b"(missing)"
337 337 ui.write(
338 338 b"%s %s %s%s\n"
339 339 % (
340 340 hashformatter(node),
341 341 hashformatter(deltabase),
342 342 (b'%d' % deltalen).ljust(14),
343 343 pycompat.bytestr(blobsize),
344 344 )
345 345 )
346 346
347 347 if filename is not None:
348 348 printtotals()
349 349
350 350 failures += _sanitycheck(ui, set(nodes), bases)
351 351 if failures > 1:
352 352 ui.warn((b"%d failures\n" % failures))
353 353 return 1
354 354
355 355
356 356 def _sanitycheck(ui, nodes, bases):
357 357 """
358 358     Does some basic sanity checking on a packfile with ``nodes`` and
359 359     ``bases`` (a mapping of node->base):
360 360
361 361 - Each deltabase must itself be a node elsewhere in the pack
362 362 - There must be no cycles
363 363 """
364 364 failures = 0
365 365 for node in nodes:
366 366 seen = set()
367 367 current = node
368 368 deltabase = bases[current]
369 369
370 370 while deltabase != nullid:
371 371 if deltabase not in nodes:
372 372 ui.warn(
373 373 (
374 374 b"Bad entry: %s has an unknown deltabase (%s)\n"
375 375 % (short(node), short(deltabase))
376 376 )
377 377 )
378 378 failures += 1
379 379 break
380 380
381 381 if deltabase in seen:
382 382 ui.warn(
383 383 (
384 384 b"Bad entry: %s has a cycle (at %s)\n"
385 385 % (short(node), short(deltabase))
386 386 )
387 387 )
388 388 failures += 1
389 389 break
390 390
391 391 current = deltabase
392 392 seen.add(current)
393 393 deltabase = bases[current]
394 394 # Since ``node`` begins a valid chain, reset/memoize its base to nullid
395 395 # so we don't traverse it again.
396 396 bases[node] = nullid
397 397 return failures
398 398
399 399
400 400 def dumpdeltachain(ui, deltachain, **opts):
401 401 hashformatter = hex
402 402 hashlen = 40
403 403
404 404 lastfilename = None
405 405 for filename, node, filename, deltabasenode, delta in deltachain:
406 406 if filename != lastfilename:
407 407 ui.write(b"\n%s\n" % filename)
408 408 lastfilename = filename
409 409 ui.write(
410 410 b"%s %s %s %s\n"
411 411 % (
412 412 b"Node".ljust(hashlen),
413 413 b"Delta Base".ljust(hashlen),
414 414 b"Delta SHA1".ljust(hashlen),
415 415 b"Delta Length".ljust(6),
416 416 )
417 417 )
418 418
419 419 ui.write(
420 420 b"%s %s %s %d\n"
421 421 % (
422 422 hashformatter(node),
423 423 hashformatter(deltabasenode),
424 nodemod.hex(hashlib.sha1(delta).digest()),
424 nodemod.hex(hashutil.sha1(delta).digest()),
425 425 len(delta),
426 426 )
427 427 )
428 428
429 429
430 430 def debughistorypack(ui, path):
431 431 if b'.hist' in path:
432 432 path = path[: path.index(b'.hist')]
433 433 hpack = historypack.historypack(path)
434 434
435 435 lastfilename = None
436 436 for entry in hpack.iterentries():
437 437 filename, node, p1node, p2node, linknode, copyfrom = entry
438 438 if filename != lastfilename:
439 439 ui.write(b"\n%s\n" % filename)
440 440 ui.write(
441 441 b"%s%s%s%s%s\n"
442 442 % (
443 443 b"Node".ljust(14),
444 444 b"P1 Node".ljust(14),
445 445 b"P2 Node".ljust(14),
446 446 b"Link Node".ljust(14),
447 447 b"Copy From",
448 448 )
449 449 )
450 450 lastfilename = filename
451 451 ui.write(
452 452 b"%s %s %s %s %s\n"
453 453 % (
454 454 short(node),
455 455 short(p1node),
456 456 short(p2node),
457 457 short(linknode),
458 458 copyfrom,
459 459 )
460 460 )
461 461
462 462
463 463 def debugwaitonrepack(repo):
464 464 with lockmod.lock(repack.repacklockvfs(repo), b"repacklock", timeout=-1):
465 465 return
466 466
467 467
468 468 def debugwaitonprefetch(repo):
469 469 with repo._lock(
470 470 repo.svfs,
471 471 b"prefetchlock",
472 472 True,
473 473 None,
474 474 None,
475 475 _(b'prefetching in %s') % repo.origroot,
476 476 ):
477 477 pass
@@ -1,667 +1,669 b''
1 1 # fileserverclient.py - client for communicating with the cache process
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 import hashlib
11 10 import io
12 11 import os
13 12 import threading
14 13 import time
15 14 import zlib
16 15
17 16 from mercurial.i18n import _
18 17 from mercurial.node import bin, hex, nullid
19 18 from mercurial import (
20 19 error,
21 20 node,
22 21 pycompat,
23 22 revlog,
24 23 sshpeer,
25 24 util,
26 25 wireprotov1peer,
27 26 )
28 from mercurial.utils import procutil
27 from mercurial.utils import (
28 hashutil,
29 procutil,
30 )
29 31
30 32 from . import (
31 33 constants,
32 34 contentstore,
33 35 metadatastore,
34 36 )
35 37
36 38 _sshv1peer = sshpeer.sshv1peer
37 39
38 40 # Statistics for debugging
39 41 fetchcost = 0
40 42 fetches = 0
41 43 fetched = 0
42 44 fetchmisses = 0
43 45
44 46 _lfsmod = None
45 47
46 48
47 49 def getcachekey(reponame, file, id):
48 pathhash = node.hex(hashlib.sha1(file).digest())
50 pathhash = node.hex(hashutil.sha1(file).digest())
49 51 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
50 52
51 53
52 54 def getlocalkey(file, id):
53 pathhash = node.hex(hashlib.sha1(file).digest())
55 pathhash = node.hex(hashutil.sha1(file).digest())
54 56 return os.path.join(pathhash, id)
55 57
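
For illustration (hypothetical names; only the shape of the keys matters), the hex path hash is split after its first two characters for the shared cache, matching the ``.../1a/85ffda..be21`` layout noted in basestore:

    # pathhash = node.hex(hashutil.sha1(filename).digest())  # 40 hex characters
    # getcachekey(b'reponame', filename, id)
    #     -> reponame/<pathhash[:2]>/<pathhash[2:]>/<id>
    # getlocalkey(filename, id)
    #     -> <pathhash>/<id>
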
56 58
57 59 def peersetup(ui, peer):
58 60 class remotefilepeer(peer.__class__):
59 61 @wireprotov1peer.batchable
60 62 def x_rfl_getfile(self, file, node):
61 63 if not self.capable(b'x_rfl_getfile'):
62 64 raise error.Abort(
63 65 b'configured remotefile server does not support getfile'
64 66 )
65 67 f = wireprotov1peer.future()
66 68 yield {b'file': file, b'node': node}, f
67 69 code, data = f.value.split(b'\0', 1)
68 70 if int(code):
69 71 raise error.LookupError(file, node, data)
70 72 yield data
71 73
72 74 @wireprotov1peer.batchable
73 75 def x_rfl_getflogheads(self, path):
74 76 if not self.capable(b'x_rfl_getflogheads'):
75 77 raise error.Abort(
76 78 b'configured remotefile server does not '
77 79 b'support getflogheads'
78 80 )
79 81 f = wireprotov1peer.future()
80 82 yield {b'path': path}, f
81 83 heads = f.value.split(b'\n') if f.value else []
82 84 yield heads
83 85
84 86 def _updatecallstreamopts(self, command, opts):
85 87 if command != b'getbundle':
86 88 return
87 89 if (
88 90 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
89 91 not in self.capabilities()
90 92 ):
91 93 return
92 94 if not util.safehasattr(self, '_localrepo'):
93 95 return
94 96 if (
95 97 constants.SHALLOWREPO_REQUIREMENT
96 98 not in self._localrepo.requirements
97 99 ):
98 100 return
99 101
100 102 bundlecaps = opts.get(b'bundlecaps')
101 103 if bundlecaps:
102 104 bundlecaps = [bundlecaps]
103 105 else:
104 106 bundlecaps = []
105 107
106 108 # shallow, includepattern, and excludepattern are a hacky way of
107 109 # carrying over data from the local repo to this getbundle
108 110 # command. We need to do it this way because bundle1 getbundle
109 111 # doesn't provide any other place we can hook in to manipulate
110 112 # getbundle args before it goes across the wire. Once we get rid
111 113 # of bundle1, we can use bundle2's _pullbundle2extraprepare to
112 114 # do this more cleanly.
113 115 bundlecaps.append(constants.BUNDLE2_CAPABLITY)
114 116 if self._localrepo.includepattern:
115 117 patterns = b'\0'.join(self._localrepo.includepattern)
116 118 includecap = b"includepattern=" + patterns
117 119 bundlecaps.append(includecap)
118 120 if self._localrepo.excludepattern:
119 121 patterns = b'\0'.join(self._localrepo.excludepattern)
120 122 excludecap = b"excludepattern=" + patterns
121 123 bundlecaps.append(excludecap)
122 124 opts[b'bundlecaps'] = b','.join(bundlecaps)
123 125
124 126 def _sendrequest(self, command, args, **opts):
125 127 self._updatecallstreamopts(command, args)
126 128 return super(remotefilepeer, self)._sendrequest(
127 129 command, args, **opts
128 130 )
129 131
130 132 def _callstream(self, command, **opts):
131 133 supertype = super(remotefilepeer, self)
132 134 if not util.safehasattr(supertype, '_sendrequest'):
133 135 self._updatecallstreamopts(command, pycompat.byteskwargs(opts))
134 136 return super(remotefilepeer, self)._callstream(command, **opts)
135 137
136 138 peer.__class__ = remotefilepeer
137 139
138 140
139 141 class cacheconnection(object):
140 142 """The connection for communicating with the remote cache. Performs
141 143 gets and sets by communicating with an external process that has the
142 144 cache-specific implementation.
143 145 """
144 146
145 147 def __init__(self):
146 148 self.pipeo = self.pipei = self.pipee = None
147 149 self.subprocess = None
148 150 self.connected = False
149 151
150 152 def connect(self, cachecommand):
151 153 if self.pipeo:
152 154 raise error.Abort(_(b"cache connection already open"))
153 155 self.pipei, self.pipeo, self.pipee, self.subprocess = procutil.popen4(
154 156 cachecommand
155 157 )
156 158 self.connected = True
157 159
158 160 def close(self):
159 161 def tryclose(pipe):
160 162 try:
161 163 pipe.close()
162 164 except Exception:
163 165 pass
164 166
165 167 if self.connected:
166 168 try:
167 169 self.pipei.write(b"exit\n")
168 170 except Exception:
169 171 pass
170 172 tryclose(self.pipei)
171 173 self.pipei = None
172 174 tryclose(self.pipeo)
173 175 self.pipeo = None
174 176 tryclose(self.pipee)
175 177 self.pipee = None
176 178 try:
177 179 # Wait for process to terminate, making sure to avoid deadlock.
178 180 # See https://docs.python.org/2/library/subprocess.html for
179 181 # warnings about wait() and deadlocking.
180 182 self.subprocess.communicate()
181 183 except Exception:
182 184 pass
183 185 self.subprocess = None
184 186 self.connected = False
185 187
186 188 def request(self, request, flush=True):
187 189 if self.connected:
188 190 try:
189 191 self.pipei.write(request)
190 192 if flush:
191 193 self.pipei.flush()
192 194 except IOError:
193 195 self.close()
194 196
195 197 def receiveline(self):
196 198 if not self.connected:
197 199 return None
198 200 try:
199 201 result = self.pipeo.readline()[:-1]
200 202 if not result:
201 203 self.close()
202 204 except IOError:
203 205 self.close()
204 206
205 207 return result
206 208
207 209
208 210 def _getfilesbatch(
209 211 remote, receivemissing, progresstick, missed, idmap, batchsize
210 212 ):
211 213 # Over http(s), iterbatch is a streamy method and we can start
212 214 # looking at results early. This means we send one (potentially
213 215 # large) request, but then we show nice progress as we process
214 216 # file results, rather than showing chunks of $batchsize in
215 217 # progress.
216 218 #
217 219 # Over ssh, iterbatch isn't streamy because batch() wasn't
218 220 # explicitly designed as a streaming method. In the future we
219 221 # should probably introduce a streambatch() method upstream and
220 222 # use that for this.
221 223 with remote.commandexecutor() as e:
222 224 futures = []
223 225 for m in missed:
224 226 futures.append(
225 227 e.callcommand(
226 228 b'x_rfl_getfile', {b'file': idmap[m], b'node': m[-40:]}
227 229 )
228 230 )
229 231
230 232 for i, m in enumerate(missed):
231 233 r = futures[i].result()
232 234 futures[i] = None # release memory
233 235 file_ = idmap[m]
234 236 node = m[-40:]
235 237 receivemissing(io.BytesIO(b'%d\n%s' % (len(r), r)), file_, node)
236 238 progresstick()
237 239
238 240
239 241 def _getfiles_optimistic(
240 242 remote, receivemissing, progresstick, missed, idmap, step
241 243 ):
242 244 remote._callstream(b"x_rfl_getfiles")
243 245 i = 0
244 246 pipeo = remote._pipeo
245 247 pipei = remote._pipei
246 248 while i < len(missed):
247 249 # issue a batch of requests
248 250 start = i
249 251 end = min(len(missed), start + step)
250 252 i = end
251 253 for missingid in missed[start:end]:
252 254 # issue new request
253 255 versionid = missingid[-40:]
254 256 file = idmap[missingid]
255 257 sshrequest = b"%s%s\n" % (versionid, file)
256 258 pipeo.write(sshrequest)
257 259 pipeo.flush()
258 260
259 261 # receive batch results
260 262 for missingid in missed[start:end]:
261 263 versionid = missingid[-40:]
262 264 file = idmap[missingid]
263 265 receivemissing(pipei, file, versionid)
264 266 progresstick()
265 267
266 268 # End the command
267 269 pipeo.write(b'\n')
268 270 pipeo.flush()
269 271
270 272
271 273 def _getfiles_threaded(
272 274 remote, receivemissing, progresstick, missed, idmap, step
273 275 ):
274 276 remote._callstream(b"getfiles")
275 277 pipeo = remote._pipeo
276 278 pipei = remote._pipei
277 279
278 280 def writer():
279 281 for missingid in missed:
280 282 versionid = missingid[-40:]
281 283 file = idmap[missingid]
282 284 sshrequest = b"%s%s\n" % (versionid, file)
283 285 pipeo.write(sshrequest)
284 286 pipeo.flush()
285 287
286 288 writerthread = threading.Thread(target=writer)
287 289 writerthread.daemon = True
288 290 writerthread.start()
289 291
290 292 for missingid in missed:
291 293 versionid = missingid[-40:]
292 294 file = idmap[missingid]
293 295 receivemissing(pipei, file, versionid)
294 296 progresstick()
295 297
296 298 writerthread.join()
297 299 # End the command
298 300 pipeo.write(b'\n')
299 301 pipeo.flush()
300 302
301 303
302 304 class fileserverclient(object):
303 305 """A client for requesting files from the remote file server.
304 306 """
305 307
306 308 def __init__(self, repo):
307 309 ui = repo.ui
308 310 self.repo = repo
309 311 self.ui = ui
310 312 self.cacheprocess = ui.config(b"remotefilelog", b"cacheprocess")
311 313 if self.cacheprocess:
312 314 self.cacheprocess = util.expandpath(self.cacheprocess)
313 315
314 316 # This option causes remotefilelog to pass the full file path to the
315 317 # cacheprocess instead of a hashed key.
316 318 self.cacheprocesspasspath = ui.configbool(
317 319 b"remotefilelog", b"cacheprocess.includepath"
318 320 )
319 321
320 322 self.debugoutput = ui.configbool(b"remotefilelog", b"debug")
321 323
322 324 self.remotecache = cacheconnection()
323 325
324 326 def setstore(self, datastore, historystore, writedata, writehistory):
325 327 self.datastore = datastore
326 328 self.historystore = historystore
327 329 self.writedata = writedata
328 330 self.writehistory = writehistory
329 331
330 332 def _connect(self):
331 333 return self.repo.connectionpool.get(self.repo.fallbackpath)
332 334
333 335 def request(self, fileids):
334 336 """Takes a list of filename/node pairs and fetches them from the
335 337 server. Files are stored in the local cache.
336 338 A list of nodes that the server couldn't find is returned.
337 339 If the connection fails, an exception is raised.
338 340 """
339 341 if not self.remotecache.connected:
340 342 self.connect()
341 343 cache = self.remotecache
342 344 writedata = self.writedata
343 345
344 346 repo = self.repo
345 347 total = len(fileids)
346 348 request = b"get\n%d\n" % total
347 349 idmap = {}
348 350 reponame = repo.name
349 351 for file, id in fileids:
350 352 fullid = getcachekey(reponame, file, id)
351 353 if self.cacheprocesspasspath:
352 354 request += file + b'\0'
353 355 request += fullid + b"\n"
354 356 idmap[fullid] = file
355 357
356 358 cache.request(request)
357 359
358 360 progress = self.ui.makeprogress(_(b'downloading'), total=total)
359 361 progress.update(0)
360 362
361 363 missed = []
362 364 while True:
363 365 missingid = cache.receiveline()
364 366 if not missingid:
365 367 missedset = set(missed)
366 368 for missingid in idmap:
367 369 if not missingid in missedset:
368 370 missed.append(missingid)
369 371 self.ui.warn(
370 372 _(
371 373 b"warning: cache connection closed early - "
372 374 + b"falling back to server\n"
373 375 )
374 376 )
375 377 break
376 378 if missingid == b"0":
377 379 break
378 380 if missingid.startswith(b"_hits_"):
379 381 # receive progress reports
380 382 parts = missingid.split(b"_")
381 383 progress.increment(int(parts[2]))
382 384 continue
383 385
384 386 missed.append(missingid)
385 387
386 388 global fetchmisses
387 389 fetchmisses += len(missed)
388 390
389 391 fromcache = total - len(missed)
390 392 progress.update(fromcache, total=total)
391 393 self.ui.log(
392 394 b"remotefilelog",
393 395 b"remote cache hit rate is %r of %r\n",
394 396 fromcache,
395 397 total,
396 398 hit=fromcache,
397 399 total=total,
398 400 )
399 401
400 402 oldumask = os.umask(0o002)
401 403 try:
402 404 # receive cache misses from master
403 405 if missed:
404 406 # When verbose is true, sshpeer prints 'running ssh...'
405 407 # to stdout, which can interfere with some command
406 408 # outputs
407 409 verbose = self.ui.verbose
408 410 self.ui.verbose = False
409 411 try:
410 412 with self._connect() as conn:
411 413 remote = conn.peer
412 414 if remote.capable(
413 415 constants.NETWORK_CAP_LEGACY_SSH_GETFILES
414 416 ):
415 417 if not isinstance(remote, _sshv1peer):
416 418 raise error.Abort(
417 419 b'remotefilelog requires ssh servers'
418 420 )
419 421 step = self.ui.configint(
420 422 b'remotefilelog', b'getfilesstep'
421 423 )
422 424 getfilestype = self.ui.config(
423 425 b'remotefilelog', b'getfilestype'
424 426 )
425 427 if getfilestype == b'threaded':
426 428 _getfiles = _getfiles_threaded
427 429 else:
428 430 _getfiles = _getfiles_optimistic
429 431 _getfiles(
430 432 remote,
431 433 self.receivemissing,
432 434 progress.increment,
433 435 missed,
434 436 idmap,
435 437 step,
436 438 )
437 439 elif remote.capable(b"x_rfl_getfile"):
438 440 if remote.capable(b'batch'):
439 441 batchdefault = 100
440 442 else:
441 443 batchdefault = 10
442 444 batchsize = self.ui.configint(
443 445 b'remotefilelog', b'batchsize', batchdefault
444 446 )
445 447 self.ui.debug(
446 448 b'requesting %d files from '
447 449 b'remotefilelog server...\n' % len(missed)
448 450 )
449 451 _getfilesbatch(
450 452 remote,
451 453 self.receivemissing,
452 454 progress.increment,
453 455 missed,
454 456 idmap,
455 457 batchsize,
456 458 )
457 459 else:
458 460 raise error.Abort(
459 461 b"configured remotefilelog server"
460 462 b" does not support remotefilelog"
461 463 )
462 464
463 465 self.ui.log(
464 466 b"remotefilefetchlog",
465 467 b"Success\n",
466 468 fetched_files=progress.pos - fromcache,
467 469 total_to_fetch=total - fromcache,
468 470 )
469 471 except Exception:
470 472 self.ui.log(
471 473 b"remotefilefetchlog",
472 474 b"Fail\n",
473 475 fetched_files=progress.pos - fromcache,
474 476 total_to_fetch=total - fromcache,
475 477 )
476 478 raise
477 479 finally:
478 480 self.ui.verbose = verbose
479 481 # send to memcache
480 482 request = b"set\n%d\n%s\n" % (len(missed), b"\n".join(missed))
481 483 cache.request(request)
482 484
483 485 progress.complete()
484 486
485 487 # mark ourselves as a user of this cache
486 488 writedata.markrepo(self.repo.path)
487 489 finally:
488 490 os.umask(oldumask)
489 491
490 492 def receivemissing(self, pipe, filename, node):
491 493 line = pipe.readline()[:-1]
492 494 if not line:
493 495 raise error.ResponseError(
494 496 _(b"error downloading file contents:"),
495 497 _(b"connection closed early"),
496 498 )
497 499 size = int(line)
498 500 data = pipe.read(size)
499 501 if len(data) != size:
500 502 raise error.ResponseError(
501 503 _(b"error downloading file contents:"),
502 504 _(b"only received %s of %s bytes") % (len(data), size),
503 505 )
504 506
505 507 self.writedata.addremotefilelognode(
506 508 filename, bin(node), zlib.decompress(data)
507 509 )
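# Editor's note (illustration, not part of this changeset): a small
# self-contained sketch of the "<decimal length>\n<zlib-compressed blob>"
# framing that receivemissing() above consumes (and that _getfilesbatch()
# reconstructs from batch results). The blob contents here are placeholder
# bytes, not a real remotefilelog file blob.
import io
import zlib


def sketch_receivemissing(pipe):
    line = pipe.readline()[:-1]  # strip the trailing newline
    if not line:
        raise RuntimeError('connection closed early')
    size = int(line)
    data = pipe.read(size)
    if len(data) != size:
        raise RuntimeError('only received %d of %d bytes' % (len(data), size))
    return zlib.decompress(data)


payload = zlib.compress(b'placeholder file blob')
pipe = io.BytesIO(b'%d\n%s' % (len(payload), payload))
assert sketch_receivemissing(pipe) == b'placeholder file blob'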
508 510
509 511 def connect(self):
510 512 if self.cacheprocess:
511 513 cmd = b"%s %s" % (self.cacheprocess, self.writedata._path)
512 514 self.remotecache.connect(cmd)
513 515 else:
514 516 # If no cache process is specified, we fake one that always
515 517 # returns cache misses. This enables tests to run easily
516 518 # and may eventually allow us to be a drop-in replacement
517 519 # for the largefiles extension.
518 520 class simplecache(object):
519 521 def __init__(self):
520 522 self.missingids = []
521 523 self.connected = True
522 524
523 525 def close(self):
524 526 pass
525 527
526 528 def request(self, value, flush=True):
527 529 lines = value.split(b"\n")
528 530 if lines[0] != b"get":
529 531 return
530 532 self.missingids = lines[2:-1]
531 533 self.missingids.append(b'0')
532 534
533 535 def receiveline(self):
534 536 if len(self.missingids) > 0:
535 537 return self.missingids.pop(0)
536 538 return None
537 539
538 540 self.remotecache = simplecache()
539 541
540 542 def close(self):
541 543 if fetches:
542 544 msg = (
543 545 b"%d files fetched over %d fetches - "
544 546 + b"(%d misses, %0.2f%% hit ratio) over %0.2fs\n"
545 547 ) % (
546 548 fetched,
547 549 fetches,
548 550 fetchmisses,
549 551 float(fetched - fetchmisses) / float(fetched) * 100.0,
550 552 fetchcost,
551 553 )
552 554 if self.debugoutput:
553 555 self.ui.warn(msg)
554 556 self.ui.log(
555 557 b"remotefilelog.prefetch",
556 558 msg.replace(b"%", b"%%"),
557 559 remotefilelogfetched=fetched,
558 560 remotefilelogfetches=fetches,
559 561 remotefilelogfetchmisses=fetchmisses,
560 562 remotefilelogfetchtime=fetchcost * 1000,
561 563 )
562 564
563 565 if self.remotecache.connected:
564 566 self.remotecache.close()
565 567
566 568 def prefetch(
567 569 self, fileids, force=False, fetchdata=True, fetchhistory=False
568 570 ):
569 571 """downloads the given file versions to the cache
570 572 """
571 573 repo = self.repo
572 574 idstocheck = []
573 575 for file, id in fileids:
574 576 # hack
575 577 # - we don't use .hgtags
576 578 # - workingctx produces ids with length 42,
577 579 # which we skip since they aren't in any cache
578 580 if (
579 581 file == b'.hgtags'
580 582 or len(id) == 42
581 583 or not repo.shallowmatch(file)
582 584 ):
583 585 continue
584 586
585 587 idstocheck.append((file, bin(id)))
586 588
587 589 datastore = self.datastore
588 590 historystore = self.historystore
589 591 if force:
590 592 datastore = contentstore.unioncontentstore(*repo.shareddatastores)
591 593 historystore = metadatastore.unionmetadatastore(
592 594 *repo.sharedhistorystores
593 595 )
594 596
595 597 missingids = set()
596 598 if fetchdata:
597 599 missingids.update(datastore.getmissing(idstocheck))
598 600 if fetchhistory:
599 601 missingids.update(historystore.getmissing(idstocheck))
600 602
601 603 # partition missing nodes into nullid and not-nullid so we can
602 604 # warn about this filtering potentially shadowing bugs.
603 605 nullids = len([None for unused, id in missingids if id == nullid])
604 606 if nullids:
605 607 missingids = [(f, id) for f, id in missingids if id != nullid]
606 608 repo.ui.develwarn(
607 609 (
608 610 b'remotefilelog not fetching %d null revs'
609 611 b' - this is likely hiding bugs' % nullids
610 612 ),
611 613 config=b'remotefilelog-ext',
612 614 )
613 615 if missingids:
614 616 global fetches, fetched, fetchcost
615 617 fetches += 1
616 618
617 619 # We want to be able to detect excess individual file downloads, so
618 620 # let's log that information for debugging.
619 621 if fetches >= 15 and fetches < 18:
620 622 if fetches == 15:
621 623 fetchwarning = self.ui.config(
622 624 b'remotefilelog', b'fetchwarning'
623 625 )
624 626 if fetchwarning:
625 627 self.ui.warn(fetchwarning + b'\n')
626 628 self.logstacktrace()
627 629 missingids = [(file, hex(id)) for file, id in sorted(missingids)]
628 630 fetched += len(missingids)
629 631 start = time.time()
630 632 missingids = self.request(missingids)
631 633 if missingids:
632 634 raise error.Abort(
633 635 _(b"unable to download %d files") % len(missingids)
634 636 )
635 637 fetchcost += time.time() - start
636 638 self._lfsprefetch(fileids)
637 639
638 640 def _lfsprefetch(self, fileids):
639 641 if not _lfsmod or not util.safehasattr(
640 642 self.repo.svfs, b'lfslocalblobstore'
641 643 ):
642 644 return
643 645 if not _lfsmod.wrapper.candownload(self.repo):
644 646 return
645 647 pointers = []
646 648 store = self.repo.svfs.lfslocalblobstore
647 649 for file, id in fileids:
648 650 node = bin(id)
649 651 rlog = self.repo.file(file)
650 652 if rlog.flags(node) & revlog.REVIDX_EXTSTORED:
651 653 text = rlog.rawdata(node)
652 654 p = _lfsmod.pointer.deserialize(text)
653 655 oid = p.oid()
654 656 if not store.has(oid):
655 657 pointers.append(p)
656 658 if len(pointers) > 0:
657 659 self.repo.svfs.lfsremoteblobstore.readbatch(pointers, store)
658 660 assert all(store.has(p.oid()) for p in pointers)
659 661
660 662 def logstacktrace(self):
661 663 import traceback
662 664
663 665 self.ui.log(
664 666 b'remotefilelog',
665 667 b'excess remotefilelog fetching:\n%s\n',
666 668 b''.join(pycompat.sysbytes(s) for s in traceback.format_stack()),
667 669 )
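# Editor's note (illustration, not part of this changeset): a toy walk-through
# of the text protocol spoken between fileserverclient.request() and the
# external cache process, modelled on the simplecache fake in connect() above.
# The cache keys are made-up values; a real cache process answers with the
# keys it is missing, optional "_hits_<n>_" progress lines, and a final "0".
def sketch_build_get_request(keys):
    # "get\n<count>\n<key>\n<key>\n..."
    return b"get\n%d\n" % len(keys) + b"".join(k + b"\n" for k in keys)


def sketch_parse_responses(lines):
    missed, hits = [], 0
    for line in lines:
        if line == b"0":                # end of the miss list
            break
        if line.startswith(b"_hits_"):  # progress report: "_hits_<n>_..."
            hits += int(line.split(b"_")[2])
            continue
        missed.append(line)
    return missed, hits


keys = [b"myrepo/ab/cdef/0123", b"myrepo/12/3456/89ab"]
print(sketch_build_get_request(keys))
print(sketch_parse_responses([b"_hits_1_", keys[1], b"0"]))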
@@ -1,572 +1,572 b''
1 1 from __future__ import absolute_import
2 2
3 import hashlib
4 3 import struct
5 4
6 5 from mercurial.node import hex, nullid
7 6 from mercurial import (
8 7 pycompat,
9 8 util,
10 9 )
10 from mercurial.utils import hashutil
11 11 from . import (
12 12 basepack,
13 13 constants,
14 14 shallowutil,
15 15 )
16 16
17 17 # (filename hash, offset, size)
18 18 INDEXFORMAT2 = b'!20sQQII'
19 19 INDEXENTRYLENGTH2 = struct.calcsize(INDEXFORMAT2)
20 20 NODELENGTH = 20
21 21
22 22 NODEINDEXFORMAT = b'!20sQ'
23 23 NODEINDEXENTRYLENGTH = struct.calcsize(NODEINDEXFORMAT)
24 24
25 25 # (node, p1, p2, linknode)
26 26 PACKFORMAT = b"!20s20s20s20sH"
27 27 PACKENTRYLENGTH = 82
28 28
29 29 ENTRYCOUNTSIZE = 4
30 30
31 31 INDEXSUFFIX = b'.histidx'
32 32 PACKSUFFIX = b'.histpack'
33 33
34 34 ANC_NODE = 0
35 35 ANC_P1NODE = 1
36 36 ANC_P2NODE = 2
37 37 ANC_LINKNODE = 3
38 38 ANC_COPYFROM = 4
39 39
40 40
41 41 class historypackstore(basepack.basepackstore):
42 42 INDEXSUFFIX = INDEXSUFFIX
43 43 PACKSUFFIX = PACKSUFFIX
44 44
45 45 def getpack(self, path):
46 46 return historypack(path)
47 47
48 48 def getancestors(self, name, node, known=None):
49 49 for pack in self.packs:
50 50 try:
51 51 return pack.getancestors(name, node, known=known)
52 52 except KeyError:
53 53 pass
54 54
55 55 for pack in self.refresh():
56 56 try:
57 57 return pack.getancestors(name, node, known=known)
58 58 except KeyError:
59 59 pass
60 60
61 61 raise KeyError((name, node))
62 62
63 63 def getnodeinfo(self, name, node):
64 64 for pack in self.packs:
65 65 try:
66 66 return pack.getnodeinfo(name, node)
67 67 except KeyError:
68 68 pass
69 69
70 70 for pack in self.refresh():
71 71 try:
72 72 return pack.getnodeinfo(name, node)
73 73 except KeyError:
74 74 pass
75 75
76 76 raise KeyError((name, node))
77 77
78 78 def add(self, filename, node, p1, p2, linknode, copyfrom):
79 79 raise RuntimeError(
80 80 b"cannot add to historypackstore (%s:%s)" % (filename, hex(node))
81 81 )
82 82
83 83
84 84 class historypack(basepack.basepack):
85 85 INDEXSUFFIX = INDEXSUFFIX
86 86 PACKSUFFIX = PACKSUFFIX
87 87
88 88 SUPPORTED_VERSIONS = [2]
89 89
90 90 def __init__(self, path):
91 91 super(historypack, self).__init__(path)
92 92 self.INDEXFORMAT = INDEXFORMAT2
93 93 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
94 94
95 95 def getmissing(self, keys):
96 96 missing = []
97 97 for name, node in keys:
98 98 try:
99 99 self._findnode(name, node)
100 100 except KeyError:
101 101 missing.append((name, node))
102 102
103 103 return missing
104 104
105 105 def getancestors(self, name, node, known=None):
106 106 """Returns as many ancestors as we're aware of.
107 107
108 108 return value: {
109 109 node: (p1, p2, linknode, copyfrom),
110 110 ...
111 111 }
112 112 """
113 113 if known and node in known:
114 114 return []
115 115
116 116 ancestors = self._getancestors(name, node, known=known)
117 117 results = {}
118 118 for ancnode, p1, p2, linknode, copyfrom in ancestors:
119 119 results[ancnode] = (p1, p2, linknode, copyfrom)
120 120
121 121 if not results:
122 122 raise KeyError((name, node))
123 123 return results
124 124
125 125 def getnodeinfo(self, name, node):
126 126 # Drop the node from the tuple before returning, since the result should
127 127 # just be (p1, p2, linknode, copyfrom)
128 128 return self._findnode(name, node)[1:]
129 129
130 130 def _getancestors(self, name, node, known=None):
131 131 if known is None:
132 132 known = set()
133 133 section = self._findsection(name)
134 134 filename, offset, size, nodeindexoffset, nodeindexsize = section
135 135 pending = set((node,))
136 136 o = 0
137 137 while o < size:
138 138 if not pending:
139 139 break
140 140 entry, copyfrom = self._readentry(offset + o)
141 141 o += PACKENTRYLENGTH
142 142 if copyfrom:
143 143 o += len(copyfrom)
144 144
145 145 ancnode = entry[ANC_NODE]
146 146 if ancnode in pending:
147 147 pending.remove(ancnode)
148 148 p1node = entry[ANC_P1NODE]
149 149 p2node = entry[ANC_P2NODE]
150 150 if p1node != nullid and p1node not in known:
151 151 pending.add(p1node)
152 152 if p2node != nullid and p2node not in known:
153 153 pending.add(p2node)
154 154
155 155 yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)
156 156
157 157 def _readentry(self, offset):
158 158 data = self._data
159 159 entry = struct.unpack(
160 160 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
161 161 )
162 162 copyfrom = None
163 163 copyfromlen = entry[ANC_COPYFROM]
164 164 if copyfromlen != 0:
165 165 offset += PACKENTRYLENGTH
166 166 copyfrom = data[offset : offset + copyfromlen]
167 167 return entry, copyfrom
168 168
169 169 def add(self, filename, node, p1, p2, linknode, copyfrom):
170 170 raise RuntimeError(
171 171 b"cannot add to historypack (%s:%s)" % (filename, hex(node))
172 172 )
173 173
174 174 def _findnode(self, name, node):
175 175 if self.VERSION == 0:
176 176 ancestors = self._getancestors(name, node)
177 177 for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
178 178 if ancnode == node:
179 179 return (ancnode, p1node, p2node, linknode, copyfrom)
180 180 else:
181 181 section = self._findsection(name)
182 182 nodeindexoffset, nodeindexsize = section[3:]
183 183 entry = self._bisect(
184 184 node,
185 185 nodeindexoffset,
186 186 nodeindexoffset + nodeindexsize,
187 187 NODEINDEXENTRYLENGTH,
188 188 )
189 189 if entry is not None:
190 190 node, offset = struct.unpack(NODEINDEXFORMAT, entry)
191 191 entry, copyfrom = self._readentry(offset)
192 192 # Drop the copyfromlen from the end of entry, and replace it
193 193 # with the copyfrom string.
194 194 return entry[:4] + (copyfrom,)
195 195
196 196 raise KeyError(b"unable to find history for %s:%s" % (name, hex(node)))
197 197
198 198 def _findsection(self, name):
199 199 params = self.params
200 namehash = hashlib.sha1(name).digest()
200 namehash = hashutil.sha1(name).digest()
201 201 fanoutkey = struct.unpack(
202 202 params.fanoutstruct, namehash[: params.fanoutprefix]
203 203 )[0]
204 204 fanout = self._fanouttable
205 205
206 206 start = fanout[fanoutkey] + params.indexstart
207 207 indexend = self._indexend
208 208
209 209 for i in pycompat.xrange(fanoutkey + 1, params.fanoutcount):
210 210 end = fanout[i] + params.indexstart
211 211 if end != start:
212 212 break
213 213 else:
214 214 end = indexend
215 215
216 216 entry = self._bisect(namehash, start, end, self.INDEXENTRYLENGTH)
217 217 if not entry:
218 218 raise KeyError(name)
219 219
220 220 rawentry = struct.unpack(self.INDEXFORMAT, entry)
221 221 x, offset, size, nodeindexoffset, nodeindexsize = rawentry
222 222 rawnamelen = self._index[
223 223 nodeindexoffset : nodeindexoffset + constants.FILENAMESIZE
224 224 ]
225 225 actualnamelen = struct.unpack(b'!H', rawnamelen)[0]
226 226 nodeindexoffset += constants.FILENAMESIZE
227 227 actualname = self._index[
228 228 nodeindexoffset : nodeindexoffset + actualnamelen
229 229 ]
230 230 if actualname != name:
231 231 raise KeyError(
232 232 b"found file name %s when looking for %s" % (actualname, name)
233 233 )
234 234 nodeindexoffset += actualnamelen
235 235
236 236 filenamelength = struct.unpack(
237 237 b'!H', self._data[offset : offset + constants.FILENAMESIZE]
238 238 )[0]
239 239 offset += constants.FILENAMESIZE
240 240
241 241 actualname = self._data[offset : offset + filenamelength]
242 242 offset += filenamelength
243 243
244 244 if name != actualname:
245 245 raise KeyError(
246 246 b"found file name %s when looking for %s" % (actualname, name)
247 247 )
248 248
249 249 # Skip entry list size
250 250 offset += ENTRYCOUNTSIZE
251 251
252 252 nodelistoffset = offset
253 253 nodelistsize = (
254 254 size - constants.FILENAMESIZE - filenamelength - ENTRYCOUNTSIZE
255 255 )
256 256 return (
257 257 name,
258 258 nodelistoffset,
259 259 nodelistsize,
260 260 nodeindexoffset,
261 261 nodeindexsize,
262 262 )
263 263
264 264 def _bisect(self, node, start, end, entrylen):
265 265 # Bisect between start and end to find node
266 266 origstart = start
267 267 startnode = self._index[start : start + NODELENGTH]
268 268 endnode = self._index[end : end + NODELENGTH]
269 269
270 270 if startnode == node:
271 271 return self._index[start : start + entrylen]
272 272 elif endnode == node:
273 273 return self._index[end : end + entrylen]
274 274 else:
275 275 while start < end - entrylen:
276 276 mid = start + (end - start) // 2
277 277 mid = mid - ((mid - origstart) % entrylen)
278 278 midnode = self._index[mid : mid + NODELENGTH]
279 279 if midnode == node:
280 280 return self._index[mid : mid + entrylen]
281 281 if node > midnode:
282 282 start = mid
283 283 elif node < midnode:
284 284 end = mid
285 285 return None
286 286
287 287 def markledger(self, ledger, options=None):
288 288 for filename, node in self:
289 289 ledger.markhistoryentry(self, filename, node)
290 290
291 291 def cleanup(self, ledger):
292 292 entries = ledger.sources.get(self, [])
293 293 allkeys = set(self)
294 294 repackedkeys = set(
295 295 (e.filename, e.node) for e in entries if e.historyrepacked
296 296 )
297 297
298 298 if len(allkeys - repackedkeys) == 0:
299 299 if self.path not in ledger.created:
300 300 util.unlinkpath(self.indexpath, ignoremissing=True)
301 301 util.unlinkpath(self.packpath, ignoremissing=True)
302 302
303 303 def __iter__(self):
304 304 for f, n, x, x, x, x in self.iterentries():
305 305 yield f, n
306 306
307 307 def iterentries(self):
308 308 # Start at 1 to skip the header
309 309 offset = 1
310 310 while offset < self.datasize:
311 311 data = self._data
312 312 # <2 byte len> + <filename>
313 313 filenamelen = struct.unpack(
314 314 b'!H', data[offset : offset + constants.FILENAMESIZE]
315 315 )[0]
316 316 offset += constants.FILENAMESIZE
317 317 filename = data[offset : offset + filenamelen]
318 318 offset += filenamelen
319 319
320 320 revcount = struct.unpack(
321 321 b'!I', data[offset : offset + ENTRYCOUNTSIZE]
322 322 )[0]
323 323 offset += ENTRYCOUNTSIZE
324 324
325 325 for i in pycompat.xrange(revcount):
326 326 entry = struct.unpack(
327 327 PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
328 328 )
329 329 offset += PACKENTRYLENGTH
330 330
331 331 copyfrom = data[offset : offset + entry[ANC_COPYFROM]]
332 332 offset += entry[ANC_COPYFROM]
333 333
334 334 yield (
335 335 filename,
336 336 entry[ANC_NODE],
337 337 entry[ANC_P1NODE],
338 338 entry[ANC_P2NODE],
339 339 entry[ANC_LINKNODE],
340 340 copyfrom,
341 341 )
342 342
343 343 self._pagedin += PACKENTRYLENGTH
344 344
345 345 # If we've read a lot of data from the mmap, free some memory.
346 346 self.freememory()
347 347
348 348
349 349 class mutablehistorypack(basepack.mutablebasepack):
350 350 """A class for constructing and serializing a histpack file and index.
351 351
352 352 A history pack is a pair of files that contain the revision history for
353 353 various file revisions in Mercurial. It contains only revision history (like
354 354 parent pointers and linknodes), not any revision content information.
355 355
356 356 It consists of two files, with the following format:
357 357
358 358 .histpack
359 359 The pack itself is a series of file revisions with some basic header
360 360 information on each.
361 361
362 362 histpack = <version: 1 byte>
363 363 [<filesection>,...]
364 364 filesection = <filename len: 2 byte unsigned int>
365 365 <filename>
366 366 <revision count: 4 byte unsigned int>
367 367 [<revision>,...]
368 368 revision = <node: 20 byte>
369 369 <p1node: 20 byte>
370 370 <p2node: 20 byte>
371 371 <linknode: 20 byte>
372 372 <copyfromlen: 2 byte>
373 373 <copyfrom>
374 374
375 375 The revisions within each filesection are stored in topological order
376 376 (newest first). If a given entry has a parent from another file (a copy)
377 377 then p1node is the node from the other file, and copyfrom is the
378 378 filepath of the other file.
379 379
380 380 .histidx
381 381 The index file provides a mapping from filename to the file section in
382 382 the histpack. In V1 it also contains sub-indexes for specific nodes
383 383 within each file. It consists of three parts, the fanout, the file index
384 384 and the node indexes.
385 385
386 386 The file index is a list of index entries, sorted by filename hash (one
387 387 per file section in the pack). Each entry has:
388 388
389 389 - node (The 20 byte hash of the filename)
390 390 - pack entry offset (The location of this file section in the histpack)
391 391 - pack content size (The on-disk length of this file section's pack
392 392 data)
393 393 - node index offset (The location of the file's node index in the index
394 394 file) [1]
395 395 - node index size (the on-disk length of this file's node index) [1]
396 396
397 397 The fanout is a quick lookup table to reduce the number of steps for
398 398 bisecting the index. It is a series of 4 byte pointers to positions
399 399 within the index. It has 2^16 entries, which corresponds to hash
400 400 prefixes [00, 01, 02,..., FD, FE, FF]. Example: the pointer in slot 4F
401 401 points to the index position of the first revision whose node starts
402 402 with 4F. This saves log(2^16) bisect steps.
403 403
404 404 dataidx = <fanouttable>
405 405 <file count: 8 byte unsigned> [1]
406 406 <fileindex>
407 407 <node count: 8 byte unsigned> [1]
408 408 [<nodeindex>,...] [1]
409 409 fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
410 410
411 411 fileindex = [<file index entry>,...]
412 412 fileindexentry = <node: 20 byte>
413 413 <pack file section offset: 8 byte unsigned int>
414 414 <pack file section size: 8 byte unsigned int>
415 415 <node index offset: 4 byte unsigned int> [1]
416 416 <node index size: 4 byte unsigned int> [1]
417 417 nodeindex = <filename>[<node index entry>,...] [1]
418 418 filename = <filename len : 2 byte unsigned int><filename value> [1]
419 419 nodeindexentry = <node: 20 byte> [1]
420 420 <pack file node offset: 8 byte unsigned int> [1]
421 421
422 422 [1]: new in version 1.
423 423 """
424 424
425 425 INDEXSUFFIX = INDEXSUFFIX
426 426 PACKSUFFIX = PACKSUFFIX
427 427
428 428 SUPPORTED_VERSIONS = [2]
429 429
430 430 def __init__(self, ui, packpath, version=2):
431 431 super(mutablehistorypack, self).__init__(ui, packpath, version=version)
432 432 self.files = {}
433 433 self.entrylocations = {}
434 434 self.fileentries = {}
435 435
436 436 self.INDEXFORMAT = INDEXFORMAT2
437 437 self.INDEXENTRYLENGTH = INDEXENTRYLENGTH2
438 438
439 439 self.NODEINDEXFORMAT = NODEINDEXFORMAT
440 440 self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH
441 441
442 442 def add(self, filename, node, p1, p2, linknode, copyfrom):
443 443 copyfrom = copyfrom or b''
444 444 copyfromlen = struct.pack(b'!H', len(copyfrom))
445 445 self.fileentries.setdefault(filename, []).append(
446 446 (node, p1, p2, linknode, copyfromlen, copyfrom)
447 447 )
448 448
449 449 def _write(self):
450 450 for filename in sorted(self.fileentries):
451 451 entries = self.fileentries[filename]
452 452 sectionstart = self.packfp.tell()
453 453
454 454 # Write the file section content
455 455 entrymap = dict((e[0], e) for e in entries)
456 456
457 457 def parentfunc(node):
458 458 x, p1, p2, x, x, x = entrymap[node]
459 459 parents = []
460 460 if p1 != nullid:
461 461 parents.append(p1)
462 462 if p2 != nullid:
463 463 parents.append(p2)
464 464 return parents
465 465
466 466 sortednodes = list(
467 467 reversed(
468 468 shallowutil.sortnodes((e[0] for e in entries), parentfunc)
469 469 )
470 470 )
471 471
472 472 # Write the file section header
473 473 self.writeraw(
474 474 b"%s%s%s"
475 475 % (
476 476 struct.pack(b'!H', len(filename)),
477 477 filename,
478 478 struct.pack(b'!I', len(sortednodes)),
479 479 )
480 480 )
481 481
482 482 sectionlen = constants.FILENAMESIZE + len(filename) + 4
483 483
484 484 rawstrings = []
485 485
486 486 # Record the node locations for the index
487 487 locations = self.entrylocations.setdefault(filename, {})
488 488 offset = sectionstart + sectionlen
489 489 for node in sortednodes:
490 490 locations[node] = offset
491 491 raw = b'%s%s%s%s%s%s' % entrymap[node]
492 492 rawstrings.append(raw)
493 493 offset += len(raw)
494 494
495 495 rawdata = b''.join(rawstrings)
496 496 sectionlen += len(rawdata)
497 497
498 498 self.writeraw(rawdata)
499 499
500 500 # Record metadata for the index
501 501 self.files[filename] = (sectionstart, sectionlen)
502 node = hashlib.sha1(filename).digest()
502 node = hashutil.sha1(filename).digest()
503 503 self.entries[node] = node
504 504
505 505 def close(self, ledger=None):
506 506 if self._closed:
507 507 return
508 508
509 509 self._write()
510 510
511 511 return super(mutablehistorypack, self).close(ledger=ledger)
512 512
513 513 def createindex(self, nodelocations, indexoffset):
514 514 fileindexformat = self.INDEXFORMAT
515 515 fileindexlength = self.INDEXENTRYLENGTH
516 516 nodeindexformat = self.NODEINDEXFORMAT
517 517 nodeindexlength = self.NODEINDEXENTRYLENGTH
518 518
519 519 files = (
520 (hashlib.sha1(filename).digest(), filename, offset, size)
520 (hashutil.sha1(filename).digest(), filename, offset, size)
521 521 for filename, (offset, size) in pycompat.iteritems(self.files)
522 522 )
523 523 files = sorted(files)
524 524
525 525 # node index is after file index size, file index, and node index size
526 526 indexlensize = struct.calcsize(b'!Q')
527 527 nodeindexoffset = (
528 528 indexoffset
529 529 + indexlensize
530 530 + (len(files) * fileindexlength)
531 531 + indexlensize
532 532 )
533 533
534 534 fileindexentries = []
535 535 nodeindexentries = []
536 536 nodecount = 0
537 537 for namehash, filename, offset, size in files:
538 538 # File section index
539 539 nodelocations = self.entrylocations[filename]
540 540
541 541 nodeindexsize = len(nodelocations) * nodeindexlength
542 542
543 543 rawentry = struct.pack(
544 544 fileindexformat,
545 545 namehash,
546 546 offset,
547 547 size,
548 548 nodeindexoffset,
549 549 nodeindexsize,
550 550 )
551 551 # Node index
552 552 nodeindexentries.append(
553 553 struct.pack(constants.FILENAMESTRUCT, len(filename)) + filename
554 554 )
555 555 nodeindexoffset += constants.FILENAMESIZE + len(filename)
556 556
557 557 for node, location in sorted(pycompat.iteritems(nodelocations)):
558 558 nodeindexentries.append(
559 559 struct.pack(nodeindexformat, node, location)
560 560 )
561 561 nodecount += 1
562 562
563 563 nodeindexoffset += len(nodelocations) * nodeindexlength
564 564
565 565 fileindexentries.append(rawentry)
566 566
567 567 nodecountraw = struct.pack(b'!Q', nodecount)
568 568 return (
569 569 b''.join(fileindexentries)
570 570 + nodecountraw
571 571 + b''.join(nodeindexentries)
572 572 )
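# Editor's note (illustration, not part of this changeset): a minimal
# round-trip of one .histpack file section, following the format documented
# in the mutablehistorypack docstring above (a 2-byte filename length, the
# filename, a 4-byte revision count, then PACKFORMAT entries each followed by
# an optional copyfrom suffix). The node values below are dummy 20-byte data.
import struct

PACKFORMAT = b"!20s20s20s20sH"
PACKENTRYLENGTH = struct.calcsize(PACKFORMAT)  # 82 bytes


def sketch_build_section(filename, entries):
    buf = struct.pack(b"!H", len(filename)) + filename
    buf += struct.pack(b"!I", len(entries))
    for node, p1, p2, linknode, copyfrom in entries:
        buf += struct.pack(PACKFORMAT, node, p1, p2, linknode, len(copyfrom))
        buf += copyfrom
    return buf


def sketch_parse_section(data):
    offset = 0
    (namelen,) = struct.unpack_from(b"!H", data, offset)
    offset += 2
    filename = data[offset:offset + namelen]
    offset += namelen
    (revcount,) = struct.unpack_from(b"!I", data, offset)
    offset += 4
    entries = []
    for _ in range(revcount):
        node, p1, p2, linknode, cplen = struct.unpack_from(
            PACKFORMAT, data, offset
        )
        offset += PACKENTRYLENGTH
        copyfrom = data[offset:offset + cplen]
        offset += cplen
        entries.append((node, p1, p2, linknode, copyfrom))
    return filename, entries


nullid = b"\0" * 20
entry = (b"\x11" * 20, nullid, nullid, b"\x22" * 20, b"")
section = sketch_build_section(b"dir/file.txt", [entry])
assert sketch_parse_section(section) == (b"dir/file.txt", [entry])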
@@ -1,536 +1,536 b''
1 1 # shallowutil.py -- remotefilelog utilities
2 2 #
3 3 # Copyright 2014 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import collections
10 10 import errno
11 import hashlib
12 11 import os
13 12 import stat
14 13 import struct
15 14 import tempfile
16 15
17 16 from mercurial.i18n import _
18 17 from mercurial.pycompat import open
19 18 from mercurial import (
20 19 error,
21 20 node,
22 21 pycompat,
23 22 revlog,
24 23 util,
25 24 )
26 25 from mercurial.utils import (
26 hashutil,
27 27 storageutil,
28 28 stringutil,
29 29 )
30 30 from . import constants
31 31
32 32 if not pycompat.iswindows:
33 33 import grp
34 34
35 35
36 36 def isenabled(repo):
37 37 """returns whether the repository is remotefilelog enabled or not"""
38 38 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
39 39
40 40
41 41 def getcachekey(reponame, file, id):
42 pathhash = node.hex(hashlib.sha1(file).digest())
42 pathhash = node.hex(hashutil.sha1(file).digest())
43 43 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
44 44
45 45
46 46 def getlocalkey(file, id):
47 pathhash = node.hex(hashlib.sha1(file).digest())
47 pathhash = node.hex(hashutil.sha1(file).digest())
48 48 return os.path.join(pathhash, id)
49 49
50 50
51 51 def getcachepath(ui, allowempty=False):
52 52 cachepath = ui.config(b"remotefilelog", b"cachepath")
53 53 if not cachepath:
54 54 if allowempty:
55 55 return None
56 56 else:
57 57 raise error.Abort(
58 58 _(b"could not find config option remotefilelog.cachepath")
59 59 )
60 60 return util.expandpath(cachepath)
61 61
62 62
63 63 def getcachepackpath(repo, category):
64 64 cachepath = getcachepath(repo.ui)
65 65 if category != constants.FILEPACK_CATEGORY:
66 66 return os.path.join(cachepath, repo.name, b'packs', category)
67 67 else:
68 68 return os.path.join(cachepath, repo.name, b'packs')
69 69
70 70
71 71 def getlocalpackpath(base, category):
72 72 return os.path.join(base, b'packs', category)
73 73
74 74
75 75 def createrevlogtext(text, copyfrom=None, copyrev=None):
76 76 """returns a string that matches the revlog contents in a
77 77 traditional revlog
78 78 """
79 79 meta = {}
80 80 if copyfrom or text.startswith(b'\1\n'):
81 81 if copyfrom:
82 82 meta[b'copy'] = copyfrom
83 83 meta[b'copyrev'] = copyrev
84 84 text = storageutil.packmeta(meta, text)
85 85
86 86 return text
87 87
88 88
89 89 def parsemeta(text):
90 90 """parse mercurial filelog metadata"""
91 91 meta, size = storageutil.parsemeta(text)
92 92 if text.startswith(b'\1\n'):
93 93 s = text.index(b'\1\n', 2)
94 94 text = text[s + 2 :]
95 95 return meta or {}, text
96 96
97 97
98 98 def sumdicts(*dicts):
99 99 """Adds all the values of *dicts together into one dictionary. This assumes
100 100 the values in *dicts are all summable.
101 101
102 102 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
103 103 """
104 104 result = collections.defaultdict(lambda: 0)
105 105 for dict in dicts:
106 106 for k, v in pycompat.iteritems(dict):
107 107 result[k] += v
108 108 return result
109 109
110 110
111 111 def prefixkeys(dict, prefix):
112 112 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
113 113 result = {}
114 114 for k, v in pycompat.iteritems(dict):
115 115 result[prefix + k] = v
116 116 return result
117 117
118 118
119 119 def reportpackmetrics(ui, prefix, *stores):
120 120 dicts = [s.getmetrics() for s in stores]
121 121 dict = prefixkeys(sumdicts(*dicts), prefix + b'_')
122 122 ui.log(prefix + b"_packsizes", b"\n", **pycompat.strkwargs(dict))
123 123
124 124
125 125 def _parsepackmeta(metabuf):
126 126 """parse datapack meta, bytes (<metadata-list>) -> dict
127 127
128 128 The dict contains raw content - both keys and values are strings.
129 129 Upper-level business may want to convert some of them to other types like
130 130 integers, on their own.
131 131
132 132 raise ValueError if the data is corrupted
133 133 """
134 134 metadict = {}
135 135 offset = 0
136 136 buflen = len(metabuf)
137 137 while buflen - offset >= 3:
138 138 key = metabuf[offset : offset + 1]
139 139 offset += 1
140 140 metalen = struct.unpack_from(b'!H', metabuf, offset)[0]
141 141 offset += 2
142 142 if offset + metalen > buflen:
143 143 raise ValueError(b'corrupted metadata: incomplete buffer')
144 144 value = metabuf[offset : offset + metalen]
145 145 metadict[key] = value
146 146 offset += metalen
147 147 if offset != buflen:
148 148 raise ValueError(b'corrupted metadata: redundant data')
149 149 return metadict
150 150
151 151
152 152 def _buildpackmeta(metadict):
153 153 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
154 154
155 155 The dict contains raw content - both keys and values are strings.
156 156 Upper-level business may want to serialize some of other types (like
157 157 integers) to strings before calling this function.
158 158
159 159 raise ProgrammingError when metadata key is illegal, or ValueError if
160 160 length limit is exceeded
161 161 """
162 162 metabuf = b''
163 163 for k, v in sorted(pycompat.iteritems((metadict or {}))):
164 164 if len(k) != 1:
165 165 raise error.ProgrammingError(b'packmeta: illegal key: %s' % k)
166 166 if len(v) > 0xFFFE:
167 167 raise ValueError(
168 168 b'metadata value is too long: 0x%x > 0xfffe' % len(v)
169 169 )
170 170 metabuf += k
171 171 metabuf += struct.pack(b'!H', len(v))
172 172 metabuf += v
173 173 # len(metabuf) is guaranteed representable in 4 bytes, because there are
174 174 # only 256 keys, and for each value, len(value) <= 0xfffe.
175 175 return metabuf
176 176
177 177
178 178 _metaitemtypes = {
179 179 constants.METAKEYFLAG: (int, pycompat.long),
180 180 constants.METAKEYSIZE: (int, pycompat.long),
181 181 }
182 182
183 183
184 184 def buildpackmeta(metadict):
185 185 """like _buildpackmeta, but typechecks metadict and normalize it.
186 186
187 187 This means, METAKEYSIZE and METAKEYSIZE should have integers as values,
188 188 and METAKEYFLAG will be dropped if its value is 0.
189 189 """
190 190 newmeta = {}
191 191 for k, v in pycompat.iteritems(metadict or {}):
192 192 expectedtype = _metaitemtypes.get(k, (bytes,))
193 193 if not isinstance(v, expectedtype):
194 194 raise error.ProgrammingError(b'packmeta: wrong type of key %s' % k)
195 195 # normalize int to binary buffer
196 196 if int in expectedtype:
197 197 # optimization: remove flag if it's 0 to save space
198 198 if k == constants.METAKEYFLAG and v == 0:
199 199 continue
200 200 v = int2bin(v)
201 201 newmeta[k] = v
202 202 return _buildpackmeta(newmeta)
203 203
204 204
205 205 def parsepackmeta(metabuf):
206 206 """like _parsepackmeta, but convert fields to desired types automatically.
207 207
208 208 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
209 209 integers.
210 210 """
211 211 metadict = _parsepackmeta(metabuf)
212 212 for k, v in pycompat.iteritems(metadict):
213 213 if k in _metaitemtypes and int in _metaitemtypes[k]:
214 214 metadict[k] = bin2int(v)
215 215 return metadict
216 216
217 217
218 218 def int2bin(n):
219 219 """convert a non-negative integer to raw binary buffer"""
220 220 buf = bytearray()
221 221 while n > 0:
222 222 buf.insert(0, n & 0xFF)
223 223 n >>= 8
224 224 return bytes(buf)
225 225
226 226
227 227 def bin2int(buf):
228 228 """the reverse of int2bin, convert a binary buffer to an integer"""
229 229 x = 0
230 230 for b in bytearray(buf):
231 231 x <<= 8
232 232 x |= b
233 233 return x
234 234
235 235
236 236 def parsesizeflags(raw):
237 237 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
238 238
239 239 see remotefilelogserver.createfileblob for the format.
240 240 raise RuntimeError if the content is illformed.
241 241 """
242 242 flags = revlog.REVIDX_DEFAULT_FLAGS
243 243 size = None
244 244 try:
245 245 index = raw.index(b'\0')
246 246 header = raw[:index]
247 247 if header.startswith(b'v'):
248 248 # v1 and above, header starts with 'v'
249 249 if header.startswith(b'v1\n'):
250 250 for s in header.split(b'\n'):
251 251 if s.startswith(constants.METAKEYSIZE):
252 252 size = int(s[len(constants.METAKEYSIZE) :])
253 253 elif s.startswith(constants.METAKEYFLAG):
254 254 flags = int(s[len(constants.METAKEYFLAG) :])
255 255 else:
256 256 raise RuntimeError(
257 257 b'unsupported remotefilelog header: %s' % header
258 258 )
259 259 else:
260 260 # v0, str(int(size)) is the header
261 261 size = int(header)
262 262 except ValueError:
263 263 raise RuntimeError("unexpected remotefilelog header: illegal format")
264 264 if size is None:
265 265 raise RuntimeError("unexpected remotefilelog header: no size found")
266 266 return index + 1, size, flags
267 267
268 268
269 269 def buildfileblobheader(size, flags, version=None):
270 270 """return the header of a remotefilelog blob.
271 271
272 272 see remotefilelogserver.createfileblob for the format.
273 273 approximately the reverse of parsesizeflags.
274 274
275 275 version could be 0 or 1, or None (auto decide).
276 276 """
277 277 # choose v0 if flags is empty, otherwise v1
278 278 if version is None:
279 279 version = int(bool(flags))
280 280 if version == 1:
281 281 header = b'v1\n%s%d\n%s%d' % (
282 282 constants.METAKEYSIZE,
283 283 size,
284 284 constants.METAKEYFLAG,
285 285 flags,
286 286 )
287 287 elif version == 0:
288 288 if flags:
289 289 raise error.ProgrammingError(b'fileblob v0 does not support flag')
290 290 header = b'%d' % size
291 291 else:
292 292 raise error.ProgrammingError(b'unknown fileblob version %d' % version)
293 293 return header
294 294
295 295
296 296 def ancestormap(raw):
297 297 offset, size, flags = parsesizeflags(raw)
298 298 start = offset + size
299 299
300 300 mapping = {}
301 301 while start < len(raw):
302 302 divider = raw.index(b'\0', start + 80)
303 303
304 304 currentnode = raw[start : (start + 20)]
305 305 p1 = raw[(start + 20) : (start + 40)]
306 306 p2 = raw[(start + 40) : (start + 60)]
307 307 linknode = raw[(start + 60) : (start + 80)]
308 308 copyfrom = raw[(start + 80) : divider]
309 309
310 310 mapping[currentnode] = (p1, p2, linknode, copyfrom)
311 311 start = divider + 1
312 312
313 313 return mapping
314 314
315 315
316 316 def readfile(path):
317 317 f = open(path, b'rb')
318 318 try:
319 319 result = f.read()
320 320
321 321 # we should never have empty files
322 322 if not result:
323 323 os.remove(path)
324 324 raise IOError(b"empty file: %s" % path)
325 325
326 326 return result
327 327 finally:
328 328 f.close()
329 329
330 330
331 331 def unlinkfile(filepath):
332 332 if pycompat.iswindows:
333 333 # On Windows, os.unlink cannot delete readonly files
334 334 os.chmod(filepath, stat.S_IWUSR)
335 335 os.unlink(filepath)
336 336
337 337
338 338 def renamefile(source, destination):
339 339 if pycompat.iswindows:
340 340 # On Windows, os.rename cannot rename readonly files
341 341 # and cannot overwrite destination if it exists
342 342 os.chmod(source, stat.S_IWUSR)
343 343 if os.path.isfile(destination):
344 344 os.chmod(destination, stat.S_IWUSR)
345 345 os.unlink(destination)
346 346
347 347 os.rename(source, destination)
348 348
349 349
350 350 def writefile(path, content, readonly=False):
351 351 dirname, filename = os.path.split(path)
352 352 if not os.path.exists(dirname):
353 353 try:
354 354 os.makedirs(dirname)
355 355 except OSError as ex:
356 356 if ex.errno != errno.EEXIST:
357 357 raise
358 358
359 359 fd, temp = tempfile.mkstemp(prefix=b'.%s-' % filename, dir=dirname)
360 360 os.close(fd)
361 361
362 362 try:
363 363 f = util.posixfile(temp, b'wb')
364 364 f.write(content)
365 365 f.close()
366 366
367 367 if readonly:
368 368 mode = 0o444
369 369 else:
370 370 # tempfiles are created with 0o600, so we need to manually set the
371 371 # mode.
372 372 oldumask = os.umask(0)
373 373 # there's no way to get the umask without modifying it, so set it
374 374 # back
375 375 os.umask(oldumask)
376 376 mode = ~oldumask
377 377
378 378 renamefile(temp, path)
379 379 os.chmod(path, mode)
380 380 except Exception:
381 381 try:
382 382 unlinkfile(temp)
383 383 except OSError:
384 384 pass
385 385 raise
386 386
387 387
388 388 def sortnodes(nodes, parentfunc):
389 389 """Topologically sorts the nodes, using the parentfunc to find
390 390 the parents of nodes."""
391 391 nodes = set(nodes)
392 392 childmap = {}
393 393 parentmap = {}
394 394 roots = []
395 395
396 396 # Build a child and parent map
397 397 for n in nodes:
398 398 parents = [p for p in parentfunc(n) if p in nodes]
399 399 parentmap[n] = set(parents)
400 400 for p in parents:
401 401 childmap.setdefault(p, set()).add(n)
402 402 if not parents:
403 403 roots.append(n)
404 404
405 405 roots.sort()
406 406 # Process roots, adding children to the queue as they become roots
407 407 results = []
408 408 while roots:
409 409 n = roots.pop(0)
410 410 results.append(n)
411 411 if n in childmap:
412 412 children = childmap[n]
413 413 for c in children:
414 414 childparents = parentmap[c]
415 415 childparents.remove(n)
416 416 if len(childparents) == 0:
417 417 # insert at the beginning, that way child nodes
418 418 # are likely to be output immediately after their
419 419 # parents. This gives better compression results.
420 420 roots.insert(0, c)
421 421
422 422 return results
423 423
424 424
425 425 def readexactly(stream, n):
426 426 '''read n bytes from stream.read and abort if less was available'''
427 427 s = stream.read(n)
428 428 if len(s) < n:
429 429 raise error.Abort(
430 430 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
431 431 % (len(s), n)
432 432 )
433 433 return s
434 434
435 435
436 436 def readunpack(stream, fmt):
437 437 data = readexactly(stream, struct.calcsize(fmt))
438 438 return struct.unpack(fmt, data)
439 439
440 440
441 441 def readpath(stream):
442 442 rawlen = readexactly(stream, constants.FILENAMESIZE)
443 443 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
444 444 return readexactly(stream, pathlen)
445 445
446 446
447 447 def readnodelist(stream):
448 448 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
449 449 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
450 450 for i in pycompat.xrange(nodecount):
451 451 yield readexactly(stream, constants.NODESIZE)
452 452
453 453
454 454 def readpathlist(stream):
455 455 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
456 456 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
457 457 for i in pycompat.xrange(pathcount):
458 458 yield readpath(stream)
459 459
460 460
461 461 def getgid(groupname):
462 462 try:
463 463 gid = grp.getgrnam(pycompat.fsdecode(groupname)).gr_gid
464 464 return gid
465 465 except KeyError:
466 466 return None
467 467
468 468
469 469 def setstickygroupdir(path, gid, warn=None):
470 470 if gid is None:
471 471 return
472 472 try:
473 473 os.chown(path, -1, gid)
474 474 os.chmod(path, 0o2775)
475 475 except (IOError, OSError) as ex:
476 476 if warn:
477 477 warn(_(b'unable to chown/chmod on %s: %s\n') % (path, ex))
478 478
479 479
480 480 def mkstickygroupdir(ui, path):
481 481 """Creates the given directory (if it doesn't exist) and give it a
482 482 particular group with setgid enabled."""
483 483 gid = None
484 484 groupname = ui.config(b"remotefilelog", b"cachegroup")
485 485 if groupname:
486 486 gid = getgid(groupname)
487 487 if gid is None:
488 488 ui.warn(_(b'unable to resolve group name: %s\n') % groupname)
489 489
490 490 # we use a single stat syscall to test the existence and mode / group bit
491 491 st = None
492 492 try:
493 493 st = os.stat(path)
494 494 except OSError:
495 495 pass
496 496
497 497 if st:
498 498 # exists
499 499 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
500 500 # permission needs to be fixed
501 501 setstickygroupdir(path, gid, ui.warn)
502 502 return
503 503
504 504 oldumask = os.umask(0o002)
505 505 try:
506 506 missingdirs = [path]
507 507 path = os.path.dirname(path)
508 508 while path and not os.path.exists(path):
509 509 missingdirs.append(path)
510 510 path = os.path.dirname(path)
511 511
512 512 for path in reversed(missingdirs):
513 513 try:
514 514 os.mkdir(path)
515 515 except OSError as ex:
516 516 if ex.errno != errno.EEXIST:
517 517 raise
518 518
519 519 for path in missingdirs:
520 520 setstickygroupdir(path, gid, ui.warn)
521 521 finally:
522 522 os.umask(oldumask)
523 523
524 524
525 525 def getusername(ui):
526 526 try:
527 527 return stringutil.shortuser(ui.username())
528 528 except Exception:
529 529 return b'unknown'
530 530
531 531
532 532 def getreponame(ui):
533 533 reponame = ui.config(b'paths', b'default')
534 534 if reponame:
535 535 return os.path.basename(reponame)
536 536 return b"unknown"
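# Editor's note (illustration, not part of this changeset): a standalone
# round-trip of the pack metadata encoding handled by _buildpackmeta() and
# _parsepackmeta() above: each item is a single-byte key, a 2-byte big-endian
# length, and the raw value. The single-byte keys used here are placeholders;
# the real code uses constants.METAKEYSIZE and constants.METAKEYFLAG.
import struct


def sketch_buildpackmeta(metadict):
    buf = b""
    for key, value in sorted(metadict.items()):
        assert len(key) == 1 and len(value) <= 0xFFFE
        buf += key + struct.pack(b"!H", len(value)) + value
    return buf


def sketch_parsepackmeta(buf):
    metadict, offset = {}, 0
    while offset < len(buf):
        key = buf[offset:offset + 1]
        (length,) = struct.unpack_from(b"!H", buf, offset + 1)
        metadict[key] = buf[offset + 3:offset + 3 + length]
        offset += 3 + length
    return metadict


meta = {b"s": b"1234", b"f": b"\x01"}
assert sketch_parsepackmeta(sketch_buildpackmeta(meta)) == meta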
@@ -1,1293 +1,1295 b''
1 1 # sqlitestore.py - Storage backend that uses SQLite
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """store repository data in SQLite (EXPERIMENTAL)
9 9
10 10 The sqlitestore extension enables the storage of repository data in SQLite.
11 11
12 12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 13 GUARANTEES. This means that repositories created with this extension may
14 14 only be usable with the exact version of this extension/Mercurial that was
15 15 used. The extension attempts to enforce this in order to prevent repository
16 16 corruption.
17 17
18 18 In addition, several features are not yet supported or have known bugs:
19 19
20 20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 21 data is not yet stored in SQLite.
22 22 * Transactions are not robust. If the process is aborted at the right time
23 23 during transaction close/rollback, the repository could be in an inconsistent
24 24 state. This problem will diminish once all repository data is tracked by
25 25 SQLite.
26 26 * Bundle repositories do not work (the ability to use e.g.
27 27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 28 existing repository).
29 29 * Various other features don't work.
30 30
31 31 This extension should work for basic clone/pull, update, and commit workflows.
32 32 Some history rewriting operations may fail due to lack of support for bundle
33 33 repositories.
34 34
35 35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 37 """
38 38
39 39 # To run the test suite with repos using SQLite by default, execute the
40 40 # following:
41 41 #
42 42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 43 # --extra-config-opt extensions.sqlitestore= \
44 44 # --extra-config-opt storage.new-repo-backend=sqlite
45 45
46 46 from __future__ import absolute_import
47 47
48 import hashlib
49 48 import sqlite3
50 49 import struct
51 50 import threading
52 51 import zlib
53 52
54 53 from mercurial.i18n import _
55 54 from mercurial.node import (
56 55 nullid,
57 56 nullrev,
58 57 short,
59 58 )
60 59 from mercurial.thirdparty import attr
61 60 from mercurial import (
62 61 ancestor,
63 62 dagop,
64 63 encoding,
65 64 error,
66 65 extensions,
67 66 localrepo,
68 67 mdiff,
69 68 pycompat,
70 69 registrar,
71 70 util,
72 71 verify,
73 72 )
74 73 from mercurial.interfaces import (
75 74 repository,
76 75 util as interfaceutil,
77 76 )
78 from mercurial.utils import storageutil
77 from mercurial.utils import (
78 hashutil,
79 storageutil,
80 )
79 81
80 82 try:
81 83 from mercurial import zstd
82 84
83 85 zstd.__version__
84 86 except ImportError:
85 87 zstd = None
86 88
87 89 configtable = {}
88 90 configitem = registrar.configitem(configtable)
89 91
90 92 # experimental config: storage.sqlite.compression
91 93 configitem(
92 94 b'storage',
93 95 b'sqlite.compression',
94 96 default=b'zstd' if zstd else b'zlib',
95 97 experimental=True,
96 98 )
97 99
98 100 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
99 101 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
100 102 # be specifying the version(s) of Mercurial they are tested with, or
101 103 # leave the attribute unspecified.
102 104 testedwith = b'ships-with-hg-core'
103 105
104 106 REQUIREMENT = b'exp-sqlite-001'
105 107 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
106 108 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
107 109 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
108 110 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
109 111
110 112 CURRENT_SCHEMA_VERSION = 1
111 113
112 114 COMPRESSION_NONE = 1
113 115 COMPRESSION_ZSTD = 2
114 116 COMPRESSION_ZLIB = 3
115 117
116 118 FLAG_CENSORED = 1
117 119 FLAG_MISSING_P1 = 2
118 120 FLAG_MISSING_P2 = 4
119 121
120 122 CREATE_SCHEMA = [
121 123 # Deltas are stored as content-indexed blobs.
122 124 # compression column holds COMPRESSION_* constant for how the
123 125 # delta is encoded.
124 126 'CREATE TABLE delta ('
125 127 ' id INTEGER PRIMARY KEY, '
126 128 ' compression INTEGER NOT NULL, '
127 129 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
128 130 ' delta BLOB NOT NULL '
129 131 ')',
130 132 # Tracked paths are denormalized to integers to avoid redundant
131 133 # storage of the path name.
132 134 'CREATE TABLE filepath ('
133 135 ' id INTEGER PRIMARY KEY, '
134 136 ' path BLOB NOT NULL '
135 137 ')',
136 138 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
137 139 # We have a single table for all file revision data.
138 140 # Each file revision is uniquely described by a (path, rev) and
139 141 # (path, node).
140 142 #
141 143 # Revision data is stored as a pointer to the delta producing this
142 144 # revision and the file revision whose delta should be applied before
143 145 # that one. One can reconstruct the delta chain by recursively following
144 146 # the delta base revision pointers until one encounters NULL.
145 147 #
146 148 # flags column holds bitwise integer flags controlling storage options.
147 149 # These flags are defined by the FLAG_* constants.
148 150 'CREATE TABLE fileindex ('
149 151 ' id INTEGER PRIMARY KEY, '
150 152 ' pathid INTEGER REFERENCES filepath(id), '
151 153 ' revnum INTEGER NOT NULL, '
152 154 ' p1rev INTEGER NOT NULL, '
153 155 ' p2rev INTEGER NOT NULL, '
154 156 ' linkrev INTEGER NOT NULL, '
155 157 ' flags INTEGER NOT NULL, '
156 158 ' deltaid INTEGER REFERENCES delta(id), '
157 159 ' deltabaseid INTEGER REFERENCES fileindex(id), '
158 160 ' node BLOB NOT NULL '
159 161 ')',
160 162 'CREATE UNIQUE INDEX fileindex_pathrevnum '
161 163 ' ON fileindex (pathid, revnum)',
162 164 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
163 165 # Provide a view over all file data for convenience.
164 166 'CREATE VIEW filedata AS '
165 167 'SELECT '
166 168 ' fileindex.id AS id, '
167 169 ' filepath.id AS pathid, '
168 170 ' filepath.path AS path, '
169 171 ' fileindex.revnum AS revnum, '
170 172 ' fileindex.node AS node, '
171 173 ' fileindex.p1rev AS p1rev, '
172 174 ' fileindex.p2rev AS p2rev, '
173 175 ' fileindex.linkrev AS linkrev, '
174 176 ' fileindex.flags AS flags, '
175 177 ' fileindex.deltaid AS deltaid, '
176 178 ' fileindex.deltabaseid AS deltabaseid '
177 179 'FROM filepath, fileindex '
178 180 'WHERE fileindex.pathid=filepath.id',
179 181 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
180 182 ]
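# Illustration of the layout described in the schema comments above, using
# made-up rows:
#
#   fileindex: id=1 revnum=0 deltaid=10 deltabaseid=NULL  (stored as fulltext)
#              id=2 revnum=1 deltaid=11 deltabaseid=1     (delta against rev 0)
#              id=3 revnum=2 deltaid=12 deltabaseid=2     (delta against rev 1)
#
# Reconstructing revnum=2 follows deltabaseid from id=3 back to NULL, takes
# delta 10 as the base text, and applies deltas 11 and 12 on top of it (see
# resolvedeltachain() below).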
181 183
182 184
183 185 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
184 186 """Resolve a delta chain for a file node."""
185 187
186 188 # TODO the "not in ({stops})" here is possibly slowing down the query
187 189 # because it needs to perform the lookup on every recursive invocation.
188 190 # This could possibly be faster if we created a temporary query with
189 191 # baseid "poisoned" to null and limited the recursive filter to
190 192 # "is not null".
191 193 res = db.execute(
192 194 'WITH RECURSIVE '
193 195 ' deltachain(deltaid, baseid) AS ('
194 196 ' SELECT deltaid, deltabaseid FROM fileindex '
195 197 ' WHERE pathid=? AND node=? '
196 198 ' UNION ALL '
197 199 ' SELECT fileindex.deltaid, deltabaseid '
198 200 ' FROM fileindex, deltachain '
199 201 ' WHERE '
200 202 ' fileindex.id=deltachain.baseid '
201 203 ' AND deltachain.baseid IS NOT NULL '
202 204 ' AND fileindex.id NOT IN ({stops}) '
203 205 ' ) '
204 206 'SELECT deltachain.baseid, compression, delta '
205 207 'FROM deltachain, delta '
206 208 'WHERE delta.id=deltachain.deltaid'.format(
207 209 stops=','.join(['?'] * len(stoprids))
208 210 ),
209 211 tuple([pathid, node] + list(stoprids.keys())),
210 212 )
211 213
212 214 deltas = []
213 215 lastdeltabaseid = None
214 216
215 217 for deltabaseid, compression, delta in res:
216 218 lastdeltabaseid = deltabaseid
217 219
218 220 if compression == COMPRESSION_ZSTD:
219 221 delta = zstddctx.decompress(delta)
220 222 elif compression == COMPRESSION_NONE:
221 223 delta = delta
222 224 elif compression == COMPRESSION_ZLIB:
223 225 delta = zlib.decompress(delta)
224 226 else:
225 227 raise SQLiteStoreError(
226 228 b'unhandled compression type: %d' % compression
227 229 )
228 230
229 231 deltas.append(delta)
230 232
231 233 if lastdeltabaseid in stoprids:
232 234 basetext = revisioncache[stoprids[lastdeltabaseid]]
233 235 else:
234 236 basetext = deltas.pop()
235 237
236 238 deltas.reverse()
237 239 fulltext = mdiff.patches(basetext, deltas)
238 240
239 241 # SQLite returns buffer instances for blob columns on Python 2. This
240 242 # type can propagate through the delta application layer. Because
241 243 # downstream callers assume revisions are bytes, cast as needed.
242 244 if not isinstance(fulltext, bytes):
243 245 fulltext = bytes(fulltext)
244 246
245 247 return fulltext
246 248
247 249
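# A note on the helper below: the delta table declares its hash column as
# UNIQUE, so inserting a blob that is already stored raises
# sqlite3.IntegrityError; in that case the existing row is looked up and
# reused, de-duplicating identical deltas by their SHA-1 content hash.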
248 250 def insertdelta(db, compression, hash, delta):
249 251 try:
250 252 return db.execute(
251 253 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
252 254 (compression, hash, delta),
253 255 ).lastrowid
254 256 except sqlite3.IntegrityError:
255 257 return db.execute(
256 258 'SELECT id FROM delta WHERE hash=?', (hash,)
257 259 ).fetchone()[0]
258 260
259 261
260 262 class SQLiteStoreError(error.StorageError):
261 263 pass
262 264
263 265
264 266 @attr.s
265 267 class revisionentry(object):
266 268 rid = attr.ib()
267 269 rev = attr.ib()
268 270 node = attr.ib()
269 271 p1rev = attr.ib()
270 272 p2rev = attr.ib()
271 273 p1node = attr.ib()
272 274 p2node = attr.ib()
273 275 linkrev = attr.ib()
274 276 flags = attr.ib()
275 277
276 278
277 279 @interfaceutil.implementer(repository.irevisiondelta)
278 280 @attr.s(slots=True)
279 281 class sqliterevisiondelta(object):
280 282 node = attr.ib()
281 283 p1node = attr.ib()
282 284 p2node = attr.ib()
283 285 basenode = attr.ib()
284 286 flags = attr.ib()
285 287 baserevisionsize = attr.ib()
286 288 revision = attr.ib()
287 289 delta = attr.ib()
288 290 linknode = attr.ib(default=None)
289 291
290 292
291 293 @interfaceutil.implementer(repository.iverifyproblem)
292 294 @attr.s(frozen=True)
293 295 class sqliteproblem(object):
294 296 warning = attr.ib(default=None)
295 297 error = attr.ib(default=None)
296 298 node = attr.ib(default=None)
297 299
298 300
299 301 @interfaceutil.implementer(repository.ifilestorage)
300 302 class sqlitefilestore(object):
301 303 """Implements storage for an individual tracked path."""
302 304
303 305 def __init__(self, db, path, compression):
304 306 self._db = db
305 307 self._path = path
306 308
307 309 self._pathid = None
308 310
309 311 # revnum -> node
310 312 self._revtonode = {}
311 313 # node -> revnum
312 314 self._nodetorev = {}
313 315 # node -> data structure
314 316 self._revisions = {}
315 317
316 318 self._revisioncache = util.lrucachedict(10)
317 319
318 320 self._compengine = compression
319 321
320 322 if compression == b'zstd':
321 323 self._cctx = zstd.ZstdCompressor(level=3)
322 324 self._dctx = zstd.ZstdDecompressor()
323 325 else:
324 326 self._cctx = None
325 327 self._dctx = None
326 328
327 329 self._refreshindex()
328 330
329 331 def _refreshindex(self):
330 332 self._revtonode = {}
331 333 self._nodetorev = {}
332 334 self._revisions = {}
333 335
334 336 res = list(
335 337 self._db.execute(
336 338 'SELECT id FROM filepath WHERE path=?', (self._path,)
337 339 )
338 340 )
339 341
340 342 if not res:
341 343 self._pathid = None
342 344 return
343 345
344 346 self._pathid = res[0][0]
345 347
346 348 res = self._db.execute(
347 349 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
348 350 'FROM fileindex '
349 351 'WHERE pathid=? '
350 352 'ORDER BY revnum ASC',
351 353 (self._pathid,),
352 354 )
353 355
354 356 for i, row in enumerate(res):
355 357 rid, rev, node, p1rev, p2rev, linkrev, flags = row
356 358
357 359 if i != rev:
358 360 raise SQLiteStoreError(
359 361 _(b'sqlite database has inconsistent revision numbers')
360 362 )
361 363
362 364 if p1rev == nullrev:
363 365 p1node = nullid
364 366 else:
365 367 p1node = self._revtonode[p1rev]
366 368
367 369 if p2rev == nullrev:
368 370 p2node = nullid
369 371 else:
370 372 p2node = self._revtonode[p2rev]
371 373
372 374 entry = revisionentry(
373 375 rid=rid,
374 376 rev=rev,
375 377 node=node,
376 378 p1rev=p1rev,
377 379 p2rev=p2rev,
378 380 p1node=p1node,
379 381 p2node=p2node,
380 382 linkrev=linkrev,
381 383 flags=flags,
382 384 )
383 385
384 386 self._revtonode[rev] = node
385 387 self._nodetorev[node] = rev
386 388 self._revisions[node] = entry
387 389
388 390 # Start of ifileindex interface.
389 391
390 392 def __len__(self):
391 393 return len(self._revisions)
392 394
393 395 def __iter__(self):
394 396 return iter(pycompat.xrange(len(self._revisions)))
395 397
396 398 def hasnode(self, node):
397 399 if node == nullid:
398 400 return False
399 401
400 402 return node in self._nodetorev
401 403
402 404 def revs(self, start=0, stop=None):
403 405 return storageutil.iterrevs(
404 406 len(self._revisions), start=start, stop=stop
405 407 )
406 408
407 409 def parents(self, node):
408 410 if node == nullid:
409 411 return nullid, nullid
410 412
411 413 if node not in self._revisions:
412 414 raise error.LookupError(node, self._path, _(b'no node'))
413 415
414 416 entry = self._revisions[node]
415 417 return entry.p1node, entry.p2node
416 418
417 419 def parentrevs(self, rev):
418 420 if rev == nullrev:
419 421 return nullrev, nullrev
420 422
421 423 if rev not in self._revtonode:
422 424 raise IndexError(rev)
423 425
424 426 entry = self._revisions[self._revtonode[rev]]
425 427 return entry.p1rev, entry.p2rev
426 428
427 429 def rev(self, node):
428 430 if node == nullid:
429 431 return nullrev
430 432
431 433 if node not in self._nodetorev:
432 434 raise error.LookupError(node, self._path, _(b'no node'))
433 435
434 436 return self._nodetorev[node]
435 437
436 438 def node(self, rev):
437 439 if rev == nullrev:
438 440 return nullid
439 441
440 442 if rev not in self._revtonode:
441 443 raise IndexError(rev)
442 444
443 445 return self._revtonode[rev]
444 446
445 447 def lookup(self, node):
446 448 return storageutil.fileidlookup(self, node, self._path)
447 449
448 450 def linkrev(self, rev):
449 451 if rev == nullrev:
450 452 return nullrev
451 453
452 454 if rev not in self._revtonode:
453 455 raise IndexError(rev)
454 456
455 457 entry = self._revisions[self._revtonode[rev]]
456 458 return entry.linkrev
457 459
458 460 def iscensored(self, rev):
459 461 if rev == nullrev:
460 462 return False
461 463
462 464 if rev not in self._revtonode:
463 465 raise IndexError(rev)
464 466
465 467 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
466 468
467 469 def commonancestorsheads(self, node1, node2):
468 470 rev1 = self.rev(node1)
469 471 rev2 = self.rev(node2)
470 472
471 473 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
472 474 return pycompat.maplist(self.node, ancestors)
473 475
474 476 def descendants(self, revs):
475 477 # TODO we could implement this using a recursive SQL query, which
476 478 # might be faster.
477 479 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
478 480
479 481 def heads(self, start=None, stop=None):
480 482 if start is None and stop is None:
481 483 if not len(self):
482 484 return [nullid]
483 485
484 486 startrev = self.rev(start) if start is not None else nullrev
485 487 stoprevs = {self.rev(n) for n in stop or []}
486 488
487 489 revs = dagop.headrevssubset(
488 490 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
489 491 )
490 492
491 493 return [self.node(rev) for rev in revs]
492 494
493 495 def children(self, node):
494 496 rev = self.rev(node)
495 497
496 498 res = self._db.execute(
497 499 'SELECT'
498 500 ' node '
499 501 ' FROM filedata '
500 502 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
501 503 ' ORDER BY revnum ASC',
502 504 (self._path, rev, rev),
503 505 )
504 506
505 507 return [row[0] for row in res]
506 508
507 509 # End of ifileindex interface.
508 510
509 511 # Start of ifiledata interface.
510 512
511 513 def size(self, rev):
512 514 if rev == nullrev:
513 515 return 0
514 516
515 517 if rev not in self._revtonode:
516 518 raise IndexError(rev)
517 519
518 520 node = self._revtonode[rev]
519 521
520 522 if self.renamed(node):
521 523 return len(self.read(node))
522 524
523 525 return len(self.revision(node))
524 526
525 527 def revision(self, node, raw=False, _verifyhash=True):
526 528 if node in (nullid, nullrev):
527 529 return b''
528 530
529 531 if isinstance(node, int):
530 532 node = self.node(node)
531 533
532 534 if node not in self._nodetorev:
533 535 raise error.LookupError(node, self._path, _(b'no node'))
534 536
535 537 if node in self._revisioncache:
536 538 return self._revisioncache[node]
537 539
538 540 # Because we have a fulltext revision cache, we are able to
539 541 # short-circuit delta chain traversal and decompression as soon as
540 542 # we encounter a revision in the cache.
541 543
542 544 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
543 545
544 546 if not stoprids:
545 547 stoprids[-1] = None
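# (The -1 entry is a sentinel: fileindex ids are positive, so with an empty
# revision cache the recursive query is never cut short and the delta chain
# is walked all the way down to its NULL base.)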
546 548
547 549 fulltext = resolvedeltachain(
548 550 self._db,
549 551 self._pathid,
550 552 node,
551 553 self._revisioncache,
552 554 stoprids,
553 555 zstddctx=self._dctx,
554 556 )
555 557
556 558 # Don't verify hashes if parent nodes were rewritten, as the hash
557 559 # wouldn't verify.
558 560 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
559 561 _verifyhash = False
560 562
561 563 if _verifyhash:
562 564 self._checkhash(fulltext, node)
563 565 self._revisioncache[node] = fulltext
564 566
565 567 return fulltext
566 568
567 569 def rawdata(self, *args, **kwargs):
568 570 return self.revision(*args, **kwargs)
569 571
570 572 def read(self, node):
571 573 return storageutil.filtermetadata(self.revision(node))
572 574
573 575 def renamed(self, node):
574 576 return storageutil.filerevisioncopied(self, node)
575 577
576 578 def cmp(self, node, fulltext):
577 579 return not storageutil.filedataequivalent(self, node, fulltext)
578 580
579 581 def emitrevisions(
580 582 self,
581 583 nodes,
582 584 nodesorder=None,
583 585 revisiondata=False,
584 586 assumehaveparentrevisions=False,
585 587 deltamode=repository.CG_DELTAMODE_STD,
586 588 ):
587 589 if nodesorder not in (b'nodes', b'storage', b'linear', None):
588 590 raise error.ProgrammingError(
589 591 b'unhandled value for nodesorder: %s' % nodesorder
590 592 )
591 593
592 594 nodes = [n for n in nodes if n != nullid]
593 595
594 596 if not nodes:
595 597 return
596 598
597 599 # TODO perform in a single query.
598 600 res = self._db.execute(
599 601 'SELECT revnum, deltaid FROM fileindex '
600 602 'WHERE pathid=? '
601 603 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
602 604 tuple([self._pathid] + nodes),
603 605 )
604 606
605 607 deltabases = {}
606 608
607 609 for rev, deltaid in res:
608 610 res = self._db.execute(
609 611 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
610 612 (self._pathid, deltaid),
611 613 )
612 614 deltabases[rev] = res.fetchone()[0]
613 615
614 616 # TODO define revdifffn so we can use delta from storage.
615 617 for delta in storageutil.emitrevisions(
616 618 self,
617 619 nodes,
618 620 nodesorder,
619 621 sqliterevisiondelta,
620 622 deltaparentfn=deltabases.__getitem__,
621 623 revisiondata=revisiondata,
622 624 assumehaveparentrevisions=assumehaveparentrevisions,
623 625 deltamode=deltamode,
624 626 ):
625 627
626 628 yield delta
627 629
628 630 # End of ifiledata interface.
629 631
630 632 # Start of ifilemutation interface.
631 633
632 634 def add(self, filedata, meta, transaction, linkrev, p1, p2):
633 635 if meta or filedata.startswith(b'\x01\n'):
634 636 filedata = storageutil.packmeta(meta, filedata)
635 637
636 638 return self.addrevision(filedata, transaction, linkrev, p1, p2)
637 639
638 640 def addrevision(
639 641 self,
640 642 revisiondata,
641 643 transaction,
642 644 linkrev,
643 645 p1,
644 646 p2,
645 647 node=None,
646 648 flags=0,
647 649 cachedelta=None,
648 650 ):
649 651 if flags:
650 652 raise SQLiteStoreError(_(b'flags not supported on revisions'))
651 653
652 654 validatehash = node is not None
653 655 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
654 656
655 657 if validatehash:
656 658 self._checkhash(revisiondata, node, p1, p2)
657 659
658 660 if node in self._nodetorev:
659 661 return node
660 662
661 663 node = self._addrawrevision(
662 664 node, revisiondata, transaction, linkrev, p1, p2
663 665 )
664 666
665 667 self._revisioncache[node] = revisiondata
666 668 return node
667 669
668 670 def addgroup(
669 671 self,
670 672 deltas,
671 673 linkmapper,
672 674 transaction,
673 675 addrevisioncb=None,
674 676 maybemissingparents=False,
675 677 ):
676 678 nodes = []
677 679
678 680 for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
679 681 storeflags = 0
680 682
681 683 if wireflags & repository.REVISION_FLAG_CENSORED:
682 684 storeflags |= FLAG_CENSORED
683 685
684 686 if wireflags & ~repository.REVISION_FLAG_CENSORED:
685 687 raise SQLiteStoreError(b'unhandled revision flag')
686 688
687 689 if maybemissingparents:
688 690 if p1 != nullid and not self.hasnode(p1):
689 691 p1 = nullid
690 692 storeflags |= FLAG_MISSING_P1
691 693
692 694 if p2 != nullid and not self.hasnode(p2):
693 695 p2 = nullid
694 696 storeflags |= FLAG_MISSING_P2
695 697
696 698 baserev = self.rev(deltabase)
697 699
698 700 # If base is censored, delta must be full replacement in a single
699 701 # patch operation.
700 702 if baserev != nullrev and self.iscensored(baserev):
701 703 hlen = struct.calcsize(b'>lll')
702 704 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
703 705 newlen = len(delta) - hlen
704 706
705 707 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
706 708 raise error.CensoredBaseError(self._path, deltabase)
707 709
708 710 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
709 711 delta, baserev, lambda x: len(self.rawdata(x))
710 712 ):
711 713 storeflags |= FLAG_CENSORED
712 714
713 715 linkrev = linkmapper(linknode)
714 716
715 717 nodes.append(node)
716 718
717 719 if node in self._revisions:
718 720 # Possibly reset parents to make them proper.
719 721 entry = self._revisions[node]
720 722
721 723 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
722 724 entry.p1node = p1
723 725 entry.p1rev = self._nodetorev[p1]
724 726 entry.flags &= ~FLAG_MISSING_P1
725 727
726 728 self._db.execute(
727 729 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
728 730 (self._nodetorev[p1], entry.flags, entry.rid),
729 731 )
730 732
731 733 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
732 734 entry.p2node = p2
733 735 entry.p2rev = self._nodetorev[p2]
734 736 entry.flags &= ~FLAG_MISSING_P2
735 737
736 738 self._db.execute(
737 739 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
738 740 (self._nodetorev[p2], entry.flags, entry.rid),
739 741 )
740 742
741 743 continue
742 744
743 745 if deltabase == nullid:
744 746 text = mdiff.patch(b'', delta)
745 747 storedelta = None
746 748 else:
747 749 text = None
748 750 storedelta = (deltabase, delta)
749 751
750 752 self._addrawrevision(
751 753 node,
752 754 text,
753 755 transaction,
754 756 linkrev,
755 757 p1,
756 758 p2,
757 759 storedelta=storedelta,
758 760 flags=storeflags,
759 761 )
760 762
761 763 if addrevisioncb:
762 764 addrevisioncb(self, node)
763 765
764 766 return nodes
765 767
766 768 def censorrevision(self, tr, censornode, tombstone=b''):
767 769 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
768 770
769 771 # This restriction is cargo culted from revlogs and makes no sense for
770 772 # SQLite, since columns can be resized at will.
771 773 if len(tombstone) > len(self.rawdata(censornode)):
772 774 raise error.Abort(
773 775 _(b'censor tombstone must be no longer than censored data')
774 776 )
775 777
776 778 # We need to replace the censored revision's data with the tombstone.
777 779 # But replacing that data will have implications for delta chains that
778 780 # reference it.
779 781 #
780 782 # While "better," more complex strategies are possible, we do something
781 783 # simple: we find delta chain children of the censored revision and we
782 784 # replace those incremental deltas with fulltexts of their corresponding
783 785 # revision. Then we delete the now-unreferenced delta and original
784 786 # revision and insert a replacement.
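#
# Sketch of the effect: if revision X is censored and revision Y's delta was
# based on X, Y is rewritten below as a standalone fulltext (its deltabaseid
# becomes NULL) before X's delta is swapped for the tombstone and the original
# delta row is deleted.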
785 787
786 788 # Find the delta to be censored.
787 789 censoreddeltaid = self._db.execute(
788 790 'SELECT deltaid FROM fileindex WHERE id=?',
789 791 (self._revisions[censornode].rid,),
790 792 ).fetchone()[0]
791 793
792 794 # Find all its delta chain children.
793 795 # TODO once we support storing deltas for !files, we'll need to look
794 796 # for those delta chains too.
795 797 rows = list(
796 798 self._db.execute(
797 799 'SELECT id, pathid, node FROM fileindex '
798 800 'WHERE deltabaseid=? OR deltaid=?',
799 801 (censoreddeltaid, censoreddeltaid),
800 802 )
801 803 )
802 804
803 805 for row in rows:
804 806 rid, pathid, node = row
805 807
806 808 fulltext = resolvedeltachain(
807 809 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
808 810 )
809 811
810 deltahash = hashlib.sha1(fulltext).digest()
812 deltahash = hashutil.sha1(fulltext).digest()
811 813
812 814 if self._compengine == b'zstd':
813 815 deltablob = self._cctx.compress(fulltext)
814 816 compression = COMPRESSION_ZSTD
815 817 elif self._compengine == b'zlib':
816 818 deltablob = zlib.compress(fulltext)
817 819 compression = COMPRESSION_ZLIB
818 820 elif self._compengine == b'none':
819 821 deltablob = fulltext
820 822 compression = COMPRESSION_NONE
821 823 else:
822 824 raise error.ProgrammingError(
823 825 b'unhandled compression engine: %s' % self._compengine
824 826 )
825 827
826 828 if len(deltablob) >= len(fulltext):
827 829 deltablob = fulltext
828 830 compression = COMPRESSION_NONE
829 831
830 832 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
831 833
832 834 self._db.execute(
833 835 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
834 836 'WHERE id=?',
835 837 (deltaid, rid),
836 838 )
837 839
838 840 # Now create the tombstone delta and replace the delta on the censored
839 841 # node.
840 deltahash = hashlib.sha1(tombstone).digest()
842 deltahash = hashutil.sha1(tombstone).digest()
841 843 tombstonedeltaid = insertdelta(
842 844 self._db, COMPRESSION_NONE, deltahash, tombstone
843 845 )
844 846
845 847 flags = self._revisions[censornode].flags
846 848 flags |= FLAG_CENSORED
847 849
848 850 self._db.execute(
849 851 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
850 852 'WHERE pathid=? AND node=?',
851 853 (flags, tombstonedeltaid, self._pathid, censornode),
852 854 )
853 855
854 856 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
855 857
856 858 self._refreshindex()
857 859 self._revisioncache.clear()
858 860
859 861 def getstrippoint(self, minlink):
860 862 return storageutil.resolvestripinfo(
861 863 minlink,
862 864 len(self) - 1,
863 865 [self.rev(n) for n in self.heads()],
864 866 self.linkrev,
865 867 self.parentrevs,
866 868 )
867 869
868 870 def strip(self, minlink, transaction):
869 871 if not len(self):
870 872 return
871 873
872 874 rev, _ignored = self.getstrippoint(minlink)
873 875
874 876 if rev == len(self):
875 877 return
876 878
877 879 for rev in self.revs(rev):
878 880 self._db.execute(
879 881 'DELETE FROM fileindex WHERE pathid=? AND node=?',
880 882 (self._pathid, self.node(rev)),
881 883 )
882 884
883 885 # TODO how should we garbage collect data in delta table?
884 886
885 887 self._refreshindex()
886 888
887 889 # End of ifilemutation interface.
888 890
889 891 # Start of ifilestorage interface.
890 892
891 893 def files(self):
892 894 return []
893 895
894 896 def storageinfo(
895 897 self,
896 898 exclusivefiles=False,
897 899 sharedfiles=False,
898 900 revisionscount=False,
899 901 trackedsize=False,
900 902 storedsize=False,
901 903 ):
902 904 d = {}
903 905
904 906 if exclusivefiles:
905 907 d[b'exclusivefiles'] = []
906 908
907 909 if sharedfiles:
908 910 # TODO list sqlite file(s) here.
909 911 d[b'sharedfiles'] = []
910 912
911 913 if revisionscount:
912 914 d[b'revisionscount'] = len(self)
913 915
914 916 if trackedsize:
915 917 d[b'trackedsize'] = sum(
916 918 len(self.revision(node)) for node in self._nodetorev
917 919 )
918 920
919 921 if storedsize:
920 922 # TODO implement this?
921 923 d[b'storedsize'] = None
922 924
923 925 return d
924 926
925 927 def verifyintegrity(self, state):
926 928 state[b'skipread'] = set()
927 929
928 930 for rev in self:
929 931 node = self.node(rev)
930 932
931 933 try:
932 934 self.revision(node)
933 935 except Exception as e:
934 936 yield sqliteproblem(
935 937 error=_(b'unpacking %s: %s') % (short(node), e), node=node
936 938 )
937 939
938 940 state[b'skipread'].add(node)
939 941
940 942 # End of ifilestorage interface.
941 943
942 944 def _checkhash(self, fulltext, node, p1=None, p2=None):
943 945 if p1 is None and p2 is None:
944 946 p1, p2 = self.parents(node)
945 947
946 948 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
947 949 return
948 950
949 951 try:
950 952 del self._revisioncache[node]
951 953 except KeyError:
952 954 pass
953 955
954 956 if storageutil.iscensoredtext(fulltext):
955 957 raise error.CensoredNodeError(self._path, node, fulltext)
956 958
957 959 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
958 960
959 961 def _addrawrevision(
960 962 self,
961 963 node,
962 964 revisiondata,
963 965 transaction,
964 966 linkrev,
965 967 p1,
966 968 p2,
967 969 storedelta=None,
968 970 flags=0,
969 971 ):
970 972 if self._pathid is None:
971 973 res = self._db.execute(
972 974 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
973 975 )
974 976 self._pathid = res.lastrowid
975 977
976 978 # For simplicity, always store a delta against p1.
977 979 # TODO we need a lot more logic here to make behavior reasonable.
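# (Concretely, when no precomputed delta is supplied, the code below diffs the
# new fulltext against the p1 revision with mdiff.textdiff, or stores the
# fulltext itself when p1 is the null revision.)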
978 980
979 981 if storedelta:
980 982 deltabase, delta = storedelta
981 983
982 984 if isinstance(deltabase, int):
983 985 deltabase = self.node(deltabase)
984 986
985 987 else:
986 988 assert revisiondata is not None
987 989 deltabase = p1
988 990
989 991 if deltabase == nullid:
990 992 delta = revisiondata
991 993 else:
992 994 delta = mdiff.textdiff(
993 995 self.revision(self.rev(deltabase)), revisiondata
994 996 )
995 997
996 998 # File index stores a pointer to its delta and the parent delta.
997 999 # The parent delta is stored via a pointer to the fileindex PK.
998 1000 if deltabase == nullid:
999 1001 baseid = None
1000 1002 else:
1001 1003 baseid = self._revisions[deltabase].rid
1002 1004
1003 1005 # Deltas are stored with a hash of their content. This allows
1004 1006 # us to de-duplicate. The table is configured to ignore conflicts
1005 1007 # and it is faster to just insert and silently noop than to look
1006 1008 # first.
1007 deltahash = hashlib.sha1(delta).digest()
1009 deltahash = hashutil.sha1(delta).digest()
1008 1010
1009 1011 if self._compengine == b'zstd':
1010 1012 deltablob = self._cctx.compress(delta)
1011 1013 compression = COMPRESSION_ZSTD
1012 1014 elif self._compengine == b'zlib':
1013 1015 deltablob = zlib.compress(delta)
1014 1016 compression = COMPRESSION_ZLIB
1015 1017 elif self._compengine == b'none':
1016 1018 deltablob = delta
1017 1019 compression = COMPRESSION_NONE
1018 1020 else:
1019 1021 raise error.ProgrammingError(
1020 1022 b'unhandled compression engine: %s' % self._compengine
1021 1023 )
1022 1024
1023 1025 # Don't store compressed data if it isn't practical.
1024 1026 if len(deltablob) >= len(delta):
1025 1027 deltablob = delta
1026 1028 compression = COMPRESSION_NONE
1027 1029
1028 1030 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1029 1031
1030 1032 rev = len(self)
1031 1033
1032 1034 if p1 == nullid:
1033 1035 p1rev = nullrev
1034 1036 else:
1035 1037 p1rev = self._nodetorev[p1]
1036 1038
1037 1039 if p2 == nullid:
1038 1040 p2rev = nullrev
1039 1041 else:
1040 1042 p2rev = self._nodetorev[p2]
1041 1043
1042 1044 rid = self._db.execute(
1043 1045 'INSERT INTO fileindex ('
1044 1046 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1045 1047 ' deltaid, deltabaseid) '
1046 1048 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1047 1049 (
1048 1050 self._pathid,
1049 1051 rev,
1050 1052 node,
1051 1053 p1rev,
1052 1054 p2rev,
1053 1055 linkrev,
1054 1056 flags,
1055 1057 deltaid,
1056 1058 baseid,
1057 1059 ),
1058 1060 ).lastrowid
1059 1061
1060 1062 entry = revisionentry(
1061 1063 rid=rid,
1062 1064 rev=rev,
1063 1065 node=node,
1064 1066 p1rev=p1rev,
1065 1067 p2rev=p2rev,
1066 1068 p1node=p1,
1067 1069 p2node=p2,
1068 1070 linkrev=linkrev,
1069 1071 flags=flags,
1070 1072 )
1071 1073
1072 1074 self._nodetorev[node] = rev
1073 1075 self._revtonode[rev] = node
1074 1076 self._revisions[node] = entry
1075 1077
1076 1078 return node
1077 1079
1078 1080
1079 1081 class sqliterepository(localrepo.localrepository):
1080 1082 def cancopy(self):
1081 1083 return False
1082 1084
1083 1085 def transaction(self, *args, **kwargs):
1084 1086 current = self.currenttransaction()
1085 1087
1086 1088 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1087 1089
1088 1090 if current:
1089 1091 return tr
1090 1092
1091 1093 self._dbconn.execute('BEGIN TRANSACTION')
1092 1094
1093 1095 def committransaction(_):
1094 1096 self._dbconn.commit()
1095 1097
1096 1098 tr.addfinalize(b'sqlitestore', committransaction)
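# The SQLite transaction is scoped to the Mercurial transaction: BEGIN was
# issued above when the outermost transaction opened, and the finalize
# callback commits the SQLite side as the Mercurial transaction is closed.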
1097 1099
1098 1100 return tr
1099 1101
1100 1102 @property
1101 1103 def _dbconn(self):
1102 1104 # SQLite connections can only be used on the thread that created
1103 1105 # them. In most cases, this "just works." However, hgweb uses
1104 1106 # multiple threads.
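# (Hence the handling below: the cached connection is keyed by the creating
# thread's id, and a fresh connection is opened whenever this property is
# read from a different thread.)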
1105 1107 tid = threading.current_thread().ident
1106 1108
1107 1109 if self._db:
1108 1110 if self._db[0] == tid:
1109 1111 return self._db[1]
1110 1112
1111 1113 db = makedb(self.svfs.join(b'db.sqlite'))
1112 1114 self._db = (tid, db)
1113 1115
1114 1116 return db
1115 1117
1116 1118
1117 1119 def makedb(path):
1118 1120 """Construct a database handle for a database at path."""
1119 1121
1120 1122 db = sqlite3.connect(encoding.strfromlocal(path))
1121 1123 db.text_factory = bytes
1122 1124
1123 1125 res = db.execute('PRAGMA user_version').fetchone()[0]
1124 1126
1125 1127 # New database.
1126 1128 if res == 0:
1127 1129 for statement in CREATE_SCHEMA:
1128 1130 db.execute(statement)
1129 1131
1130 1132 db.commit()
1131 1133
1132 1134 elif res == CURRENT_SCHEMA_VERSION:
1133 1135 pass
1134 1136
1135 1137 else:
1136 1138 raise error.Abort(_(b'sqlite database has unrecognized version'))
1137 1139
1138 1140 db.execute('PRAGMA journal_mode=WAL')
1139 1141
1140 1142 return db
1141 1143
1142 1144
1143 1145 def featuresetup(ui, supported):
1144 1146 supported.add(REQUIREMENT)
1145 1147
1146 1148 if zstd:
1147 1149 supported.add(REQUIREMENT_ZSTD)
1148 1150
1149 1151 supported.add(REQUIREMENT_ZLIB)
1150 1152 supported.add(REQUIREMENT_NONE)
1151 1153 supported.add(REQUIREMENT_SHALLOW_FILES)
1152 1154 supported.add(repository.NARROW_REQUIREMENT)
1153 1155
1154 1156
1155 1157 def newreporequirements(orig, ui, createopts):
1156 1158 if createopts[b'backend'] != b'sqlite':
1157 1159 return orig(ui, createopts)
1158 1160
1159 1161 # This restriction can be lifted once we have more confidence.
1160 1162 if b'sharedrepo' in createopts:
1161 1163 raise error.Abort(
1162 1164 _(b'shared repositories not supported with SQLite store')
1163 1165 )
1164 1166
1165 1167 # This filtering is out of an abundance of caution: we want to ensure
1166 1168 # we honor creation options and we do that by annotating exactly the
1167 1169 # creation options we recognize.
1168 1170 known = {
1169 1171 b'narrowfiles',
1170 1172 b'backend',
1171 1173 b'shallowfilestore',
1172 1174 }
1173 1175
1174 1176 unsupported = set(createopts) - known
1175 1177 if unsupported:
1176 1178 raise error.Abort(
1177 1179 _(b'SQLite store does not support repo creation option: %s')
1178 1180 % b', '.join(sorted(unsupported))
1179 1181 )
1180 1182
1181 1183 # Since we're a hybrid store that still relies on revlogs, we fall back
1182 1184 # to using the revlogv1 backend's storage requirements then adding our
1183 1185 # own requirement.
1184 1186 createopts[b'backend'] = b'revlogv1'
1185 1187 requirements = orig(ui, createopts)
1186 1188 requirements.add(REQUIREMENT)
1187 1189
1188 1190 compression = ui.config(b'storage', b'sqlite.compression')
1189 1191
1190 1192 if compression == b'zstd' and not zstd:
1191 1193 raise error.Abort(
1192 1194 _(
1193 1195 b'storage.sqlite.compression set to "zstd" but '
1194 1196 b'zstandard compression not available to this '
1195 1197 b'Mercurial install'
1196 1198 )
1197 1199 )
1198 1200
1199 1201 if compression == b'zstd':
1200 1202 requirements.add(REQUIREMENT_ZSTD)
1201 1203 elif compression == b'zlib':
1202 1204 requirements.add(REQUIREMENT_ZLIB)
1203 1205 elif compression == b'none':
1204 1206 requirements.add(REQUIREMENT_NONE)
1205 1207 else:
1206 1208 raise error.Abort(
1207 1209 _(
1208 1210 b'unknown compression engine defined in '
1209 1211 b'storage.sqlite.compression: %s'
1210 1212 )
1211 1213 % compression
1212 1214 )
1213 1215
1214 1216 if createopts.get(b'shallowfilestore'):
1215 1217 requirements.add(REQUIREMENT_SHALLOW_FILES)
1216 1218
1217 1219 return requirements
1218 1220
1219 1221
1220 1222 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1221 1223 class sqlitefilestorage(object):
1222 1224 """Repository file storage backed by SQLite."""
1223 1225
1224 1226 def file(self, path):
1225 1227 if path[0] == b'/':
1226 1228 path = path[1:]
1227 1229
1228 1230 if REQUIREMENT_ZSTD in self.requirements:
1229 1231 compression = b'zstd'
1230 1232 elif REQUIREMENT_ZLIB in self.requirements:
1231 1233 compression = b'zlib'
1232 1234 elif REQUIREMENT_NONE in self.requirements:
1233 1235 compression = b'none'
1234 1236 else:
1235 1237 raise error.Abort(
1236 1238 _(
1237 1239 b'unable to determine what compression engine '
1238 1240 b'to use for SQLite storage'
1239 1241 )
1240 1242 )
1241 1243
1242 1244 return sqlitefilestore(self._dbconn, path, compression)
1243 1245
1244 1246
1245 1247 def makefilestorage(orig, requirements, features, **kwargs):
1246 1248 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1247 1249 if REQUIREMENT in requirements:
1248 1250 if REQUIREMENT_SHALLOW_FILES in requirements:
1249 1251 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1250 1252
1251 1253 return sqlitefilestorage
1252 1254 else:
1253 1255 return orig(requirements=requirements, features=features, **kwargs)
1254 1256
1255 1257
1256 1258 def makemain(orig, ui, requirements, **kwargs):
1257 1259 if REQUIREMENT in requirements:
1258 1260 if REQUIREMENT_ZSTD in requirements and not zstd:
1259 1261 raise error.Abort(
1260 1262 _(
1261 1263 b'repository uses zstandard compression, which '
1262 1264 b'is not available to this Mercurial install'
1263 1265 )
1264 1266 )
1265 1267
1266 1268 return sqliterepository
1267 1269
1268 1270 return orig(requirements=requirements, **kwargs)
1269 1271
1270 1272
1271 1273 def verifierinit(orig, self, *args, **kwargs):
1272 1274 orig(self, *args, **kwargs)
1273 1275
1274 1276 # We don't care that files in the store don't align with what is
1275 1277 # advertised. So suppress these warnings.
1276 1278 self.warnorphanstorefiles = False
1277 1279
1278 1280
1279 1281 def extsetup(ui):
1280 1282 localrepo.featuresetupfuncs.add(featuresetup)
1281 1283 extensions.wrapfunction(
1282 1284 localrepo, b'newreporequirements', newreporequirements
1283 1285 )
1284 1286 extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
1285 1287 extensions.wrapfunction(localrepo, b'makemain', makemain)
1286 1288 extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)
1287 1289
1288 1290
1289 1291 def reposetup(ui, repo):
1290 1292 if isinstance(repo, sqliterepository):
1291 1293 repo._db = None
1292 1294
1293 1295 # TODO check for bundlerepository?