py3: use bytearray() instead of array('c', ...) constructions...
Augie Fackler
r31346:2a18e9e6 default
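This commit applies one mechanical substitution across branchmap.py, bundlerepo.py, and manifest.py: array('c', ...) buffers become bytearray objects, since the 'c' typecode no longer exists on Python 3 while bytearray is a mutable byte buffer on both Python 2 and 3. A minimal standalone sketch of the before/after idiom (illustration only, not part of the diff):

    # Python 2-only idiom being removed (the 'c' typecode is gone in Python 3):
    #     import array
    #     buf = array.array('c')
    #     buf.fromstring(data)    # load raw bytes
    #     text = buf.tostring()   # dump raw bytes
    # Portable replacement used throughout this commit:
    data = b'some raw bytes'
    buf = bytearray()
    buf[:] = data                 # load raw bytes (cf. _rbcrevs[:] = data below)
    text = bytes(buf)             # dump raw bytes (str on Python 2)
    assert text == data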
@@ -1,522 +1,520 b''
1 1 # branchmap.py - logic to compute, maintain and store branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import array
11 11 import struct
12 12
13 13 from .node import (
14 14 bin,
15 15 hex,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from . import (
20 20 encoding,
21 21 error,
22 22 scmutil,
23 23 util,
24 24 )
25 25
26 26 array = array.array
27 27 calcsize = struct.calcsize
28 28 pack = struct.pack
29 29 unpack = struct.unpack
30 30
31 31 def _filename(repo):
32 32 """name of a branchcache file for a given repo or repoview"""
33 33 filename = "cache/branch2"
34 34 if repo.filtername:
35 35 filename = '%s-%s' % (filename, repo.filtername)
36 36 return filename
37 37
38 38 def read(repo):
39 39 try:
40 40 f = repo.vfs(_filename(repo))
41 41 lines = f.read().split('\n')
42 42 f.close()
43 43 except (IOError, OSError):
44 44 return None
45 45
46 46 try:
47 47 cachekey = lines.pop(0).split(" ", 2)
48 48 last, lrev = cachekey[:2]
49 49 last, lrev = bin(last), int(lrev)
50 50 filteredhash = None
51 51 if len(cachekey) > 2:
52 52 filteredhash = bin(cachekey[2])
53 53 partial = branchcache(tipnode=last, tiprev=lrev,
54 54 filteredhash=filteredhash)
55 55 if not partial.validfor(repo):
56 56 # invalidate the cache
57 57 raise ValueError('tip differs')
58 58 cl = repo.changelog
59 59 for l in lines:
60 60 if not l:
61 61 continue
62 62 node, state, label = l.split(" ", 2)
63 63 if state not in 'oc':
64 64 raise ValueError('invalid branch state')
65 65 label = encoding.tolocal(label.strip())
66 66 node = bin(node)
67 67 if not cl.hasnode(node):
68 68 raise ValueError('node %s does not exist' % hex(node))
69 69 partial.setdefault(label, []).append(node)
70 70 if state == 'c':
71 71 partial._closednodes.add(node)
72 72 except KeyboardInterrupt:
73 73 raise
74 74 except Exception as inst:
75 75 if repo.ui.debugflag:
76 76 msg = 'invalid branchheads cache'
77 77 if repo.filtername is not None:
78 78 msg += ' (%s)' % repo.filtername
79 79 msg += ': %s\n'
80 80 repo.ui.debug(msg % inst)
81 81 partial = None
82 82 return partial
83 83
84 84 ### Nearest subset relation
85 85 # Nearest subset of filter X is a filter Y so that:
86 86 # * Y is included in X,
87 87 # * X - Y is as small as possible.
88 88 # This creates an ordering used for branchmap purposes.
89 89 # The ordering may be partial.
90 90 subsettable = {None: 'visible',
91 91 'visible': 'served',
92 92 'served': 'immutable',
93 93 'immutable': 'base'}
94 94
95 95 def updatecache(repo):
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 partial = repo._branchcaches.get(filtername)
99 99
100 100 revs = []
101 101 if partial is None or not partial.validfor(repo):
102 102 partial = read(repo)
103 103 if partial is None:
104 104 subsetname = subsettable.get(filtername)
105 105 if subsetname is None:
106 106 partial = branchcache()
107 107 else:
108 108 subset = repo.filtered(subsetname)
109 109 partial = subset.branchmap().copy()
110 110 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
111 111 revs.extend(r for r in extrarevs if r <= partial.tiprev)
112 112 revs.extend(cl.revs(start=partial.tiprev + 1))
113 113 if revs:
114 114 partial.update(repo, revs)
115 115 partial.write(repo)
116 116
117 117 assert partial.validfor(repo), filtername
118 118 repo._branchcaches[repo.filtername] = partial
119 119
120 120 def replacecache(repo, bm):
121 121 """Replace the branchmap cache for a repo with a branch mapping.
122 122
123 123 This is likely only called during clone with a branch map from a remote.
124 124 """
125 125 rbheads = []
126 126 closed = []
127 127 for bheads in bm.itervalues():
128 128 rbheads.extend(bheads)
129 129 for h in bheads:
130 130 r = repo.changelog.rev(h)
131 131 b, c = repo.changelog.branchinfo(r)
132 132 if c:
133 133 closed.append(h)
134 134
135 135 if rbheads:
136 136 rtiprev = max((int(repo.changelog.rev(node))
137 137 for node in rbheads))
138 138 cache = branchcache(bm,
139 139 repo[rtiprev].node(),
140 140 rtiprev,
141 141 closednodes=closed)
142 142
143 143 # Try to stick it as low as possible
144 144 # filters above served are unlikely to be fetched from a clone
145 145 for candidate in ('base', 'immutable', 'served'):
146 146 rview = repo.filtered(candidate)
147 147 if cache.validfor(rview):
148 148 repo._branchcaches[candidate] = cache
149 149 cache.write(rview)
150 150 break
151 151
152 152 class branchcache(dict):
153 153 """A dict like object that hold branches heads cache.
154 154
155 155 This cache is used to avoid costly computations to determine all the
156 156 branch heads of a repo.
157 157
158 158 The cache is serialized on disk in the following format:
159 159
160 160 <tip hex node> <tip rev number> [optional filtered repo hex hash]
161 161 <branch head hex node> <open/closed state> <branch name>
162 162 <branch head hex node> <open/closed state> <branch name>
163 163 ...
164 164
165 165 The first line is used to check if the cache is still valid. If the
166 166 branch cache is for a filtered repo view, an optional third hash is
167 167 included that hashes the hashes of all filtered revisions.
168 168
169 169 The open/closed state is represented by a single letter 'o' or 'c'.
170 170 This field can be used to avoid changelog reads when determining if a
171 171 branch head closes a branch or not.
172 172 """
173 173
174 174 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
175 175 filteredhash=None, closednodes=None):
176 176 super(branchcache, self).__init__(entries)
177 177 self.tipnode = tipnode
178 178 self.tiprev = tiprev
179 179 self.filteredhash = filteredhash
180 180 # closednodes is a set of nodes that close their branch. If the branch
181 181 # cache has been updated, it may contain nodes that are no longer
182 182 # heads.
183 183 if closednodes is None:
184 184 self._closednodes = set()
185 185 else:
186 186 self._closednodes = closednodes
187 187
188 188 def validfor(self, repo):
189 189 """Is the cache content valid regarding a repo
190 190
191 191 - False when cached tipnode is unknown or if we detect a strip.
192 192 - True when cache is up to date or a subset of current repo."""
193 193 try:
194 194 return ((self.tipnode == repo.changelog.node(self.tiprev))
195 195 and (self.filteredhash == \
196 196 scmutil.filteredhash(repo, self.tiprev)))
197 197 except IndexError:
198 198 return False
199 199
200 200 def _branchtip(self, heads):
201 201 '''Return a tuple with the last open head in heads and False;
202 202 otherwise return the last closed head and True.'''
203 203 tip = heads[-1]
204 204 closed = True
205 205 for h in reversed(heads):
206 206 if h not in self._closednodes:
207 207 tip = h
208 208 closed = False
209 209 break
210 210 return tip, closed
211 211
212 212 def branchtip(self, branch):
213 213 '''Return the tipmost open head on branch, otherwise return the
214 214 tipmost closed head on branch.
215 215 Raise KeyError for unknown branch.'''
216 216 return self._branchtip(self[branch])[0]
217 217
218 218 def branchheads(self, branch, closed=False):
219 219 heads = self[branch]
220 220 if not closed:
221 221 heads = [h for h in heads if h not in self._closednodes]
222 222 return heads
223 223
224 224 def iterbranches(self):
225 225 for bn, heads in self.iteritems():
226 226 yield (bn, heads) + self._branchtip(heads)
227 227
228 228 def copy(self):
229 229 """return an deep copy of the branchcache object"""
230 230 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
231 231 self._closednodes)
232 232
233 233 def write(self, repo):
234 234 try:
235 235 f = repo.vfs(_filename(repo), "w", atomictemp=True)
236 236 cachekey = [hex(self.tipnode), str(self.tiprev)]
237 237 if self.filteredhash is not None:
238 238 cachekey.append(hex(self.filteredhash))
239 239 f.write(" ".join(cachekey) + '\n')
240 240 nodecount = 0
241 241 for label, nodes in sorted(self.iteritems()):
242 242 for node in nodes:
243 243 nodecount += 1
244 244 if node in self._closednodes:
245 245 state = 'c'
246 246 else:
247 247 state = 'o'
248 248 f.write("%s %s %s\n" % (hex(node), state,
249 249 encoding.fromlocal(label)))
250 250 f.close()
251 251 repo.ui.log('branchcache',
252 252 'wrote %s branch cache with %d labels and %d nodes\n',
253 253 repo.filtername, len(self), nodecount)
254 254 except (IOError, OSError, error.Abort) as inst:
255 255 repo.ui.debug("couldn't write branch cache: %s\n" % inst)
256 256 # Abort may be raised by a read-only opener
257 257 pass
258 258
259 259 def update(self, repo, revgen):
260 260 """Given a branchhead cache, self, that may have extra nodes or be
261 261 missing heads, and a generator of nodes that are strictly a superset of
262 262 the missing heads, this function updates self to be correct.
263 263 """
264 264 starttime = util.timer()
265 265 cl = repo.changelog
266 266 # collect new branch entries
267 267 newbranches = {}
268 268 getbranchinfo = repo.revbranchcache().branchinfo
269 269 for r in revgen:
270 270 branch, closesbranch = getbranchinfo(r)
271 271 newbranches.setdefault(branch, []).append(r)
272 272 if closesbranch:
273 273 self._closednodes.add(cl.node(r))
274 274
275 275 # fetch current topological heads to speed up filtering
276 276 topoheads = set(cl.headrevs())
277 277
278 278 # if older branchheads are reachable from new ones, they aren't
279 279 # really branchheads. Note checking parents is insufficient:
280 280 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
281 281 for branch, newheadrevs in newbranches.iteritems():
282 282 bheads = self.setdefault(branch, [])
283 283 bheadset = set(cl.rev(node) for node in bheads)
284 284
285 285 # This has been tested True on all internal usages of this function.
286 286 # Run it again in case of doubt:
287 287 # assert not (set(bheadrevs) & set(newheadrevs))
288 288 newheadrevs.sort()
289 289 bheadset.update(newheadrevs)
290 290
291 291 # This prunes out two kinds of heads - heads that are superseded by
292 292 # a head in newheadrevs, and newheadrevs that are not heads because
293 293 # an existing head is their descendant.
294 294 uncertain = bheadset - topoheads
295 295 if uncertain:
296 296 floorrev = min(uncertain)
297 297 ancestors = set(cl.ancestors(newheadrevs, floorrev))
298 298 bheadset -= ancestors
299 299 bheadrevs = sorted(bheadset)
300 300 self[branch] = [cl.node(rev) for rev in bheadrevs]
301 301 tiprev = bheadrevs[-1]
302 302 if tiprev > self.tiprev:
303 303 self.tipnode = cl.node(tiprev)
304 304 self.tiprev = tiprev
305 305
306 306 if not self.validfor(repo):
307 307 # cache key is not valid anymore
308 308 self.tipnode = nullid
309 309 self.tiprev = nullrev
310 310 for heads in self.values():
311 311 tiprev = max(cl.rev(node) for node in heads)
312 312 if tiprev > self.tiprev:
313 313 self.tipnode = cl.node(tiprev)
314 314 self.tiprev = tiprev
315 315 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
316 316
317 317 duration = util.timer() - starttime
318 318 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
319 319 repo.filtername, duration)
320 320
321 321 # Revision branch info cache
322 322
323 323 _rbcversion = '-v1'
324 324 _rbcnames = 'cache/rbc-names' + _rbcversion
325 325 _rbcrevs = 'cache/rbc-revs' + _rbcversion
326 326 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
327 327 _rbcrecfmt = '>4sI'
328 328 _rbcrecsize = calcsize(_rbcrecfmt)
329 329 _rbcnodelen = 4
330 330 _rbcbranchidxmask = 0x7fffffff
331 331 _rbccloseflag = 0x80000000
332 332
333 333 class revbranchcache(object):
334 334 """Persistent cache, mapping from revision number to branch name and close.
335 335 This is a low level cache, independent of filtering.
336 336
337 337 Branch names are stored in rbc-names in internal encoding separated by 0.
338 338 rbc-names is append-only, and each branch name is only stored once and will
339 339 thus have a unique index.
340 340
341 341 The branch info for each revision is stored in rbc-revs as constant size
342 342 records. The whole file is read into memory, but it is only 'parsed' on
343 343 demand. The file is usually append-only but will be truncated if repo
344 344 modification is detected.
345 345 The record for each revision contains the first 4 bytes of the
346 346 corresponding node hash, and the record is only used if it still matches.
347 347 Even a completely trashed rbc-revs will thus still give the right result
348 348 while converging towards full recovery ... assuming no incorrectly matching
349 349 node hashes.
350 350 The record also contains 4 bytes where 31 bits contain the index of the
351 351 branch and the last bit indicates that it is a branch close commit.
352 352 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
353 353 and will grow with it but be 1/8th of its size.
354 354 """
355 355
356 356 def __init__(self, repo, readonly=True):
357 357 assert repo.filtername is None
358 358 self._repo = repo
359 359 self._names = [] # branch names in local encoding with static index
360 self._rbcrevs = array('c') # structs of type _rbcrecfmt
360 self._rbcrevs = bytearray()
361 361 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
362 362 try:
363 363 bndata = repo.vfs.read(_rbcnames)
364 364 self._rbcsnameslen = len(bndata) # for verification before writing
365 365 self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
366 366 except (IOError, OSError):
367 367 if readonly:
368 368 # don't try to use cache - fall back to the slow path
369 369 self.branchinfo = self._branchinfo
370 370
371 371 if self._names:
372 372 try:
373 373 data = repo.vfs.read(_rbcrevs)
374 self._rbcrevs.fromstring(data)
374 self._rbcrevs[:] = data
375 375 except (IOError, OSError) as inst:
376 376 repo.ui.debug("couldn't read revision branch cache: %s\n" %
377 377 inst)
378 378 # remember number of good records on disk
379 379 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
380 380 len(repo.changelog))
381 381 if self._rbcrevslen == 0:
382 382 self._names = []
383 383 self._rbcnamescount = len(self._names) # number of names read at
384 384 # _rbcsnameslen
385 385 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
386 386
387 387 def _clear(self):
388 388 self._rbcsnameslen = 0
389 389 del self._names[:]
390 390 self._rbcnamescount = 0
391 391 self._namesreverse.clear()
392 392 self._rbcrevslen = len(self._repo.changelog)
393 self._rbcrevs = array('c')
394 self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize))
393 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
395 394
396 395 def branchinfo(self, rev):
397 396 """Return branch name and close flag for rev, using and updating
398 397 persistent cache."""
399 398 changelog = self._repo.changelog
400 399 rbcrevidx = rev * _rbcrecsize
401 400
402 401 # avoid negative index, changelog.read(nullrev) is fast without cache
403 402 if rev == nullrev:
404 403 return changelog.branchinfo(rev)
405 404
406 405 # if requested rev isn't allocated, grow and cache the rev info
407 406 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
408 407 return self._branchinfo(rev)
409 408
410 409 # fast path: extract data from cache, use it if node is matching
411 410 reponode = changelog.node(rev)[:_rbcnodelen]
412 411 cachenode, branchidx = unpack(
413 412 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
414 413 close = bool(branchidx & _rbccloseflag)
415 414 if close:
416 415 branchidx &= _rbcbranchidxmask
417 416 if cachenode == '\0\0\0\0':
418 417 pass
419 418 elif cachenode == reponode:
420 419 try:
421 420 return self._names[branchidx], close
422 421 except IndexError:
423 422 # recover from invalid reference to unknown branch
424 423 self._repo.ui.debug("referenced branch names not found"
425 424 " - rebuilding revision branch cache from scratch\n")
426 425 self._clear()
427 426 else:
428 427 # rev/node map has changed, invalidate the cache from here up
429 428 self._repo.ui.debug("history modification detected - truncating "
430 429 "revision branch cache to revision %s\n" % rev)
431 430 truncate = rbcrevidx + _rbcrecsize
432 431 del self._rbcrevs[truncate:]
433 432 self._rbcrevslen = min(self._rbcrevslen, truncate)
434 433
435 434 # fall back to slow path and make sure it will be written to disk
436 435 return self._branchinfo(rev)
437 436
438 437 def _branchinfo(self, rev):
439 438 """Retrieve branch info from changelog and update _rbcrevs"""
440 439 changelog = self._repo.changelog
441 440 b, close = changelog.branchinfo(rev)
442 441 if b in self._namesreverse:
443 442 branchidx = self._namesreverse[b]
444 443 else:
445 444 branchidx = len(self._names)
446 445 self._names.append(b)
447 446 self._namesreverse[b] = branchidx
448 447 reponode = changelog.node(rev)
449 448 if close:
450 449 branchidx |= _rbccloseflag
451 450 self._setcachedata(rev, reponode, branchidx)
452 451 return b, close
453 452
454 453 def _setcachedata(self, rev, node, branchidx):
455 454 """Writes the node's branch data to the in-memory cache data."""
456 455 rbcrevidx = rev * _rbcrecsize
457 rec = array('c')
458 rec.fromstring(pack(_rbcrecfmt, node, branchidx))
456 rec = bytearray(pack(_rbcrecfmt, node, branchidx))
459 457 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
460 458 self._rbcrevs.extend('\0' *
461 459 (len(self._repo.changelog) * _rbcrecsize -
462 460 len(self._rbcrevs)))
463 461 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
464 462 self._rbcrevslen = min(self._rbcrevslen, rev)
465 463
466 464 tr = self._repo.currenttransaction()
467 465 if tr:
468 466 tr.addfinalize('write-revbranchcache', self.write)
469 467
470 468 def write(self, tr=None):
471 469 """Save branch cache if it is dirty."""
472 470 repo = self._repo
473 471 wlock = None
474 472 step = ''
475 473 try:
476 474 if self._rbcnamescount < len(self._names):
477 475 step = ' names'
478 476 wlock = repo.wlock(wait=False)
479 477 if self._rbcnamescount != 0:
480 478 f = repo.vfs.open(_rbcnames, 'ab')
481 479 if f.tell() == self._rbcsnameslen:
482 480 f.write('\0')
483 481 else:
484 482 f.close()
485 483 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
486 484 self._rbcnamescount = 0
487 485 self._rbcrevslen = 0
488 486 if self._rbcnamescount == 0:
489 487 # before rewriting names, make sure references are removed
490 488 repo.vfs.unlinkpath(_rbcrevs, ignoremissing=True)
491 489 f = repo.vfs.open(_rbcnames, 'wb')
492 490 f.write('\0'.join(encoding.fromlocal(b)
493 491 for b in self._names[self._rbcnamescount:]))
494 492 self._rbcsnameslen = f.tell()
495 493 f.close()
496 494 self._rbcnamescount = len(self._names)
497 495
498 496 start = self._rbcrevslen * _rbcrecsize
499 497 if start != len(self._rbcrevs):
500 498 step = ''
501 499 if wlock is None:
502 500 wlock = repo.wlock(wait=False)
503 501 revs = min(len(repo.changelog),
504 502 len(self._rbcrevs) // _rbcrecsize)
505 503 f = repo.vfs.open(_rbcrevs, 'ab')
506 504 if f.tell() != start:
507 505 repo.ui.debug("truncating %s to %s\n" % (_rbcrevs, start))
508 506 f.seek(start)
509 507 if f.tell() != start:
510 508 start = 0
511 509 f.seek(start)
512 510 f.truncate()
513 511 end = revs * _rbcrecsize
514 512 f.write(self._rbcrevs[start:end])
515 513 f.close()
516 514 self._rbcrevslen = revs
517 515 except (IOError, OSError, error.Abort, error.LockError) as inst:
518 516 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
519 517 % (step, inst))
520 518 finally:
521 519 if wlock is not None:
522 520 wlock.release()
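For orientation, the rbc-revs record format documented above ('>4sI': a 4-byte node-hash prefix plus a 4-byte big-endian unsigned integer whose top bit marks a branch-closing commit) round-trips as sketched below; node4 and branchidx are made-up example values:

    import struct

    _rbcrecfmt = '>4sI'             # 4-byte node prefix + 4-byte unsigned int
    _rbccloseflag = 0x80000000      # top bit: commit closes its branch
    _rbcbranchidxmask = 0x7fffffff  # low 31 bits: index into rbc-names

    node4 = b'\xde\xad\xbe\xef'     # first 4 bytes of a node hash (example)
    branchidx = 5                   # position of the branch name in rbc-names
    rec = struct.pack(_rbcrecfmt, node4, branchidx | _rbccloseflag)
    assert len(rec) == struct.calcsize(_rbcrecfmt) == 8

    prefix, value = struct.unpack(_rbcrecfmt, rec)
    assert prefix == node4
    assert value & _rbcbranchidxmask == 5
    assert bool(value & _rbccloseflag)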
@@ -1,557 +1,557 b''
1 1 # bundlerepo.py - repository class for viewing uncompressed bundles
2 2 #
3 3 # Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Repository class for viewing uncompressed bundles.
9 9
10 10 This provides a read-only repository interface to bundles as if they
11 11 were part of the actual repository.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import os
17 17 import shutil
18 18 import tempfile
19 19
20 20 from .i18n import _
21 21 from .node import nullid
22 22
23 23 from . import (
24 24 bundle2,
25 25 changegroup,
26 26 changelog,
27 27 cmdutil,
28 28 discovery,
29 29 error,
30 30 exchange,
31 31 filelog,
32 32 localrepo,
33 33 manifest,
34 34 mdiff,
35 35 node as nodemod,
36 36 pathutil,
37 37 phases,
38 38 pycompat,
39 39 revlog,
40 40 util,
41 41 vfs as vfsmod,
42 42 )
43 43
44 44 class bundlerevlog(revlog.revlog):
45 45 def __init__(self, opener, indexfile, bundle, linkmapper):
46 46 # How it works:
47 47 # To retrieve a revision, we need to know the offset of the revision in
48 48 # the bundle (an unbundle object). We store this offset in the index
49 49 # (start). The base of the delta is stored in the base field.
50 50 #
51 51 # To differentiate a rev in the bundle from a rev in the revlog, we
52 52 # check revision against repotiprev.
53 53 opener = vfsmod.readonlyvfs(opener)
54 54 revlog.revlog.__init__(self, opener, indexfile)
55 55 self.bundle = bundle
56 56 n = len(self)
57 57 self.repotiprev = n - 1
58 58 chain = None
59 59 self.bundlerevs = set() # used by 'bundle()' revset expression
60 60 getchunk = lambda: bundle.deltachunk(chain)
61 61 for chunkdata in iter(getchunk, {}):
62 62 node = chunkdata['node']
63 63 p1 = chunkdata['p1']
64 64 p2 = chunkdata['p2']
65 65 cs = chunkdata['cs']
66 66 deltabase = chunkdata['deltabase']
67 67 delta = chunkdata['delta']
68 68
69 69 size = len(delta)
70 70 start = bundle.tell() - size
71 71
72 72 link = linkmapper(cs)
73 73 if node in self.nodemap:
74 74 # this can happen if two branches make the same change
75 75 chain = node
76 76 self.bundlerevs.add(self.nodemap[node])
77 77 continue
78 78
79 79 for p in (p1, p2):
80 80 if p not in self.nodemap:
81 81 raise error.LookupError(p, self.indexfile,
82 82 _("unknown parent"))
83 83
84 84 if deltabase not in self.nodemap:
85 85 raise error.LookupError(deltabase, self.indexfile,
86 86 _('unknown delta base'))
87 87
88 88 baserev = self.rev(deltabase)
89 89 # start, size, full unc. size, base (unused), link, p1, p2, node
90 90 e = (revlog.offset_type(start, 0), size, -1, baserev, link,
91 91 self.rev(p1), self.rev(p2), node)
92 92 self.index.insert(-1, e)
93 93 self.nodemap[node] = n
94 94 self.bundlerevs.add(n)
95 95 chain = node
96 96 n += 1
97 97
98 98 def _chunk(self, rev):
99 99 # Warning: in case of bundle, the diff is against what we stored as
100 100 # delta base, not against rev - 1
101 101 # XXX: could use some caching
102 102 if rev <= self.repotiprev:
103 103 return revlog.revlog._chunk(self, rev)
104 104 self.bundle.seek(self.start(rev))
105 105 return self.bundle.read(self.length(rev))
106 106
107 107 def revdiff(self, rev1, rev2):
108 108 """return or calculate a delta between two revisions"""
109 109 if rev1 > self.repotiprev and rev2 > self.repotiprev:
110 110 # hot path for bundle
111 111 revb = self.index[rev2][3]
112 112 if revb == rev1:
113 113 return self._chunk(rev2)
114 114 elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
115 115 return revlog.revlog.revdiff(self, rev1, rev2)
116 116
117 117 return mdiff.textdiff(self.revision(self.node(rev1)),
118 118 self.revision(self.node(rev2)))
119 119
120 120 def revision(self, nodeorrev, raw=False):
121 121 """return an uncompressed revision of a given node or revision
122 122 number.
123 123 """
124 124 if isinstance(nodeorrev, int):
125 125 rev = nodeorrev
126 126 node = self.node(rev)
127 127 else:
128 128 node = nodeorrev
129 129 rev = self.rev(node)
130 130
131 131 if node == nullid:
132 132 return ""
133 133
134 134 text = None
135 135 chain = []
136 136 iterrev = rev
137 137 # reconstruct the revision if it is from a changegroup
138 138 while iterrev > self.repotiprev:
139 139 if self._cache and self._cache[1] == iterrev:
140 140 text = self._cache[2]
141 141 break
142 142 chain.append(iterrev)
143 143 iterrev = self.index[iterrev][3]
144 144 if text is None:
145 145 text = self.baserevision(iterrev)
146 146
147 147 while chain:
148 148 delta = self._chunk(chain.pop())
149 149 text = mdiff.patches(text, [delta])
150 150
151 151 text, validatehash = self._processflags(text, self.flags(rev),
152 152 'read', raw=raw)
153 153 if validatehash:
154 154 self.checkhash(text, node, rev=rev)
155 155 self._cache = (node, rev, text)
156 156 return text
157 157
158 158 def baserevision(self, nodeorrev):
159 159 # Revlog subclasses may override 'revision' method to modify format of
160 160 # content retrieved from revlog. To use bundlerevlog with such a class one
161 161 # needs to override 'baserevision' and make a more specific call here.
162 162 return revlog.revlog.revision(self, nodeorrev)
163 163
164 164 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
165 165 raise NotImplementedError
166 166 def addgroup(self, revs, linkmapper, transaction):
167 167 raise NotImplementedError
168 168 def strip(self, rev, minlink):
169 169 raise NotImplementedError
170 170 def checksize(self):
171 171 raise NotImplementedError
172 172
173 173 class bundlechangelog(bundlerevlog, changelog.changelog):
174 174 def __init__(self, opener, bundle):
175 175 changelog.changelog.__init__(self, opener)
176 176 linkmapper = lambda x: x
177 177 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
178 178 linkmapper)
179 179
180 180 def baserevision(self, nodeorrev):
181 181 # Although changelog doesn't override 'revision' method, some extensions
182 182 # may replace this class with another that does. Same story with
183 183 # manifest and filelog classes.
184 184
185 185 # This bypasses filtering on changelog.node() and rev() because we need
186 186 # revision text of the bundle base even if it is hidden.
187 187 oldfilter = self.filteredrevs
188 188 try:
189 189 self.filteredrevs = ()
190 190 return changelog.changelog.revision(self, nodeorrev)
191 191 finally:
192 192 self.filteredrevs = oldfilter
193 193
194 194 class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
195 195 def __init__(self, opener, bundle, linkmapper, dirlogstarts=None, dir=''):
196 196 manifest.manifestrevlog.__init__(self, opener, dir=dir)
197 197 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
198 198 linkmapper)
199 199 if dirlogstarts is None:
200 200 dirlogstarts = {}
201 201 if self.bundle.version == "03":
202 202 dirlogstarts = _getfilestarts(self.bundle)
203 203 self._dirlogstarts = dirlogstarts
204 204 self._linkmapper = linkmapper
205 205
206 206 def baserevision(self, nodeorrev):
207 207 node = nodeorrev
208 208 if isinstance(node, int):
209 209 node = self.node(node)
210 210
211 211 if node in self.fulltextcache:
212 result = self.fulltextcache[node].tostring()
212 result = '%s' % self.fulltextcache[node]
213 213 else:
214 214 result = manifest.manifestrevlog.revision(self, nodeorrev)
215 215 return result
216 216
217 217 def dirlog(self, d):
218 218 if d in self._dirlogstarts:
219 219 self.bundle.seek(self._dirlogstarts[d])
220 220 return bundlemanifest(
221 221 self.opener, self.bundle, self._linkmapper,
222 222 self._dirlogstarts, dir=d)
223 223 return super(bundlemanifest, self).dirlog(d)
224 224
225 225 class bundlefilelog(bundlerevlog, filelog.filelog):
226 226 def __init__(self, opener, path, bundle, linkmapper):
227 227 filelog.filelog.__init__(self, opener, path)
228 228 bundlerevlog.__init__(self, opener, self.indexfile, bundle,
229 229 linkmapper)
230 230
231 231 def baserevision(self, nodeorrev):
232 232 return filelog.filelog.revision(self, nodeorrev)
233 233
234 234 class bundlepeer(localrepo.localpeer):
235 235 def canpush(self):
236 236 return False
237 237
238 238 class bundlephasecache(phases.phasecache):
239 239 def __init__(self, *args, **kwargs):
240 240 super(bundlephasecache, self).__init__(*args, **kwargs)
241 241 if util.safehasattr(self, 'opener'):
242 242 self.opener = vfsmod.readonlyvfs(self.opener)
243 243
244 244 def write(self):
245 245 raise NotImplementedError
246 246
247 247 def _write(self, fp):
248 248 raise NotImplementedError
249 249
250 250 def _updateroots(self, phase, newroots, tr):
251 251 self.phaseroots[phase] = newroots
252 252 self.invalidate()
253 253 self.dirty = True
254 254
255 255 def _getfilestarts(bundle):
256 256 bundlefilespos = {}
257 257 for chunkdata in iter(bundle.filelogheader, {}):
258 258 fname = chunkdata['filename']
259 259 bundlefilespos[fname] = bundle.tell()
260 260 for chunk in iter(lambda: bundle.deltachunk(None), {}):
261 261 pass
262 262 return bundlefilespos
263 263
264 264 class bundlerepository(localrepo.localrepository):
265 265 def __init__(self, ui, path, bundlename):
266 266 def _writetempbundle(read, suffix, header=''):
267 267 """Write a temporary file to disk
268 268
269 269 This is a closure because we need to make sure this is tracked by
270 270 self.tempfile for cleanup purposes."""
271 271 fdtemp, temp = self.vfs.mkstemp(prefix="hg-bundle-",
272 272 suffix=".hg10un")
273 273 self.tempfile = temp
274 274
275 275 with os.fdopen(fdtemp, pycompat.sysstr('wb')) as fptemp:
276 276 fptemp.write(header)
277 277 while True:
278 278 chunk = read(2**18)
279 279 if not chunk:
280 280 break
281 281 fptemp.write(chunk)
282 282
283 283 return self.vfs.open(self.tempfile, mode="rb")
284 284 self._tempparent = None
285 285 try:
286 286 localrepo.localrepository.__init__(self, ui, path)
287 287 except error.RepoError:
288 288 self._tempparent = tempfile.mkdtemp()
289 289 localrepo.instance(ui, self._tempparent, 1)
290 290 localrepo.localrepository.__init__(self, ui, self._tempparent)
291 291 self.ui.setconfig('phases', 'publish', False, 'bundlerepo')
292 292
293 293 if path:
294 294 self._url = 'bundle:' + util.expandpath(path) + '+' + bundlename
295 295 else:
296 296 self._url = 'bundle:' + bundlename
297 297
298 298 self.tempfile = None
299 299 f = util.posixfile(bundlename, "rb")
300 300 self.bundlefile = self.bundle = exchange.readbundle(ui, f, bundlename)
301 301
302 302 if isinstance(self.bundle, bundle2.unbundle20):
303 303 cgstream = None
304 304 for part in self.bundle.iterparts():
305 305 if part.type == 'changegroup':
306 306 if cgstream is not None:
307 307 raise NotImplementedError("can't process "
308 308 "multiple changegroups")
309 309 cgstream = part
310 310 version = part.params.get('version', '01')
311 311 legalcgvers = changegroup.supportedincomingversions(self)
312 312 if version not in legalcgvers:
313 313 msg = _('Unsupported changegroup version: %s')
314 314 raise error.Abort(msg % version)
315 315 if self.bundle.compressed():
316 316 cgstream = _writetempbundle(part.read,
317 317 ".cg%sun" % version)
318 318
319 319 if cgstream is None:
320 320 raise error.Abort(_('No changegroups found'))
321 321 cgstream.seek(0)
322 322
323 323 self.bundle = changegroup.getunbundler(version, cgstream, 'UN')
324 324
325 325 elif self.bundle.compressed():
326 326 f = _writetempbundle(self.bundle.read, '.hg10un', header='HG10UN')
327 327 self.bundlefile = self.bundle = exchange.readbundle(ui, f,
328 328 bundlename,
329 329 self.vfs)
330 330
331 331 # dict with the mapping 'filename' -> position in the bundle
332 332 self.bundlefilespos = {}
333 333
334 334 self.firstnewrev = self.changelog.repotiprev + 1
335 335 phases.retractboundary(self, None, phases.draft,
336 336 [ctx.node() for ctx in self[self.firstnewrev:]])
337 337
338 338 @localrepo.unfilteredpropertycache
339 339 def _phasecache(self):
340 340 return bundlephasecache(self, self._phasedefaults)
341 341
342 342 @localrepo.unfilteredpropertycache
343 343 def changelog(self):
344 344 # consume the header if it exists
345 345 self.bundle.changelogheader()
346 346 c = bundlechangelog(self.svfs, self.bundle)
347 347 self.manstart = self.bundle.tell()
348 348 return c
349 349
350 350 def _constructmanifest(self):
351 351 self.bundle.seek(self.manstart)
352 352 # consume the header if it exists
353 353 self.bundle.manifestheader()
354 354 linkmapper = self.unfiltered().changelog.rev
355 355 m = bundlemanifest(self.svfs, self.bundle, linkmapper)
356 356 self.filestart = self.bundle.tell()
357 357 return m
358 358
359 359 @localrepo.unfilteredpropertycache
360 360 def manstart(self):
361 361 self.changelog
362 362 return self.manstart
363 363
364 364 @localrepo.unfilteredpropertycache
365 365 def filestart(self):
366 366 self.manifestlog
367 367 return self.filestart
368 368
369 369 def url(self):
370 370 return self._url
371 371
372 372 def file(self, f):
373 373 if not self.bundlefilespos:
374 374 self.bundle.seek(self.filestart)
375 375 self.bundlefilespos = _getfilestarts(self.bundle)
376 376
377 377 if f in self.bundlefilespos:
378 378 self.bundle.seek(self.bundlefilespos[f])
379 379 linkmapper = self.unfiltered().changelog.rev
380 380 return bundlefilelog(self.svfs, f, self.bundle, linkmapper)
381 381 else:
382 382 return filelog.filelog(self.svfs, f)
383 383
384 384 def close(self):
385 385 """Close assigned bundle file immediately."""
386 386 self.bundlefile.close()
387 387 if self.tempfile is not None:
388 388 self.vfs.unlink(self.tempfile)
389 389 if self._tempparent:
390 390 shutil.rmtree(self._tempparent, True)
391 391
392 392 def cancopy(self):
393 393 return False
394 394
395 395 def peer(self):
396 396 return bundlepeer(self)
397 397
398 398 def getcwd(self):
399 399 return pycompat.getcwd() # always outside the repo
400 400
401 401 # Check if parents exist in localrepo before setting
402 402 def setparents(self, p1, p2=nullid):
403 403 p1rev = self.changelog.rev(p1)
404 404 p2rev = self.changelog.rev(p2)
405 405 msg = _("setting parent to node %s that only exists in the bundle\n")
406 406 if self.changelog.repotiprev < p1rev:
407 407 self.ui.warn(msg % nodemod.hex(p1))
408 408 if self.changelog.repotiprev < p2rev:
409 409 self.ui.warn(msg % nodemod.hex(p2))
410 410 return super(bundlerepository, self).setparents(p1, p2)
411 411
412 412 def instance(ui, path, create):
413 413 if create:
414 414 raise error.Abort(_('cannot create new bundle repository'))
415 415 # internal config: bundle.mainreporoot
416 416 parentpath = ui.config("bundle", "mainreporoot", "")
417 417 if not parentpath:
418 418 # try to find the correct path to the working directory repo
419 419 parentpath = cmdutil.findrepo(pycompat.getcwd())
420 420 if parentpath is None:
421 421 parentpath = ''
422 422 if parentpath:
423 423 # Try to make the full path relative so we get a nice, short URL.
424 424 # In particular, we don't want temp dir names in test outputs.
425 425 cwd = pycompat.getcwd()
426 426 if parentpath == cwd:
427 427 parentpath = ''
428 428 else:
429 429 cwd = pathutil.normasprefix(cwd)
430 430 if parentpath.startswith(cwd):
431 431 parentpath = parentpath[len(cwd):]
432 432 u = util.url(path)
433 433 path = u.localpath()
434 434 if u.scheme == 'bundle':
435 435 s = path.split("+", 1)
436 436 if len(s) == 1:
437 437 repopath, bundlename = parentpath, s[0]
438 438 else:
439 439 repopath, bundlename = s
440 440 else:
441 441 repopath, bundlename = parentpath, path
442 442 return bundlerepository(ui, repopath, bundlename)
443 443
444 444 class bundletransactionmanager(object):
445 445 def transaction(self):
446 446 return None
447 447
448 448 def close(self):
449 449 raise NotImplementedError
450 450
451 451 def release(self):
452 452 raise NotImplementedError
453 453
454 454 def getremotechanges(ui, repo, other, onlyheads=None, bundlename=None,
455 455 force=False):
456 456 '''obtains a bundle of changes incoming from other
457 457
458 458 "onlyheads" restricts the returned changes to those reachable from the
459 459 specified heads.
460 460 "bundlename", if given, stores the bundle to this file path permanently;
461 461 otherwise it's stored to a temp file and gets deleted again when you call
462 462 the returned "cleanupfn".
463 463 "force" indicates whether to proceed on unrelated repos.
464 464
465 465 Returns a tuple (local, csets, cleanupfn):
466 466
467 467 "local" is a local repo from which to obtain the actual incoming
468 468 changesets; it is a bundlerepo for the obtained bundle when the
469 469 original "other" is remote.
470 470 "csets" lists the incoming changeset node ids.
471 471 "cleanupfn" must be called without arguments when you're done processing
472 472 the changes; it closes both the original "other" and the one returned
473 473 here.
474 474 '''
475 475 tmp = discovery.findcommonincoming(repo, other, heads=onlyheads,
476 476 force=force)
477 477 common, incoming, rheads = tmp
478 478 if not incoming:
479 479 try:
480 480 if bundlename:
481 481 os.unlink(bundlename)
482 482 except OSError:
483 483 pass
484 484 return repo, [], other.close
485 485
486 486 commonset = set(common)
487 487 rheads = [x for x in rheads if x not in commonset]
488 488
489 489 bundle = None
490 490 bundlerepo = None
491 491 localrepo = other.local()
492 492 if bundlename or not localrepo:
493 493 # create a bundle (uncompressed if other repo is not local)
494 494
495 495 # developer config: devel.legacy.exchange
496 496 legexc = ui.configlist('devel', 'legacy.exchange')
497 497 forcebundle1 = 'bundle2' not in legexc and 'bundle1' in legexc
498 498 canbundle2 = (not forcebundle1
499 499 and other.capable('getbundle')
500 500 and other.capable('bundle2'))
501 501 if canbundle2:
502 502 kwargs = {}
503 503 kwargs['common'] = common
504 504 kwargs['heads'] = rheads
505 505 kwargs['bundlecaps'] = exchange.caps20to10(repo)
506 506 kwargs['cg'] = True
507 507 b2 = other.getbundle('incoming', **kwargs)
508 508 fname = bundle = changegroup.writechunks(ui, b2._forwardchunks(),
509 509 bundlename)
510 510 else:
511 511 if other.capable('getbundle'):
512 512 cg = other.getbundle('incoming', common=common, heads=rheads)
513 513 elif onlyheads is None and not other.capable('changegroupsubset'):
514 514 # compat with older servers when pulling all remote heads
515 515 cg = other.changegroup(incoming, "incoming")
516 516 rheads = None
517 517 else:
518 518 cg = other.changegroupsubset(incoming, rheads, 'incoming')
519 519 if localrepo:
520 520 bundletype = "HG10BZ"
521 521 else:
522 522 bundletype = "HG10UN"
523 523 fname = bundle = bundle2.writebundle(ui, cg, bundlename,
524 524 bundletype)
525 525 # keep written bundle?
526 526 if bundlename:
527 527 bundle = None
528 528 if not localrepo:
529 529 # use the created uncompressed bundlerepo
530 530 localrepo = bundlerepo = bundlerepository(repo.baseui, repo.root,
531 531 fname)
532 532 # this repo contains local and other now, so filter out local again
533 533 common = repo.heads()
534 534 if localrepo:
535 535 # Part of common may be remotely filtered
536 536 # So use an unfiltered version
537 537 # The discovery process probably need cleanup to avoid that
538 538 localrepo = localrepo.unfiltered()
539 539
540 540 csets = localrepo.changelog.findmissing(common, rheads)
541 541
542 542 if bundlerepo:
543 543 reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev:]]
544 544 remotephases = other.listkeys('phases')
545 545
546 546 pullop = exchange.pulloperation(bundlerepo, other, heads=reponodes)
547 547 pullop.trmanager = bundletransactionmanager()
548 548 exchange._pullapplyphases(pullop, remotephases)
549 549
550 550 def cleanup():
551 551 if bundlerepo:
552 552 bundlerepo.close()
553 553 if bundle:
554 554 os.unlink(bundle)
555 555 other.close()
556 556
557 557 return (localrepo, csets, cleanup)
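One hunk above replaces fulltextcache[node].tostring() with '%s' % fulltextcache[node]. That works because %-formatting a bytearray yields its raw byte content on Python 2; on Python 3 it would yield the repr instead, so this particular spot still assumes Python 2 string semantics. A small check of the behavior difference:

    import sys

    buf = bytearray(b'manifest text')
    if sys.version_info[0] == 2:
        assert '%s' % buf == 'manifest text'  # byte content on Python 2
    else:
        assert '%s' % buf == repr(buf)        # "bytearray(b'...')" on Python 3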
@@ -1,1603 +1,1603 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 import array
11 10 import heapq
12 11 import os
13 12 import struct
14 13
15 14 from .i18n import _
16 15 from . import (
17 16 error,
18 17 mdiff,
19 18 parsers,
20 19 revlog,
21 20 util,
22 21 )
23 22
24 23 propertycache = util.propertycache
25 24
26 25 def _parsev1(data):
27 26 # This method does a little bit of excessive-looking
28 27 # precondition checking. This is so that the behavior of this
29 28 # class exactly matches its C counterpart to try and help
30 29 # prevent surprise breakage for anyone that develops against
31 30 # the pure version.
32 31 if data and data[-1] != '\n':
33 32 raise ValueError('Manifest did not end in a newline.')
34 33 prev = None
35 34 for l in data.splitlines():
36 35 if prev is not None and prev > l:
37 36 raise ValueError('Manifest lines not in sorted order.')
38 37 prev = l
39 38 f, n = l.split('\0')
40 39 if len(n) > 40:
41 40 yield f, revlog.bin(n[:40]), n[40:]
42 41 else:
43 42 yield f, revlog.bin(n), ''
44 43
45 44 def _parsev2(data):
46 45 metadataend = data.find('\n')
47 46 # Just ignore metadata for now
48 47 pos = metadataend + 1
49 48 prevf = ''
50 49 while pos < len(data):
51 50 end = data.find('\n', pos + 1) # +1 to skip stem length byte
52 51 if end == -1:
53 52 raise ValueError('Manifest ended with incomplete file entry.')
54 53 stemlen = ord(data[pos])
55 54 items = data[pos + 1:end].split('\0')
56 55 f = prevf[:stemlen] + items[0]
57 56 if prevf > f:
58 57 raise ValueError('Manifest entries not in sorted order.')
59 58 fl = items[1]
60 59 # Just ignore metadata (items[2:] for now)
61 60 n = data[end + 1:end + 21]
62 61 yield f, n, fl
63 62 pos = end + 22
64 63 prevf = f
65 64
66 65 def _parse(data):
67 66 """Generates (path, node, flags) tuples from a manifest text"""
68 67 if data.startswith('\0'):
69 68 return iter(_parsev2(data))
70 69 else:
71 70 return iter(_parsev1(data))
72 71
73 72 def _text(it, usemanifestv2):
74 73 """Given an iterator over (path, node, flags) tuples, returns a manifest
75 74 text"""
76 75 if usemanifestv2:
77 76 return _textv2(it)
78 77 else:
79 78 return _textv1(it)
80 79
81 80 def _textv1(it):
82 81 files = []
83 82 lines = []
84 83 _hex = revlog.hex
85 84 for f, n, fl in it:
86 85 files.append(f)
87 86 # if this is changed to support newlines in filenames,
88 87 # be sure to check the templates/ dir again (especially *-raw.tmpl)
89 88 lines.append("%s\0%s%s\n" % (f, _hex(n), fl))
90 89
91 90 _checkforbidden(files)
92 91 return ''.join(lines)
93 92
94 93 def _textv2(it):
95 94 files = []
96 95 lines = ['\0\n']
97 96 prevf = ''
98 97 for f, n, fl in it:
99 98 files.append(f)
100 99 stem = os.path.commonprefix([prevf, f])
101 100 stemlen = min(len(stem), 255)
102 101 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
103 102 prevf = f
104 103 _checkforbidden(files)
105 104 return ''.join(lines)
106 105
107 106 class lazymanifestiter(object):
108 107 def __init__(self, lm):
109 108 self.pos = 0
110 109 self.lm = lm
111 110
112 111 def __iter__(self):
113 112 return self
114 113
115 114 def next(self):
116 115 try:
117 116 data, pos = self.lm._get(self.pos)
118 117 except IndexError:
119 118 raise StopIteration
120 119 if pos == -1:
121 120 self.pos += 1
122 121 return data[0]
123 122 self.pos += 1
124 123 zeropos = data.find('\x00', pos)
125 124 return data[pos:zeropos]
126 125
127 126 class lazymanifestiterentries(object):
128 127 def __init__(self, lm):
129 128 self.lm = lm
130 129 self.pos = 0
131 130
132 131 def __iter__(self):
133 132 return self
134 133
135 134 def next(self):
136 135 try:
137 136 data, pos = self.lm._get(self.pos)
138 137 except IndexError:
139 138 raise StopIteration
140 139 if pos == -1:
141 140 self.pos += 1
142 141 return data
143 142 zeropos = data.find('\x00', pos)
144 143 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
145 144 zeropos + 1, 40)
146 145 flags = self.lm._getflags(data, self.pos, zeropos)
147 146 self.pos += 1
148 147 return (data[pos:zeropos], hashval, flags)
149 148
150 149 def unhexlify(data, extra, pos, length):
151 150 s = data[pos:pos + length].decode('hex')
152 151 if extra:
153 152 s += chr(extra & 0xff)
154 153 return s
155 154
156 155 def _cmp(a, b):
157 156 return (a > b) - (a < b)
158 157
159 158 class _lazymanifest(object):
160 159 def __init__(self, data, positions=None, extrainfo=None, extradata=None):
161 160 if positions is None:
162 161 self.positions = self.findlines(data)
163 162 self.extrainfo = [0] * len(self.positions)
164 163 self.data = data
165 164 self.extradata = []
166 165 else:
167 166 self.positions = positions[:]
168 167 self.extrainfo = extrainfo[:]
169 168 self.extradata = extradata[:]
170 169 self.data = data
171 170
172 171 def findlines(self, data):
173 172 if not data:
174 173 return []
175 174 pos = data.find("\n")
176 175 if pos == -1 or data[-1] != '\n':
177 176 raise ValueError("Manifest did not end in a newline.")
178 177 positions = [0]
179 178 prev = data[:data.find('\x00')]
180 179 while pos < len(data) - 1 and pos != -1:
181 180 positions.append(pos + 1)
182 181 nexts = data[pos + 1:data.find('\x00', pos + 1)]
183 182 if nexts < prev:
184 183 raise ValueError("Manifest lines not in sorted order.")
185 184 prev = nexts
186 185 pos = data.find("\n", pos + 1)
187 186 return positions
188 187
189 188 def _get(self, index):
190 189 # get the position encoded in pos:
191 190 # positive number is an index in 'data'
192 191 # negative number is in extrapieces
193 192 pos = self.positions[index]
194 193 if pos >= 0:
195 194 return self.data, pos
196 195 return self.extradata[-pos - 1], -1
197 196
198 197 def _getkey(self, pos):
199 198 if pos >= 0:
200 199 return self.data[pos:self.data.find('\x00', pos + 1)]
201 200 return self.extradata[-pos - 1][0]
202 201
203 202 def bsearch(self, key):
204 203 first = 0
205 204 last = len(self.positions) - 1
206 205
207 206 while first <= last:
208 207 midpoint = (first + last)//2
209 208 nextpos = self.positions[midpoint]
210 209 candidate = self._getkey(nextpos)
211 210 r = _cmp(key, candidate)
212 211 if r == 0:
213 212 return midpoint
214 213 else:
215 214 if r < 0:
216 215 last = midpoint - 1
217 216 else:
218 217 first = midpoint + 1
219 218 return -1
220 219
221 220 def bsearch2(self, key):
222 221 # same as the above, but will always return the position
223 222 # done for performance reasons
224 223 first = 0
225 224 last = len(self.positions) - 1
226 225
227 226 while first <= last:
228 227 midpoint = (first + last)//2
229 228 nextpos = self.positions[midpoint]
230 229 candidate = self._getkey(nextpos)
231 230 r = _cmp(key, candidate)
232 231 if r == 0:
233 232 return (midpoint, True)
234 233 else:
235 234 if r < 0:
236 235 last = midpoint - 1
237 236 else:
238 237 first = midpoint + 1
239 238 return (first, False)
240 239
241 240 def __contains__(self, key):
242 241 return self.bsearch(key) != -1
243 242
244 243 def _getflags(self, data, needle, pos):
245 244 start = pos + 41
246 245 end = data.find("\n", start)
247 246 if end == -1:
248 247 end = len(data) - 1
249 248 if start == end:
250 249 return ''
251 250 return self.data[start:end]
252 251
253 252 def __getitem__(self, key):
254 253 if not isinstance(key, str):
255 254 raise TypeError("getitem: manifest keys must be a string.")
256 255 needle = self.bsearch(key)
257 256 if needle == -1:
258 257 raise KeyError
259 258 data, pos = self._get(needle)
260 259 if pos == -1:
261 260 return (data[1], data[2])
262 261 zeropos = data.find('\x00', pos)
263 262 assert 0 <= needle <= len(self.positions)
264 263 assert len(self.extrainfo) == len(self.positions)
265 264 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
266 265 flags = self._getflags(data, needle, zeropos)
267 266 return (hashval, flags)
268 267
269 268 def __delitem__(self, key):
270 269 needle, found = self.bsearch2(key)
271 270 if not found:
272 271 raise KeyError
273 272 cur = self.positions[needle]
274 273 self.positions = self.positions[:needle] + self.positions[needle + 1:]
275 274 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
276 275 if cur >= 0:
277 276 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
278 277
279 278 def __setitem__(self, key, value):
280 279 if not isinstance(key, str):
281 280 raise TypeError("setitem: manifest keys must be a string.")
282 281 if not isinstance(value, tuple) or len(value) != 2:
283 282 raise TypeError("Manifest values must be a tuple of (node, flags).")
284 283 hashval = value[0]
285 284 if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22:
286 285 raise TypeError("node must be a 20-byte string")
287 286 flags = value[1]
288 287 if len(hashval) == 22:
289 288 hashval = hashval[:-1]
290 289 if not isinstance(flags, str) or len(flags) > 1:
291 290 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
292 291 needle, found = self.bsearch2(key)
293 292 if found:
294 293 # put the item
295 294 pos = self.positions[needle]
296 295 if pos < 0:
297 296 self.extradata[-pos - 1] = (key, hashval, value[1])
298 297 else:
299 298 # just don't bother
300 299 self.extradata.append((key, hashval, value[1]))
301 300 self.positions[needle] = -len(self.extradata)
302 301 else:
303 302 # not found, put it in with extra positions
304 303 self.extradata.append((key, hashval, value[1]))
305 304 self.positions = (self.positions[:needle] + [-len(self.extradata)]
306 305 + self.positions[needle:])
307 306 self.extrainfo = (self.extrainfo[:needle] + [0] +
308 307 self.extrainfo[needle:])
309 308
310 309 def copy(self):
311 310 # XXX call _compact like in C?
312 311 return _lazymanifest(self.data, self.positions, self.extrainfo,
313 312 self.extradata)
314 313
315 314 def _compact(self):
316 315 # hopefully not called TOO often
317 316 if len(self.extradata) == 0:
318 317 return
319 318 l = []
320 319 last_cut = 0
321 320 i = 0
322 321 offset = 0
323 322 self.extrainfo = [0] * len(self.positions)
324 323 while i < len(self.positions):
325 324 if self.positions[i] >= 0:
326 325 cur = self.positions[i]
327 326 last_cut = cur
328 327 while True:
329 328 self.positions[i] = offset
330 329 i += 1
331 330 if i == len(self.positions) or self.positions[i] < 0:
332 331 break
333 332 offset += self.positions[i] - cur
334 333 cur = self.positions[i]
335 334 end_cut = self.data.find('\n', cur)
336 335 if end_cut != -1:
337 336 end_cut += 1
338 337 offset += end_cut - cur
339 338 l.append(self.data[last_cut:end_cut])
340 339 else:
341 340 while i < len(self.positions) and self.positions[i] < 0:
342 341 cur = self.positions[i]
343 342 t = self.extradata[-cur - 1]
344 343 l.append(self._pack(t))
345 344 self.positions[i] = offset
346 345 if len(t[1]) > 20:
347 346 self.extrainfo[i] = ord(t[1][21])
348 347 offset += len(l[-1])
349 348 i += 1
350 349 self.data = ''.join(l)
351 350 self.extradata = []
352 351
353 352 def _pack(self, d):
354 353 return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n'
355 354
356 355 def text(self):
357 356 self._compact()
358 357 return self.data
359 358
360 359 def diff(self, m2, clean=False):
361 360 '''Finds changes between the current manifest and m2.'''
362 361 # XXX think whether efficiency matters here
363 362 diff = {}
364 363
365 364 for fn, e1, flags in self.iterentries():
366 365 if fn not in m2:
367 366 diff[fn] = (e1, flags), (None, '')
368 367 else:
369 368 e2 = m2[fn]
370 369 if (e1, flags) != e2:
371 370 diff[fn] = (e1, flags), e2
372 371 elif clean:
373 372 diff[fn] = None
374 373
375 374 for fn, e2, flags in m2.iterentries():
376 375 if fn not in self:
377 376 diff[fn] = (None, ''), (e2, flags)
378 377
379 378 return diff
380 379
381 380 def iterentries(self):
382 381 return lazymanifestiterentries(self)
383 382
384 383 def iterkeys(self):
385 384 return lazymanifestiter(self)
386 385
387 386 def __iter__(self):
388 387 return lazymanifestiter(self)
389 388
390 389 def __len__(self):
391 390 return len(self.positions)
392 391
393 392 def filtercopy(self, filterfn):
394 393 # XXX should be optimized
395 394 c = _lazymanifest('')
396 395 for f, n, fl in self.iterentries():
397 396 if filterfn(f):
398 397 c[f] = n, fl
399 398 return c
400 399
401 400 try:
402 401 _lazymanifest = parsers.lazymanifest
403 402 except AttributeError:
404 403 pass
405 404
406 405 class manifestdict(object):
407 406 def __init__(self, data=''):
408 407 if data.startswith('\0'):
409 408 #_lazymanifest can not parse v2
410 409 self._lm = _lazymanifest('')
411 410 for f, n, fl in _parsev2(data):
412 411 self._lm[f] = n, fl
413 412 else:
414 413 self._lm = _lazymanifest(data)
415 414
416 415 def __getitem__(self, key):
417 416 return self._lm[key][0]
418 417
419 418 def find(self, key):
420 419 return self._lm[key]
421 420
422 421 def __len__(self):
423 422 return len(self._lm)
424 423
425 424 def __nonzero__(self):
426 425 # nonzero is covered by the __len__ function, but implementing it here
427 426 # makes it easier for extensions to override.
428 427 return len(self._lm) != 0
429 428
430 429 def __setitem__(self, key, node):
431 430 self._lm[key] = node, self.flags(key, '')
432 431
433 432 def __contains__(self, key):
434 433 return key in self._lm
435 434
436 435 def __delitem__(self, key):
437 436 del self._lm[key]
438 437
439 438 def __iter__(self):
440 439 return self._lm.__iter__()
441 440
442 441 def iterkeys(self):
443 442 return self._lm.iterkeys()
444 443
445 444 def keys(self):
446 445 return list(self.iterkeys())
447 446
448 447 def filesnotin(self, m2, match=None):
449 448 '''Set of files in this manifest that are not in the other'''
450 449 if match:
451 450 m1 = self.matches(match)
452 451 m2 = m2.matches(match)
453 452 return m1.filesnotin(m2)
454 453 diff = self.diff(m2)
455 454 files = set(filepath
456 455 for filepath, hashflags in diff.iteritems()
457 456 if hashflags[1][0] is None)
458 457 return files
459 458
460 459 @propertycache
461 460 def _dirs(self):
462 461 return util.dirs(self)
463 462
464 463 def dirs(self):
465 464 return self._dirs
466 465
467 466 def hasdir(self, dir):
468 467 return dir in self._dirs
469 468
470 469 def _filesfastpath(self, match):
471 470 '''Checks whether we can correctly and quickly iterate over matcher
472 471 files instead of over manifest files.'''
473 472 files = match.files()
474 473 return (len(files) < 100 and (match.isexact() or
475 474 (match.prefix() and all(fn in self for fn in files))))
476 475
477 476 def walk(self, match):
478 477 '''Generates matching file names.
479 478
480 479 Equivalent to manifest.matches(match).iterkeys(), but without creating
481 480 an entirely new manifest.
482 481
483 482 It also reports nonexistent files by marking them bad with match.bad().
484 483 '''
485 484 if match.always():
486 485 for f in iter(self):
487 486 yield f
488 487 return
489 488
490 489 fset = set(match.files())
491 490
492 491 # avoid the entire walk if we're only looking for specific files
493 492 if self._filesfastpath(match):
494 493 for fn in sorted(fset):
495 494 yield fn
496 495 return
497 496
498 497 for fn in self:
499 498 if fn in fset:
500 499 # specified pattern is the exact name
501 500 fset.remove(fn)
502 501 if match(fn):
503 502 yield fn
504 503
505 504 # for dirstate.walk, files=['.'] means "walk the whole tree".
506 505 # follow that here, too
507 506 fset.discard('.')
508 507
509 508 for fn in sorted(fset):
510 509 if not self.hasdir(fn):
511 510 match.bad(fn, None)
512 511
513 512 def matches(self, match):
514 513 '''generate a new manifest filtered by the match argument'''
515 514 if match.always():
516 515 return self.copy()
517 516
518 517 if self._filesfastpath(match):
519 518 m = manifestdict()
520 519 lm = self._lm
521 520 for fn in match.files():
522 521 if fn in lm:
523 522 m._lm[fn] = lm[fn]
524 523 return m
525 524
526 525 m = manifestdict()
527 526 m._lm = self._lm.filtercopy(match)
528 527 return m
529 528
530 529 def diff(self, m2, match=None, clean=False):
531 530 '''Finds changes between the current manifest and m2.
532 531
533 532 Args:
534 533 m2: the manifest to which this manifest should be compared.
535 534 clean: if true, include files unchanged between these manifests
536 535 with a None value in the returned dictionary.
537 536
538 537 The result is returned as a dict with filename as key and
539 538 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
540 539 nodeid in the current/other manifest and fl1/fl2 is the flag
541 540 in the current/other manifest. Where the file does not exist,
542 541 the nodeid will be None and the flags will be the empty
543 542 string.
544 543 '''
545 544 if match:
546 545 m1 = self.matches(match)
547 546 m2 = m2.matches(match)
548 547 return m1.diff(m2, clean=clean)
549 548 return self._lm.diff(m2._lm, clean)
550 549
551 550 def setflag(self, key, flag):
552 551 self._lm[key] = self[key], flag
553 552
554 553 def get(self, key, default=None):
555 554 try:
556 555 return self._lm[key][0]
557 556 except KeyError:
558 557 return default
559 558
560 559 def flags(self, key, default=''):
561 560 try:
562 561 return self._lm[key][1]
563 562 except KeyError:
564 563 return default
565 564
566 565 def copy(self):
567 566 c = manifestdict()
568 567 c._lm = self._lm.copy()
569 568 return c
570 569
571 570 def iteritems(self):
572 571 return (x[:2] for x in self._lm.iterentries())
573 572
574 573 def iterentries(self):
575 574 return self._lm.iterentries()
576 575
577 576 def text(self, usemanifestv2=False):
578 577 if usemanifestv2:
579 578 return _textv2(self._lm.iterentries())
580 579 else:
581 580 # use (probably) native version for v1
582 581 return self._lm.text()
583 582
584 583 def fastdelta(self, base, changes):
585 584 """Given a base manifest text as an array.array and a list of changes
586 585 relative to that text, compute a delta that can be used by revlog.
587 586 """
588 587 delta = []
589 588 dstart = None
590 589 dend = None
591 590 dline = [""]
592 591 start = 0
593 592 # zero copy representation of base as a buffer
594 593 addbuf = util.buffer(base)
595 594
596 595 changes = list(changes)
597 596 if len(changes) < 1000:
598 597 # start with a readonly loop that finds the offset of
599 598 # each line and creates the deltas
600 599 for f, todelete in changes:
601 600 # start/end will either bracket the existing entry or give the insert point
602 601 start, end = _msearch(addbuf, f, start)
603 602 if not todelete:
604 603 h, fl = self._lm[f]
605 604 l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
606 605 else:
607 606 if start == end:
608 607 # item we want to delete was not found, error out
609 608 raise AssertionError(
610 609 _("failed to remove %s from manifest") % f)
611 610 l = ""
612 611 if dstart is not None and dstart <= start and dend >= start:
613 612 if dend < end:
614 613 dend = end
615 614 if l:
616 615 dline.append(l)
617 616 else:
618 617 if dstart is not None:
619 618 delta.append([dstart, dend, "".join(dline)])
620 619 dstart = start
621 620 dend = end
622 621 dline = [l]
623 622
624 623 if dstart is not None:
625 624 delta.append([dstart, dend, "".join(dline)])
626 625 # apply the delta to the base, and get a delta for addrevision
627 626 deltatext, arraytext = _addlistdelta(base, delta)
628 627 else:
629 628 # For large changes, it's much cheaper to just build the text and
630 629 # diff it.
631 arraytext = array.array('c', self.text())
632 deltatext = mdiff.textdiff(base, arraytext)
630 arraytext = bytearray(self.text())
631 deltatext = mdiff.textdiff(
632 util.buffer(base), util.buffer(arraytext))
633 633
634 634 return arraytext, deltatext
635 635
636 636 def _msearch(m, s, lo=0, hi=None):
637 637 '''return a tuple (start, end) that says where to find s within m.
638 638
639 639 If the string is found m[start:end] are the line containing
640 640 that string. If start == end the string was not found and
641 641 they indicate the proper sorted insertion point.
642 642
643 643 m should be a buffer or a string
644 644 s is a string'''
645 645 def advance(i, c):
646 646 while i < lenm and m[i] != c:
647 647 i += 1
648 648 return i
649 649 if not s:
650 650 return (lo, lo)
651 651 lenm = len(m)
652 652 if not hi:
653 653 hi = lenm
654 654 while lo < hi:
655 655 mid = (lo + hi) // 2
656 656 start = mid
657 657 while start > 0 and m[start - 1] != '\n':
658 658 start -= 1
659 659 end = advance(start, '\0')
660 660 if m[start:end] < s:
661 661 # we know that after the null there are 40 bytes of sha1
662 662 # this translates to the bisect lo = mid + 1
663 663 lo = advance(end + 40, '\n') + 1
664 664 else:
665 665 # this translates to the bisect hi = mid
666 666 hi = start
667 667 end = advance(lo, '\0')
668 668 found = m[lo:end]
669 669 if s == found:
670 670 # we know that after the null there are 40 bytes of sha1
671 671 end = advance(end + 40, '\n')
672 672 return (lo, end + 1)
673 673 else:
674 674 return (lo, lo)
675 675
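A quick sketch of _msearch (defined above) against a hand-built v1 manifest text, where each line is '<filename>\0<40 hex chars>\n' in sorted filename order; the node ids here are fake:

    text = 'bar\0' + '1' * 40 + '\n' + 'foo\0' + '2' * 40 + '\n'
    start, end = _msearch(text, 'foo')
    # (start, end) bracket the whole matching line; on a miss,
    # start == end at the sorted insertion point.
    assert text[start:end] == 'foo\0' + '2' * 40 + '\n'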
676 676 def _checkforbidden(l):
677 677 """Check filenames for illegal characters."""
678 678 for f in l:
679 679 if '\n' in f or '\r' in f:
680 680 raise error.RevlogError(
681 681 _("'\\n' and '\\r' disallowed in filenames: %r") % f)
682 682
683 683
684 684 # apply the changes collected during the bisect loop to our addlist
685 685 # return a delta suitable for addrevision
686 686 def _addlistdelta(addlist, x):
687 687 # for large addlist arrays, building a new array is cheaper
688 688 # than repeatedly modifying the existing one
689 689 currentposition = 0
690 newaddlist = array.array('c')
690 newaddlist = bytearray()
691 691
692 692 for start, end, content in x:
693 693 newaddlist += addlist[currentposition:start]
694 694 if content:
695 newaddlist += array.array('c', content)
695 newaddlist += bytearray(content)
696 696
697 697 currentposition = end
698 698
699 699 newaddlist += addlist[currentposition:]
700 700
701 701 deltatext = "".join(struct.pack(">lll", start, end, len(content))
702 702 + content for start, end, content in x)
703 703 return deltatext, newaddlist
704 704
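The deltatext built above is revlog's binary patch format: each change is a big-endian (start, end, len(content)) header followed by the content itself. A self-contained round-trip of one record, assuming the layout implied by the struct.pack call above:

    import struct

    content = 'foo\0' + 'f' * 40 + '\n'            # one manifest line
    record = struct.pack('>lll', 0, 0, len(content)) + content
    start, end, length = struct.unpack('>lll', record[:12])
    assert (start, end) == (0, 0)                  # insert at offset 0
    assert record[12:12 + length] == content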
705 705 def _splittopdir(f):
706 706 if '/' in f:
707 707 dir, subpath = f.split('/', 1)
708 708 return dir + '/', subpath
709 709 else:
710 710 return '', f
711 711
712 712 _noop = lambda s: None
713 713
714 714 class treemanifest(object):
715 715 def __init__(self, dir='', text=''):
716 716 self._dir = dir
717 717 self._node = revlog.nullid
718 718 self._loadfunc = _noop
719 719 self._copyfunc = _noop
720 720 self._dirty = False
721 721 self._dirs = {}
722 722 # Using _lazymanifest here is a little slower than plain old dicts
723 723 self._files = {}
724 724 self._flags = {}
725 725 if text:
726 726 def readsubtree(subdir, subm):
727 727 raise AssertionError('treemanifest constructor only accepts '
728 728 'flat manifests')
729 729 self.parse(text, readsubtree)
730 730 self._dirty = True # Mark flat manifest dirty after parsing
731 731
732 732 def _subpath(self, path):
733 733 return self._dir + path
734 734
735 735 def __len__(self):
736 736 self._load()
737 737 size = len(self._files)
738 738 for m in self._dirs.values():
739 739 size += m.__len__()
740 740 return size
741 741
742 742 def _isempty(self):
743 743 self._load() # for consistency; already loaded by all callers
744 744 return (not self._files and (not self._dirs or
745 745 all(m._isempty() for m in self._dirs.values())))
746 746
747 747 def __repr__(self):
748 748 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
749 749 (self._dir, revlog.hex(self._node),
750 750 bool(self._loadfunc is _noop),
751 751 self._dirty, id(self)))
752 752
753 753 def dir(self):
754 754 '''The directory that this tree manifest represents, including a
755 755 trailing '/'. Empty string for the repo root directory.'''
756 756 return self._dir
757 757
758 758 def node(self):
759 759 '''The node of this instance. nullid for unsaved instances. Should
760 760 be updated when the instance is read from or written to a revlog.
761 761 '''
762 762 assert not self._dirty
763 763 return self._node
764 764
765 765 def setnode(self, node):
766 766 self._node = node
767 767 self._dirty = False
768 768
769 769 def iterentries(self):
770 770 self._load()
771 771 for p, n in sorted(self._dirs.items() + self._files.items()):
772 772 if p in self._files:
773 773 yield self._subpath(p), n, self._flags.get(p, '')
774 774 else:
775 775 for x in n.iterentries():
776 776 yield x
777 777
778 778 def iteritems(self):
779 779 self._load()
780 780 for p, n in sorted(self._dirs.items() + self._files.items()):
781 781 if p in self._files:
782 782 yield self._subpath(p), n
783 783 else:
784 784 for f, sn in n.iteritems():
785 785 yield f, sn
786 786
787 787 def iterkeys(self):
788 788 self._load()
789 789 for p in sorted(self._dirs.keys() + self._files.keys()):
790 790 if p in self._files:
791 791 yield self._subpath(p)
792 792 else:
793 793 for f in self._dirs[p].iterkeys():
794 794 yield f
795 795
796 796 def keys(self):
797 797 return list(self.iterkeys())
798 798
799 799 def __iter__(self):
800 800 return self.iterkeys()
801 801
802 802 def __contains__(self, f):
803 803 if f is None:
804 804 return False
805 805 self._load()
806 806 dir, subpath = _splittopdir(f)
807 807 if dir:
808 808 if dir not in self._dirs:
809 809 return False
810 810 return self._dirs[dir].__contains__(subpath)
811 811 else:
812 812 return f in self._files
813 813
814 814 def get(self, f, default=None):
815 815 self._load()
816 816 dir, subpath = _splittopdir(f)
817 817 if dir:
818 818 if dir not in self._dirs:
819 819 return default
820 820 return self._dirs[dir].get(subpath, default)
821 821 else:
822 822 return self._files.get(f, default)
823 823
824 824 def __getitem__(self, f):
825 825 self._load()
826 826 dir, subpath = _splittopdir(f)
827 827 if dir:
828 828 return self._dirs[dir].__getitem__(subpath)
829 829 else:
830 830 return self._files[f]
831 831
832 832 def flags(self, f):
833 833 self._load()
834 834 dir, subpath = _splittopdir(f)
835 835 if dir:
836 836 if dir not in self._dirs:
837 837 return ''
838 838 return self._dirs[dir].flags(subpath)
839 839 else:
840 840 if f in self._dirs:
841 841 return ''
842 842 return self._flags.get(f, '')
843 843
844 844 def find(self, f):
845 845 self._load()
846 846 dir, subpath = _splittopdir(f)
847 847 if dir:
848 848 return self._dirs[dir].find(subpath)
849 849 else:
850 850 return self._files[f], self._flags.get(f, '')
851 851
852 852 def __delitem__(self, f):
853 853 self._load()
854 854 dir, subpath = _splittopdir(f)
855 855 if dir:
856 856 self._dirs[dir].__delitem__(subpath)
857 857 # If the directory is now empty, remove it
858 858 if self._dirs[dir]._isempty():
859 859 del self._dirs[dir]
860 860 else:
861 861 del self._files[f]
862 862 if f in self._flags:
863 863 del self._flags[f]
864 864 self._dirty = True
865 865
866 866 def __setitem__(self, f, n):
867 867 assert n is not None
868 868 self._load()
869 869 dir, subpath = _splittopdir(f)
870 870 if dir:
871 871 if dir not in self._dirs:
872 872 self._dirs[dir] = treemanifest(self._subpath(dir))
873 873 self._dirs[dir].__setitem__(subpath, n)
874 874 else:
875 875 self._files[f] = n[:21] # to match manifestdict's behavior
876 876 self._dirty = True
877 877
878 878 def _load(self):
879 879 if self._loadfunc is not _noop:
880 880 lf, self._loadfunc = self._loadfunc, _noop
881 881 lf(self)
882 882 elif self._copyfunc is not _noop:
883 883 cf, self._copyfunc = self._copyfunc, _noop
884 884 cf(self)
885 885
886 886 def setflag(self, f, flags):
887 887 """Set the flags (symlink, executable) for path f."""
888 888 self._load()
889 889 dir, subpath = _splittopdir(f)
890 890 if dir:
891 891 if dir not in self._dirs:
892 892 self._dirs[dir] = treemanifest(self._subpath(dir))
893 893 self._dirs[dir].setflag(subpath, flags)
894 894 else:
895 895 self._flags[f] = flags
896 896 self._dirty = True
897 897
898 898 def copy(self):
899 899 copy = treemanifest(self._dir)
900 900 copy._node = self._node
901 901 copy._dirty = self._dirty
902 902 if self._copyfunc is _noop:
903 903 def _copyfunc(s):
904 904 self._load()
905 905 for d in self._dirs:
906 906 s._dirs[d] = self._dirs[d].copy()
907 907 s._files = dict.copy(self._files)
908 908 s._flags = dict.copy(self._flags)
909 909 if self._loadfunc is _noop:
910 910 _copyfunc(copy)
911 911 else:
912 912 copy._copyfunc = _copyfunc
913 913 else:
914 914 copy._copyfunc = self._copyfunc
915 915 return copy
916 916
917 917 def filesnotin(self, m2, match=None):
918 918 '''Set of files in this manifest that are not in the other'''
919 919 if match:
920 920 m1 = self.matches(match)
921 921 m2 = m2.matches(match)
922 922 return m1.filesnotin(m2)
923 923
924 924 files = set()
925 925 def _filesnotin(t1, t2):
926 926 if t1._node == t2._node and not t1._dirty and not t2._dirty:
927 927 return
928 928 t1._load()
929 929 t2._load()
930 930 for d, m1 in t1._dirs.iteritems():
931 931 if d in t2._dirs:
932 932 m2 = t2._dirs[d]
933 933 _filesnotin(m1, m2)
934 934 else:
935 935 files.update(m1.iterkeys())
936 936
937 937 for fn in t1._files.iterkeys():
938 938 if fn not in t2._files:
939 939 files.add(t1._subpath(fn))
940 940
941 941 _filesnotin(self, m2)
942 942 return files
943 943
944 944 @propertycache
945 945 def _alldirs(self):
946 946 return util.dirs(self)
947 947
948 948 def dirs(self):
949 949 return self._alldirs
950 950
951 951 def hasdir(self, dir):
952 952 self._load()
953 953 topdir, subdir = _splittopdir(dir)
954 954 if topdir:
955 955 if topdir in self._dirs:
956 956 return self._dirs[topdir].hasdir(subdir)
957 957 return False
958 958 return (dir + '/') in self._dirs
959 959
960 960 def walk(self, match):
961 961 '''Generates matching file names.
962 962
963 963 Equivalent to manifest.matches(match).iterkeys(), but without creating
964 964 an entirely new manifest.
965 965
966 966 It also reports nonexistent files by marking them bad with match.bad().
967 967 '''
968 968 if match.always():
969 969 for f in iter(self):
970 970 yield f
971 971 return
972 972
973 973 fset = set(match.files())
974 974
975 975 for fn in self._walk(match):
976 976 if fn in fset:
977 977 # specified pattern is the exact name
978 978 fset.remove(fn)
979 979 yield fn
980 980
981 981 # for dirstate.walk, files=['.'] means "walk the whole tree".
982 982 # follow that here, too
983 983 fset.discard('.')
984 984
985 985 for fn in sorted(fset):
986 986 if not self.hasdir(fn):
987 987 match.bad(fn, None)
988 988
989 989 def _walk(self, match):
990 990 '''Recursively generates matching file names for walk().'''
991 991 if not match.visitdir(self._dir[:-1] or '.'):
992 992 return
993 993
994 994 # yield this dir's files and walk its submanifests
995 995 self._load()
996 996 for p in sorted(self._dirs.keys() + self._files.keys()):
997 997 if p in self._files:
998 998 fullp = self._subpath(p)
999 999 if match(fullp):
1000 1000 yield fullp
1001 1001 else:
1002 1002 for f in self._dirs[p]._walk(match):
1003 1003 yield f
1004 1004
1005 1005 def matches(self, match):
1006 1006 '''generate a new manifest filtered by the match argument'''
1007 1007 if match.always():
1008 1008 return self.copy()
1009 1009
1010 1010 return self._matches(match)
1011 1011
1012 1012 def _matches(self, match):
1013 1013 '''recursively generate a new manifest filtered by the match argument.
1014 1014 '''
1015 1015
1016 1016 visit = match.visitdir(self._dir[:-1] or '.')
1017 1017 if visit == 'all':
1018 1018 return self.copy()
1019 1019 ret = treemanifest(self._dir)
1020 1020 if not visit:
1021 1021 return ret
1022 1022
1023 1023 self._load()
1024 1024 for fn in self._files:
1025 1025 fullp = self._subpath(fn)
1026 1026 if not match(fullp):
1027 1027 continue
1028 1028 ret._files[fn] = self._files[fn]
1029 1029 if fn in self._flags:
1030 1030 ret._flags[fn] = self._flags[fn]
1031 1031
1032 1032 for dir, subm in self._dirs.iteritems():
1033 1033 m = subm._matches(match)
1034 1034 if not m._isempty():
1035 1035 ret._dirs[dir] = m
1036 1036
1037 1037 if not ret._isempty():
1038 1038 ret._dirty = True
1039 1039 return ret
1040 1040
1041 1041 def diff(self, m2, match=None, clean=False):
1042 1042 '''Finds changes between the current manifest and m2.
1043 1043
1044 1044 Args:
1045 1045 m2: the manifest to which this manifest should be compared.
1046 1046 clean: if true, include files unchanged between these manifests
1047 1047 with a None value in the returned dictionary.
1048 1048
1049 1049 The result is returned as a dict with filename as key and
1050 1050 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1051 1051 nodeid in the current/other manifest and fl1/fl2 is the flag
1052 1052 in the current/other manifest. Where the file does not exist,
1053 1053 the nodeid will be None and the flags will be the empty
1054 1054 string.
1055 1055 '''
1056 1056 if match:
1057 1057 m1 = self.matches(match)
1058 1058 m2 = m2.matches(match)
1059 1059 return m1.diff(m2, clean=clean)
1060 1060 result = {}
1061 1061 emptytree = treemanifest()
1062 1062 def _diff(t1, t2):
1063 1063 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1064 1064 return
1065 1065 t1._load()
1066 1066 t2._load()
1067 1067 for d, m1 in t1._dirs.iteritems():
1068 1068 m2 = t2._dirs.get(d, emptytree)
1069 1069 _diff(m1, m2)
1070 1070
1071 1071 for d, m2 in t2._dirs.iteritems():
1072 1072 if d not in t1._dirs:
1073 1073 _diff(emptytree, m2)
1074 1074
1075 1075 for fn, n1 in t1._files.iteritems():
1076 1076 fl1 = t1._flags.get(fn, '')
1077 1077 n2 = t2._files.get(fn, None)
1078 1078 fl2 = t2._flags.get(fn, '')
1079 1079 if n1 != n2 or fl1 != fl2:
1080 1080 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1081 1081 elif clean:
1082 1082 result[t1._subpath(fn)] = None
1083 1083
1084 1084 for fn, n2 in t2._files.iteritems():
1085 1085 if fn not in t1._files:
1086 1086 fl2 = t2._flags.get(fn, '')
1087 1087 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1088 1088
1089 1089 _diff(self, m2)
1090 1090 return result
1091 1091
1092 1092 def unmodifiedsince(self, m2):
1093 1093 return not self._dirty and not m2._dirty and self._node == m2._node
1094 1094
1095 1095 def parse(self, text, readsubtree):
1096 1096 for f, n, fl in _parse(text):
1097 1097 if fl == 't':
1098 1098 f = f + '/'
1099 1099 self._dirs[f] = readsubtree(self._subpath(f), n)
1100 1100 elif '/' in f:
1101 1101 # This is a flat manifest, so use __setitem__ and setflag rather
1102 1102 # than assigning directly to _files and _flags, so we can
1103 1103 # assign a path in a subdirectory, and to mark dirty (compared
1104 1104 # to nullid).
1105 1105 self[f] = n
1106 1106 if fl:
1107 1107 self.setflag(f, fl)
1108 1108 else:
1109 1109 # Assigning to _files and _flags avoids marking as dirty,
1110 1110 # and should be a little faster.
1111 1111 self._files[f] = n
1112 1112 if fl:
1113 1113 self._flags[f] = fl
1114 1114
1115 1115 def text(self, usemanifestv2=False):
1116 1116 """Get the full data of this manifest as a bytestring."""
1117 1117 self._load()
1118 1118 return _text(self.iterentries(), usemanifestv2)
1119 1119
1120 1120 def dirtext(self, usemanifestv2=False):
1121 1121 """Get the full data of this directory as a bytestring. Make sure that
1122 1122 any submanifests have been written first, so their nodeids are correct.
1123 1123 """
1124 1124 self._load()
1125 1125 flags = self.flags
1126 1126 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1127 1127 files = [(f, self._files[f], flags(f)) for f in self._files]
1128 1128 return _text(sorted(dirs + files), usemanifestv2)
1129 1129
1130 1130 def read(self, gettext, readsubtree):
1131 1131 def _load_for_read(s):
1132 1132 s.parse(gettext(), readsubtree)
1133 1133 s._dirty = False
1134 1134 self._loadfunc = _load_for_read
1135 1135
1136 1136 def writesubtrees(self, m1, m2, writesubtree):
1137 1137 self._load() # for consistency; should never have any effect here
1138 1138 m1._load()
1139 1139 m2._load()
1140 1140 emptytree = treemanifest()
1141 1141 for d, subm in self._dirs.iteritems():
1142 1142 subp1 = m1._dirs.get(d, emptytree)._node
1143 1143 subp2 = m2._dirs.get(d, emptytree)._node
1144 1144 if subp1 == revlog.nullid:
1145 1145 subp1, subp2 = subp2, subp1
1146 1146 writesubtree(subm, subp1, subp2)
1147 1147
1148 1148 class manifestrevlog(revlog.revlog):
1149 1149 '''A revlog that stores manifest texts. This is responsible for caching the
1150 1150 full-text manifest contents.
1151 1151 '''
1152 1152 def __init__(self, opener, dir='', dirlogcache=None, indexfile=None):
1153 1153 """Constructs a new manifest revlog
1154 1154
1155 1155 `indexfile` - used by extensions to have two manifests at once, like
1156 1156 when transitioning between flat manifests and treemanifests.
1157 1157 """
1158 1158 # During normal operations, we expect to deal with not more than four
1159 1159 # revs at a time (such as during commit --amend). When rebasing large
1160 1160 # stacks of commits, the number can go up, hence the config knob below.
1161 1161 cachesize = 4
1162 1162 usetreemanifest = False
1163 1163 usemanifestv2 = False
1164 1164 opts = getattr(opener, 'options', None)
1165 1165 if opts is not None:
1166 1166 cachesize = opts.get('manifestcachesize', cachesize)
1167 1167 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1168 1168 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
1169 1169
1170 1170 self._treeondisk = usetreemanifest
1171 1171 self._usemanifestv2 = usemanifestv2
1172 1172
1173 1173 self._fulltextcache = util.lrucachedict(cachesize)
1174 1174
1175 1175 if dir:
1176 1176 assert self._treeondisk, 'opts is %r' % opts
1177 1177 if not dir.endswith('/'):
1178 1178 dir = dir + '/'
1179 1179
1180 1180 if indexfile is None:
1181 1181 indexfile = '00manifest.i'
1182 1182 if dir:
1183 1183 indexfile = "meta/" + dir + indexfile
1184 1184
1185 1185 self._dir = dir
1186 1186 # The dirlogcache is kept on the root manifest log
1187 1187 if dir:
1188 1188 self._dirlogcache = dirlogcache
1189 1189 else:
1190 1190 self._dirlogcache = {'': self}
1191 1191
1192 1192 super(manifestrevlog, self).__init__(opener, indexfile,
1193 1193 checkambig=bool(dir))
1194 1194
1195 1195 @property
1196 1196 def fulltextcache(self):
1197 1197 return self._fulltextcache
1198 1198
1199 1199 def clearcaches(self):
1200 1200 super(manifestrevlog, self).clearcaches()
1201 1201 self._fulltextcache.clear()
1202 1202 self._dirlogcache = {'': self}
1203 1203
1204 1204 def dirlog(self, dir):
1205 1205 if dir:
1206 1206 assert self._treeondisk
1207 1207 if dir not in self._dirlogcache:
1208 1208 self._dirlogcache[dir] = manifestrevlog(self.opener, dir,
1209 1209 self._dirlogcache)
1210 1210 return self._dirlogcache[dir]
1211 1211
1212 1212 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
1213 1213 if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
1214 1214 and not self._usemanifestv2):
1215 1215 # If our first parent is in the manifest cache, we can
1216 1216 # compute a delta here using properties we know about the
1217 1217 # manifest up-front, which may save time later for the
1218 1218 # revlog layer.
1219 1219
1220 1220 _checkforbidden(added)
1221 1221 # combine the changed lists into one sorted iterator
1222 1222 work = heapq.merge([(x, False) for x in added],
1223 1223 [(x, True) for x in removed])
1224 1224
1225 1225 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1226 1226 cachedelta = self.rev(p1), deltatext
1227 1227 text = util.buffer(arraytext)
1228 1228 n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
1229 1229 else:
1230 1230 # The first parent manifest isn't already loaded, so we'll
1231 1231 # just encode a fulltext of the manifest and pass that
1232 1232 # through to the revlog layer, and let it handle the delta
1233 1233 # process.
1234 1234 if self._treeondisk:
1235 1235 assert readtree, "readtree must be set for treemanifest writes"
1236 1236 m1 = readtree(self._dir, p1)
1237 1237 m2 = readtree(self._dir, p2)
1238 1238 n = self._addtree(m, transaction, link, m1, m2, readtree)
1239 1239 arraytext = None
1240 1240 else:
1241 1241 text = m.text(self._usemanifestv2)
1242 1242 n = self.addrevision(text, transaction, link, p1, p2)
1243 arraytext = array.array('c', text)
1243 arraytext = bytearray(text)
1244 1244
1245 1245 if arraytext is not None:
1246 1246 self.fulltextcache[n] = arraytext
1247 1247
1248 1248 return n
1249 1249
1250 1250 def _addtree(self, m, transaction, link, m1, m2, readtree):
1251 1251 # If the manifest is unchanged compared to one parent,
1252 1252 # don't write a new revision
1253 1253 if self._dir != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(m2)):
1254 1254 return m.node()
1255 1255 def writesubtree(subm, subp1, subp2):
1256 1256 sublog = self.dirlog(subm.dir())
1257 1257 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1258 1258 readtree=readtree)
1259 1259 m.writesubtrees(m1, m2, writesubtree)
1260 1260 text = m.dirtext(self._usemanifestv2)
1261 1261 n = None
1262 1262 if self._dir != '':
1263 1263 # Double-check whether contents are unchanged compared to one parent
1264 1264 if text == m1.dirtext(self._usemanifestv2):
1265 1265 n = m1.node()
1266 1266 elif text == m2.dirtext(self._usemanifestv2):
1267 1267 n = m2.node()
1268 1268
1269 1269 if not n:
1270 1270 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
1271 1271
1272 1272 # Save nodeid so parent manifest can calculate its nodeid
1273 1273 m.setnode(n)
1274 1274 return n
1275 1275
1276 1276 class manifestlog(object):
1277 1277 """A collection class representing the collection of manifest snapshots
1278 1278 referenced by commits in the repository.
1279 1279
1280 1280 In this situation, 'manifest' refers to the abstract concept of a snapshot
1281 1281 of the list of files in the given commit. Consumers of the output of this
1282 1282 class do not care about the implementation details of the actual manifests
1283 1283 they receive (i.e. tree or flat or lazily loaded, etc)."""
1284 1284 def __init__(self, opener, repo):
1285 1285 usetreemanifest = False
1286 1286 cachesize = 4
1287 1287
1288 1288 opts = getattr(opener, 'options', None)
1289 1289 if opts is not None:
1290 1290 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1291 1291 cachesize = opts.get('manifestcachesize', cachesize)
1292 1292 self._treeinmem = usetreemanifest
1293 1293
1294 1294 self._oldmanifest = repo._constructmanifest()
1295 1295 self._revlog = self._oldmanifest
1296 1296
1297 1297 # A cache of the manifestctx or treemanifestctx for each directory
1298 1298 self._dirmancache = {}
1299 1299 self._dirmancache[''] = util.lrucachedict(cachesize)
1300 1300
1301 1301 self.cachesize = cachesize
1302 1302
1303 1303 def __getitem__(self, node):
1304 1304 """Retrieves the manifest instance for the given node. Throws a
1305 1305 LookupError if not found.
1306 1306 """
1307 1307 return self.get('', node)
1308 1308
1309 1309 def get(self, dir, node, verify=True):
1310 1310 """Retrieves the manifest instance for the given node. Throws a
1311 1311 LookupError if not found.
1312 1312
1313 1313 `verify` - if True an exception will be thrown if the node is not in
1314 1314 the revlog
1315 1315 """
1316 1316 if node in self._dirmancache.get(dir, ()):
1317 1317 cachemf = self._dirmancache[dir][node]
1318 1318 # The old manifest may put non-ctx manifests in the cache, so
1319 1319 # skip those since they don't implement the full api.
1320 1320 if (isinstance(cachemf, manifestctx) or
1321 1321 isinstance(cachemf, treemanifestctx)):
1322 1322 return cachemf
1323 1323
1324 1324 if dir:
1325 1325 if self._revlog._treeondisk:
1326 1326 if verify:
1327 1327 dirlog = self._revlog.dirlog(dir)
1328 1328 if node not in dirlog.nodemap:
1329 1329 raise LookupError(node, dirlog.indexfile,
1330 1330 _('no node'))
1331 1331 m = treemanifestctx(self, dir, node)
1332 1332 else:
1333 1333 raise error.Abort(
1334 1334 _("cannot ask for manifest directory '%s' in a flat "
1335 1335 "manifest") % dir)
1336 1336 else:
1337 1337 if verify:
1338 1338 if node not in self._revlog.nodemap:
1339 1339 raise LookupError(node, self._revlog.indexfile,
1340 1340 _('no node'))
1341 1341 if self._treeinmem:
1342 1342 m = treemanifestctx(self, '', node)
1343 1343 else:
1344 1344 m = manifestctx(self, node)
1345 1345
1346 1346 if node != revlog.nullid:
1347 1347 mancache = self._dirmancache.get(dir)
1348 1348 if not mancache:
1349 1349 mancache = util.lrucachedict(self.cachesize)
1350 1350 self._dirmancache[dir] = mancache
1351 1351 mancache[node] = m
1352 1352 return m
1353 1353
1354 1354 def clearcaches(self):
1355 1355 self._dirmancache.clear()
1356 1356 self._revlog.clearcaches()
1357 1357
1358 1358 class memmanifestctx(object):
1359 1359 def __init__(self, manifestlog):
1360 1360 self._manifestlog = manifestlog
1361 1361 self._manifestdict = manifestdict()
1362 1362
1363 1363 def _revlog(self):
1364 1364 return self._manifestlog._revlog
1365 1365
1366 1366 def new(self):
1367 1367 return memmanifestctx(self._manifestlog)
1368 1368
1369 1369 def copy(self):
1370 1370 memmf = memmanifestctx(self._manifestlog)
1371 1371 memmf._manifestdict = self.read().copy()
1372 1372 return memmf
1373 1373
1374 1374 def read(self):
1375 1375 return self._manifestdict
1376 1376
1377 1377 def write(self, transaction, link, p1, p2, added, removed):
1378 1378 return self._revlog().add(self._manifestdict, transaction, link, p1, p2,
1379 1379 added, removed)
1380 1380
1381 1381 class manifestctx(object):
1382 1382 """A class representing a single revision of a manifest, including its
1383 1383 contents, its parent revs, and its linkrev.
1384 1384 """
1385 1385 def __init__(self, manifestlog, node):
1386 1386 self._manifestlog = manifestlog
1387 1387 self._data = None
1388 1388
1389 1389 self._node = node
1390 1390
1391 1391 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1392 1392 # but let's add it later when something needs it and we can load it
1393 1393 # lazily.
1394 1394 #self.p1, self.p2 = revlog.parents(node)
1395 1395 #rev = revlog.rev(node)
1396 1396 #self.linkrev = revlog.linkrev(rev)
1397 1397
1398 1398 def _revlog(self):
1399 1399 return self._manifestlog._revlog
1400 1400
1401 1401 def node(self):
1402 1402 return self._node
1403 1403
1404 1404 def new(self):
1405 1405 return memmanifestctx(self._manifestlog)
1406 1406
1407 1407 def copy(self):
1408 1408 memmf = memmanifestctx(self._manifestlog)
1409 1409 memmf._manifestdict = self.read().copy()
1410 1410 return memmf
1411 1411
1412 1412 @propertycache
1413 1413 def parents(self):
1414 1414 return self._revlog().parents(self._node)
1415 1415
1416 1416 def read(self):
1417 1417 if self._data is None:
1418 1418 if self._node == revlog.nullid:
1419 1419 self._data = manifestdict()
1420 1420 else:
1421 1421 rl = self._revlog()
1422 1422 text = rl.revision(self._node)
1423 arraytext = array.array('c', text)
1423 arraytext = bytearray(text)
1424 1424 rl._fulltextcache[self._node] = arraytext
1425 1425 self._data = manifestdict(text)
1426 1426 return self._data
1427 1427
1428 1428 def readfast(self, shallow=False):
1429 1429 '''Calls either readdelta or read, based on which would be less work.
1430 1430 readdelta is called if the delta is against the p1, and therefore can be
1431 1431 read quickly.
1432 1432
1433 1433 If `shallow` is True, nothing changes since this is a flat manifest.
1434 1434 '''
1435 1435 rl = self._revlog()
1436 1436 r = rl.rev(self._node)
1437 1437 deltaparent = rl.deltaparent(r)
1438 1438 if deltaparent != revlog.nullrev and deltaparent in rl.parentrevs(r):
1439 1439 return self.readdelta()
1440 1440 return self.read()
1441 1441
1442 1442 def readdelta(self, shallow=False):
1443 1443 '''Returns a manifest containing just the entries that are present
1444 1444 in this manifest, but not in its p1 manifest. This is efficient to read
1445 1445 if the revlog delta is already p1.
1446 1446
1447 1447 Changing the value of `shallow` has no effect on flat manifests.
1448 1448 '''
1449 1449 revlog = self._revlog()
1450 1450 if revlog._usemanifestv2:
1451 1451 # Need to perform a slow delta
1452 1452 r0 = revlog.deltaparent(revlog.rev(self._node))
1453 1453 m0 = self._manifestlog[revlog.node(r0)].read()
1454 1454 m1 = self.read()
1455 1455 md = manifestdict()
1456 1456 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1457 1457 if n1:
1458 1458 md[f] = n1
1459 1459 if fl1:
1460 1460 md.setflag(f, fl1)
1461 1461 return md
1462 1462
1463 1463 r = revlog.rev(self._node)
1464 1464 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1465 1465 return manifestdict(d)
1466 1466
1467 1467 def find(self, key):
1468 1468 return self.read().find(key)
1469 1469
1470 1470 class memtreemanifestctx(object):
1471 1471 def __init__(self, manifestlog, dir=''):
1472 1472 self._manifestlog = manifestlog
1473 1473 self._dir = dir
1474 1474 self._treemanifest = treemanifest()
1475 1475
1476 1476 def _revlog(self):
1477 1477 return self._manifestlog._revlog
1478 1478
1479 1479 def new(self, dir=''):
1480 1480 return memtreemanifestctx(self._manifestlog, dir=dir)
1481 1481
1482 1482 def copy(self):
1483 1483 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1484 1484 memmf._treemanifest = self._treemanifest.copy()
1485 1485 return memmf
1486 1486
1487 1487 def read(self):
1488 1488 return self._treemanifest
1489 1489
1490 1490 def write(self, transaction, link, p1, p2, added, removed):
1491 1491 def readtree(dir, node):
1492 1492 return self._manifestlog.get(dir, node).read()
1493 1493 return self._revlog().add(self._treemanifest, transaction, link, p1, p2,
1494 1494 added, removed, readtree=readtree)
1495 1495
1496 1496 class treemanifestctx(object):
1497 1497 def __init__(self, manifestlog, dir, node):
1498 1498 self._manifestlog = manifestlog
1499 1499 self._dir = dir
1500 1500 self._data = None
1501 1501
1502 1502 self._node = node
1503 1503
1504 1504 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1505 1505 # we can instantiate treemanifestctx objects for directories we don't
1506 1506 # have on disk.
1507 1507 #self.p1, self.p2 = revlog.parents(node)
1508 1508 #rev = revlog.rev(node)
1509 1509 #self.linkrev = revlog.linkrev(rev)
1510 1510
1511 1511 def _revlog(self):
1512 1512 return self._manifestlog._revlog.dirlog(self._dir)
1513 1513
1514 1514 def read(self):
1515 1515 if self._data is None:
1516 1516 rl = self._revlog()
1517 1517 if self._node == revlog.nullid:
1518 1518 self._data = treemanifest()
1519 1519 elif rl._treeondisk:
1520 1520 m = treemanifest(dir=self._dir)
1521 1521 def gettext():
1522 1522 return rl.revision(self._node)
1523 1523 def readsubtree(dir, subm):
1524 1524 # Set verify to False since we need to be able to create
1525 1525 # subtrees for trees that don't exist on disk.
1526 1526 return self._manifestlog.get(dir, subm, verify=False).read()
1527 1527 m.read(gettext, readsubtree)
1528 1528 m.setnode(self._node)
1529 1529 self._data = m
1530 1530 else:
1531 1531 text = rl.revision(self._node)
1532 arraytext = array.array('c', text)
1532 arraytext = bytearray(text)
1533 1533 rl.fulltextcache[self._node] = arraytext
1534 1534 self._data = treemanifest(dir=self._dir, text=text)
1535 1535
1536 1536 return self._data
1537 1537
1538 1538 def node(self):
1539 1539 return self._node
1540 1540
1541 1541 def new(self, dir=''):
1542 1542 return memtreemanifestctx(self._manifestlog, dir=dir)
1543 1543
1544 1544 def copy(self):
1545 1545 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1546 1546 memmf._treemanifest = self.read().copy()
1547 1547 return memmf
1548 1548
1549 1549 @propertycache
1550 1550 def parents(self):
1551 1551 return self._revlog().parents(self._node)
1552 1552
1553 1553 def readdelta(self, shallow=False):
1554 1554 '''Returns a manifest containing just the entries that are present
1555 1555 in this manifest, but not in its p1 manifest. This is efficient to read
1556 1556 if the revlog delta is already p1.
1557 1557
1558 1558 If `shallow` is True, this will read the delta for this directory,
1559 1559 without recursively reading subdirectory manifests. Instead, any
1560 1560 subdirectory entry will be reported as it appears in the manifest, i.e.
1561 1561 the subdirectory will be reported among files and distinguished only by
1562 1562 its 't' flag.
1563 1563 '''
1564 1564 revlog = self._revlog()
1565 1565 if shallow and not revlog._usemanifestv2:
1566 1566 r = revlog.rev(self._node)
1567 1567 d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
1568 1568 return manifestdict(d)
1569 1569 else:
1570 1570 # Need to perform a slow delta
1571 1571 r0 = revlog.deltaparent(revlog.rev(self._node))
1572 1572 m0 = self._manifestlog.get(self._dir, revlog.node(r0)).read()
1573 1573 m1 = self.read()
1574 1574 md = treemanifest(dir=self._dir)
1575 1575 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1576 1576 if n1:
1577 1577 md[f] = n1
1578 1578 if fl1:
1579 1579 md.setflag(f, fl1)
1580 1580 return md
1581 1581
1582 1582 def readfast(self, shallow=False):
1583 1583 '''Calls either readdelta or read, based on which would be less work.
1584 1584 readdelta is called if the delta is against the p1, and therefore can be
1585 1585 read quickly.
1586 1586
1587 1587 If `shallow` is True, it only returns the entries from this manifest,
1588 1588 and not any submanifests.
1589 1589 '''
1590 1590 rl = self._revlog()
1591 1591 r = rl.rev(self._node)
1592 1592 deltaparent = rl.deltaparent(r)
1593 1593 if (deltaparent != revlog.nullrev and
1594 1594 deltaparent in rl.parentrevs(r)):
1595 1595 return self.readdelta(shallow=shallow)
1596 1596
1597 1597 if shallow:
1598 1598 return manifestdict(rl.revision(self._node))
1599 1599 else:
1600 1600 return self.read()
1601 1601
1602 1602 def find(self, key):
1603 1603 return self.read().find(key)
@@ -1,569 +1,568 b''
1 1 # tags.py - read tag info from local repository
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2009 Greg Ward <greg@gerg.ca>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 # Currently this module only deals with reading and caching tags.
10 10 # Eventually, it could take care of updating (adding/removing/moving)
11 11 # tags too.
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import array
16 16 import errno
17 17
18 18 from .node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 short,
23 23 )
24 24 from . import (
25 25 encoding,
26 26 error,
27 27 scmutil,
28 28 util,
29 29 )
30 30
31 31 array = array.array
32 32
33 33 # Tags computation can be expensive and caches exist to make it fast in
34 34 # the common case.
35 35 #
36 36 # The "hgtagsfnodes1" cache file caches the .hgtags filenode values for
37 37 # each revision in the repository. The file is effectively an array of
38 38 # fixed length records. Read the docs for "hgtagsfnodescache" for technical
39 39 # details.
40 40 #
41 41 # The .hgtags filenode cache grows in proportion to the length of the
42 42 # changelog. The file is truncated when the changelog is stripped.
43 43 #
44 44 # The purpose of the filenode cache is to avoid the most expensive part
45 45 # of finding global tags, which is looking up the .hgtags filenode in the
46 46 # manifest for each head. This can take dozens of milliseconds, or over
47 47 # 100ms for repositories with very large manifests. Multiplied by dozens
48 48 # or even hundreds of heads, this becomes a significant performance concern.
49 49 #
50 50 # There also exists a separate cache file for each repository filter.
51 51 # These "tags-*" files store information about the history of tags.
52 52 #
53 53 # The tags cache files consist of a cache validation line followed by
54 54 # a history of tags.
55 55 #
56 56 # The cache validation line has the format:
57 57 #
58 58 # <tiprev> <tipnode> [<filteredhash>]
59 59 #
60 60 # <tiprev> is an integer revision and <tipnode> is a 40 character hex
61 61 # node for that changeset. These redundantly identify the repository
62 62 # tip from the time the cache was written. In addition, <filteredhash>,
63 63 # if present, is a 40 character hex hash of the contents of the filtered
64 64 # revisions for this filter. If the set of filtered revs changes, the
65 65 # hash will change and invalidate the cache.
66 66 #
67 67 # The history part of the tags cache consists of lines of the form:
68 68 #
69 69 # <node> <tag>
70 70 #
71 71 # (This format is identical to that of .hgtags files.)
72 72 #
73 73 # <tag> is the tag name and <node> is the 40 character hex changeset
74 74 # the tag is associated with.
75 75 #
76 76 # Tags are written sorted by tag name.
77 77 #
78 78 # Tags associated with multiple changesets have an entry for each changeset.
79 79 # The most recent changeset (in terms of revlog ordering for the head
80 80 # setting it) for each tag is last.
81 81
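A hedged sketch of parsing the layout documented above; the sample lines are invented rather than read from a real tags2 cache file:

    sample = ('123 ' + 'a' * 40 + ' ' + 'b' * 40 + '\n' +  # validation line
              'c' * 40 + ' v1.0\n')                        # history line
    lines = sample.splitlines()
    fields = lines[0].split()
    tiprev, tipnode = int(fields[0]), fields[1]
    filteredhash = fields[2] if len(fields) > 2 else None
    node, tag = lines[1].split(' ', 1)
    assert (tiprev, tag) == (123, 'v1.0')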
82 82 def findglobaltags(ui, repo, alltags, tagtypes):
83 83 '''Find global tags in a repo.
84 84
85 85 "alltags" maps tag name to (node, hist) 2-tuples.
86 86
87 87 "tagtypes" maps tag name to tag type. Global tags always have the
88 88 "global" tag type.
89 89
90 90 The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
91 91 should be passed in.
92 92
93 93 The tags cache is read and updated as a side-effect of calling.
94 94 '''
95 95 # This is so we can be lazy and assume alltags contains only global
96 96 # tags when we pass it to _writetagcache().
97 97 assert len(alltags) == len(tagtypes) == 0, \
98 98 "findglobaltags() should be called first"
99 99
100 100 (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
101 101 if cachetags is not None:
102 102 assert not shouldwrite
103 103 # XXX is this really 100% correct? are there oddball special
104 104 # cases where a global tag should outrank a local tag but won't,
105 105 # because cachetags does not contain rank info?
106 106 _updatetags(cachetags, 'global', alltags, tagtypes)
107 107 return
108 108
109 109 seen = set() # set of fnode
110 110 fctx = None
111 111 for head in reversed(heads): # oldest to newest
112 112 assert head in repo.changelog.nodemap, \
113 113 "tag cache returned bogus head %s" % short(head)
114 114
115 115 fnode = tagfnode.get(head)
116 116 if fnode and fnode not in seen:
117 117 seen.add(fnode)
118 118 if not fctx:
119 119 fctx = repo.filectx('.hgtags', fileid=fnode)
120 120 else:
121 121 fctx = fctx.filectx(fnode)
122 122
123 123 filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
124 124 _updatetags(filetags, 'global', alltags, tagtypes)
125 125
126 126 # and update the cache (if necessary)
127 127 if shouldwrite:
128 128 _writetagcache(ui, repo, valid, alltags)
129 129
130 130 def readlocaltags(ui, repo, alltags, tagtypes):
131 131 '''Read local tags in repo. Update alltags and tagtypes.'''
132 132 try:
133 133 data = repo.vfs.read("localtags")
134 134 except IOError as inst:
135 135 if inst.errno != errno.ENOENT:
136 136 raise
137 137 return
138 138
139 139 # localtags is in the local encoding; re-encode to UTF-8 on
140 140 # input for consistency with the rest of this module.
141 141 filetags = _readtags(
142 142 ui, repo, data.splitlines(), "localtags",
143 143 recode=encoding.fromlocal)
144 144
145 145 # remove tags pointing to invalid nodes
146 146 cl = repo.changelog
147 147 for t in filetags.keys():
148 148 try:
149 149 cl.rev(filetags[t][0])
150 150 except (LookupError, ValueError):
151 151 del filetags[t]
152 152
153 153 _updatetags(filetags, "local", alltags, tagtypes)
154 154
155 155 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
156 156 '''Read tag definitions from a file (or any source of lines).
157 157
158 158 This function returns two sortdicts with similar information:
159 159
160 160 - the first dict, bintaghist, contains the tag information as expected by
161 161 the _readtags function, i.e. a mapping from tag name to (node, hist):
162 162 - node is the node id from the last line read for that name,
163 163 - hist is the list of node ids previously associated with it (in file
164 164 order). All node ids are binary, not hex.
165 165
166 166 - the second dict, hextaglines, is a mapping from tag name to a list of
167 167 [hexnode, line number] pairs, ordered from the oldest to the newest node.
168 168
169 169 When calcnodelines is False the hextaglines dict is not calculated (an
170 170 empty dict is returned). This is done to improve this function's
171 171 performance in cases where the line numbers are not needed.
172 172 '''
173 173
174 174 bintaghist = util.sortdict()
175 175 hextaglines = util.sortdict()
176 176 count = 0
177 177
178 178 def dbg(msg):
179 179 ui.debug("%s, line %s: %s\n" % (fn, count, msg))
180 180
181 181 for nline, line in enumerate(lines):
182 182 count += 1
183 183 if not line:
184 184 continue
185 185 try:
186 186 (nodehex, name) = line.split(" ", 1)
187 187 except ValueError:
188 188 dbg("cannot parse entry")
189 189 continue
190 190 name = name.strip()
191 191 if recode:
192 192 name = recode(name)
193 193 try:
194 194 nodebin = bin(nodehex)
195 195 except TypeError:
196 196 dbg("node '%s' is not well formed" % nodehex)
197 197 continue
198 198
199 199 # update filetags
200 200 if calcnodelines:
201 201 # map tag name to a list of line numbers
202 202 if name not in hextaglines:
203 203 hextaglines[name] = []
204 204 hextaglines[name].append([nodehex, nline])
205 205 continue
206 206 # map tag name to (node, hist)
207 207 if name not in bintaghist:
208 208 bintaghist[name] = []
209 209 bintaghist[name].append(nodebin)
210 210 return bintaghist, hextaglines
211 211
212 212 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
213 213 '''Read tag definitions from a file (or any source of lines).
214 214
215 215 Returns a mapping from tag name to (node, hist).
216 216
217 217 "node" is the node id from the last line read for that name. "hist"
218 218 is the list of node ids previously associated with it (in file order).
219 219 All node ids are binary, not hex.
220 220 '''
221 221 filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
222 222 calcnodelines=calcnodelines)
223 223 # util.sortdict().__setitem__ is much slower at replacing than inserting
224 224 # new entries. The difference can matter if there are thousands of tags.
225 225 # Create a new sortdict to avoid the performance penalty.
226 226 newtags = util.sortdict()
227 227 for tag, taghist in filetags.items():
228 228 newtags[tag] = (taghist[-1], taghist[:-1])
229 229 return newtags
230 230
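The reshaping at the end of _readtags is worth seeing on toy data: the last node read for a tag becomes its current node, the earlier ones its history (the ids below are fake):

    taghist = ['node-old', 'node-mid', 'node-new']  # file order
    node, hist = taghist[-1], taghist[:-1]
    assert node == 'node-new' and hist == ['node-old', 'node-mid']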
231 231 def _updatetags(filetags, tagtype, alltags, tagtypes):
232 232 '''Incorporate the tag info read from one file into the two
233 233 dictionaries, alltags and tagtypes, that contain all tag
234 234 info (global across all heads plus local).'''
235 235
236 236 for name, nodehist in filetags.iteritems():
237 237 if name not in alltags:
238 238 alltags[name] = nodehist
239 239 tagtypes[name] = tagtype
240 240 continue
241 241
242 242 # we prefer alltags[name] if:
243 243 # it supersedes us OR
244 244 # it and we mutually supersede and it has a higher rank
245 245 # otherwise we win because we're tip-most
246 246 anode, ahist = nodehist
247 247 bnode, bhist = alltags[name]
248 248 if (bnode != anode and anode in bhist and
249 249 (bnode not in ahist or len(bhist) > len(ahist))):
250 250 anode = bnode
251 251 else:
252 252 tagtypes[name] = tagtype
253 253 ahist.extend([n for n in bhist if n not in ahist])
254 254 alltags[name] = anode, ahist
255 255
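A worked toy example of the precedence rule above, with fake node ids: the incoming entry loses when its node already appears in the existing entry's history and the existing node is not superseded in turn:

    anode, ahist = 'n2', ['n1']         # incoming: tag last moved to n2
    bnode, bhist = 'n3', ['n1', 'n2']   # existing: tag later moved to n3
    existing_wins = (bnode != anode and anode in bhist and
                     (bnode not in ahist or len(bhist) > len(ahist)))
    assert existing_wins                # n3 supersedes n2, so n3 is kept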
256 256 def _filename(repo):
257 257 """name of a tagcache file for a given repo or repoview"""
258 258 filename = 'cache/tags2'
259 259 if repo.filtername:
260 260 filename = '%s-%s' % (filename, repo.filtername)
261 261 return filename
262 262
263 263 def _readtagcache(ui, repo):
264 264 '''Read the tag cache.
265 265
266 266 Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).
267 267
268 268 If the cache is completely up-to-date, "cachetags" is a dict of the
269 269 form returned by _readtags() and "heads", "fnodes", and "validinfo" are
270 270 None and "shouldwrite" is False.
271 271
272 272 If the cache is not up to date, "cachetags" is None. "heads" is a list
273 273 of all heads currently in the repository, ordered from tip to oldest.
274 274 "validinfo" is a tuple describing cache validation info. This is used
275 275 when writing the tags cache. "fnodes" is a mapping from head to .hgtags
276 276 filenode. "shouldwrite" is True.
277 277
278 278 If the cache is not up to date, the caller is responsible for reading tag
279 279 info from each returned head. (See findglobaltags().)
280 280 '''
281 281 try:
282 282 cachefile = repo.vfs(_filename(repo), 'r')
283 283 # force reading the file for static-http
284 284 cachelines = iter(cachefile)
285 285 except IOError:
286 286 cachefile = None
287 287
288 288 cacherev = None
289 289 cachenode = None
290 290 cachehash = None
291 291 if cachefile:
292 292 try:
293 293 validline = next(cachelines)
294 294 validline = validline.split()
295 295 cacherev = int(validline[0])
296 296 cachenode = bin(validline[1])
297 297 if len(validline) > 2:
298 298 cachehash = bin(validline[2])
299 299 except Exception:
300 300 # corruption of the cache, just recompute it.
301 301 pass
302 302
303 303 tipnode = repo.changelog.tip()
304 304 tiprev = len(repo.changelog) - 1
305 305
306 306 # Case 1 (common): tip is the same, so nothing has changed.
307 307 # (Unchanged tip trivially means no changesets have been added.
308 308 # But, thanks to localrepository.destroyed(), it also means none
309 309 # have been destroyed by strip or rollback.)
310 310 if (cacherev == tiprev
311 311 and cachenode == tipnode
312 312 and cachehash == scmutil.filteredhash(repo, tiprev)):
313 313 tags = _readtags(ui, repo, cachelines, cachefile.name)
314 314 cachefile.close()
315 315 return (None, None, None, tags, False)
316 316 if cachefile:
317 317 cachefile.close() # ignore rest of file
318 318
319 319 valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))
320 320
321 321 repoheads = repo.heads()
322 322 # Case 2 (uncommon): empty repo; get out quickly and don't bother
323 323 # writing an empty cache.
324 324 if repoheads == [nullid]:
325 325 return ([], {}, valid, {}, False)
326 326
327 327 # Case 3 (uncommon): cache file missing or empty.
328 328
329 329 # Case 4 (uncommon): tip rev decreased. This should only happen
330 330 # when we're called from localrepository.destroyed(). Refresh the
331 331 # cache so future invocations will not see disappeared heads in the
332 332 # cache.
333 333
334 334 # Case 5 (common): tip has changed, so we've added/replaced heads.
335 335
336 336 # As it happens, the code to handle cases 3, 4, 5 is the same.
337 337
338 338 # N.B. in case 4 (nodes destroyed), "new head" really means "newly
339 339 # exposed".
340 340 if not len(repo.file('.hgtags')):
341 341 # No tags have ever been committed, so we can avoid a
342 342 # potentially expensive search.
343 343 return ([], {}, valid, None, True)
344 344
345 345 starttime = util.timer()
346 346
347 347 # Now we have to look up the .hgtags filenode for every new head.
348 348 # This is the most expensive part of finding tags, so performance
349 349 # depends primarily on the size of newheads. Worst case: no cache
350 350 # file, so newheads == repoheads.
351 351 fnodescache = hgtagsfnodescache(repo.unfiltered())
352 352 cachefnode = {}
353 353 for head in reversed(repoheads):
354 354 fnode = fnodescache.getfnode(head)
355 355 if fnode != nullid:
356 356 cachefnode[head] = fnode
357 357
358 358 fnodescache.write()
359 359
360 360 duration = util.timer() - starttime
361 361 ui.log('tagscache',
362 362 '%d/%d cache hits/lookups in %0.4f '
363 363 'seconds\n',
364 364 fnodescache.hitcount, fnodescache.lookupcount, duration)
365 365
366 366 # Caller has to iterate over all heads, but can use the filenodes in
367 367 # cachefnode to get to each .hgtags revision quickly.
368 368 return (repoheads, cachefnode, valid, None, True)
369 369
370 370 def _writetagcache(ui, repo, valid, cachetags):
371 371 filename = _filename(repo)
372 372 try:
373 373 cachefile = repo.vfs(filename, 'w', atomictemp=True)
374 374 except (OSError, IOError):
375 375 return
376 376
377 377 ui.log('tagscache', 'writing .hg/%s with %d tags\n',
378 378 filename, len(cachetags))
379 379
380 380 if valid[2]:
381 381 cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
382 382 else:
383 383 cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))
384 384
385 385 # Tag names in the cache are in UTF-8 -- which is the whole reason
386 386 # we keep them in UTF-8 throughout this module. If we converted
387 387 # them local encoding on input, we would lose info writing them to
388 388 # the cache.
389 389 for (name, (node, hist)) in sorted(cachetags.iteritems()):
390 390 for n in hist:
391 391 cachefile.write("%s %s\n" % (hex(n), name))
392 392 cachefile.write("%s %s\n" % (hex(node), name))
393 393
394 394 try:
395 395 cachefile.close()
396 396 except (OSError, IOError):
397 397 pass
398 398
399 399 _fnodescachefile = 'cache/hgtagsfnodes1'
400 400 _fnodesrecsize = 4 + 20 # changeset fragment + filenode
401 401 _fnodesmissingrec = '\xff' * 24
402 402
403 403 class hgtagsfnodescache(object):
404 404 """Persistent cache mapping revisions to .hgtags filenodes.
405 405
406 406 The cache is an array of records. Each item in the array corresponds to
407 407 a changelog revision. Values in the array contain the first 4 bytes of
408 408 the node hash and the 20-byte .hgtags filenode for that revision.
409 409
410 410 The first 4 bytes are present as a form of verification. Repository
411 411 stripping and rewriting may change the node at a numeric revision in the
412 412 changelog. The changeset fragment serves as a verifier to detect
413 413 rewriting. This logic is shared with the rev branch cache (see
414 414 branchmap.py).
415 415
416 416 The instance holds in memory the full cache content but entries are
417 417 only parsed on read.
418 418
419 419 Lookups go through ``getfnode(node)``, where node identifies a
420 420 changeset. Missing entries are computed and populated on access.
421 421 """
422 422 def __init__(self, repo):
423 423 assert repo.filtername is None
424 424
425 425 self._repo = repo
426 426
427 427 # Only for reporting purposes.
428 428 self.lookupcount = 0
429 429 self.hitcount = 0
430 430
431 self._raw = array('c')
432 431
433 432 try:
434 433 data = repo.vfs.read(_fnodescachefile)
435 434 except (OSError, IOError):
436 435 data = ""
437 self._raw.fromstring(data)
436 self._raw = bytearray(data)
438 437
439 438 # The end state of self._raw is an array that is of the exact length
440 439 # required to hold a record for every revision in the repository.
441 440 # We truncate or extend the array as necessary. self._dirtyoffset is
442 441 # defined to be the start offset at which we need to write the output
443 442 # file. This offset is also adjusted when new entries are calculated
444 443 # for array members.
445 444 cllen = len(repo.changelog)
446 445 wantedlen = cllen * _fnodesrecsize
447 446 rawlen = len(self._raw)
448 447
449 448 self._dirtyoffset = None
450 449
451 450 if rawlen < wantedlen:
452 451 self._dirtyoffset = rawlen
453 452 self._raw.extend('\xff' * (wantedlen - rawlen))
454 453 elif rawlen > wantedlen:
455 454 # There's no easy way to truncate array instances. This seems
456 455 # slightly less evil than copying a potentially large array slice.
457 456 for i in range(rawlen - wantedlen):
458 457 self._raw.pop()
459 458 self._dirtyoffset = len(self._raw)
460 459
461 460 def getfnode(self, node, computemissing=True):
462 461 """Obtain the filenode of the .hgtags file at a specified revision.
463 462
464 463 If the value is in the cache, the entry will be validated and returned.
465 464 Otherwise, the filenode will be computed and returned unless
466 465 "computemissing" is False, in which case None will be returned without
467 466 any potentially expensive computation being performed.
468 467
469 468 If an .hgtags does not exist at the specified revision, nullid is
470 469 returned.
471 470 """
472 471 ctx = self._repo[node]
473 472 rev = ctx.rev()
474 473
475 474 self.lookupcount += 1
476 475
477 476 offset = rev * _fnodesrecsize
478 record = self._raw[offset:offset + _fnodesrecsize].tostring()
477 record = '%s' % self._raw[offset:offset + _fnodesrecsize]
479 478 properprefix = node[0:4]
480 479
481 480 # Validate and return existing entry.
482 481 if record != _fnodesmissingrec:
483 482 fileprefix = record[0:4]
484 483
485 484 if fileprefix == properprefix:
486 485 self.hitcount += 1
487 486 return record[4:]
488 487
489 488 # Fall through.
490 489
491 490 # If we get here, the entry is either missing or invalid.
492 491
493 492 if not computemissing:
494 493 return None
495 494
496 495 # Populate missing entry.
497 496 try:
498 497 fnode = ctx.filenode('.hgtags')
499 498 except error.LookupError:
500 499 # No .hgtags file on this revision.
501 500 fnode = nullid
502 501
503 502 self._writeentry(offset, properprefix, fnode)
504 503 return fnode
505 504
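A sketch of the fixed-width record math used by getfnode, with the sizes taken from the constants above and fabricated node values (Python 2 string semantics, matching this module):

    recsize = 4 + 20                       # changeset fragment + filenode
    raw = bytearray('\xff' * recsize * 3)  # three missing records
    node, fnode = 'N' * 20, 'F' * 20       # fake changeset node / filenode
    offset = 2 * recsize                   # record slot for rev 2
    raw[offset:offset + recsize] = bytearray(node[0:4] + fnode)
    record = '%s' % raw[offset:offset + recsize]
    assert record[0:4] == node[0:4] and record[4:] == fnode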
506 505 def setfnode(self, node, fnode):
507 506 """Set the .hgtags filenode for a given changeset."""
508 507 assert len(fnode) == 20
509 508 ctx = self._repo[node]
510 509
511 510 # Do a lookup first to avoid writing if nothing has changed.
512 511 if self.getfnode(ctx.node(), computemissing=False) == fnode:
513 512 return
514 513
515 514 self._writeentry(ctx.rev() * _fnodesrecsize, node[0:4], fnode)
516 515
517 516 def _writeentry(self, offset, prefix, fnode):
518 517 # Slices on array instances only accept other array.
519 entry = array('c', prefix + fnode)
518 entry = bytearray(prefix + fnode)
520 519 self._raw[offset:offset + _fnodesrecsize] = entry
521 520 # self._dirtyoffset could be None.
522 521 self._dirtyoffset = min(self._dirtyoffset, offset) or 0
523 522
524 523 def write(self):
525 524 """Perform all necessary writes to cache file.
526 525
527 526 This may no-op if no writes are needed or if a write lock could
528 527 not be obtained.
529 528 """
530 529 if self._dirtyoffset is None:
531 530 return
532 531
533 532 data = self._raw[self._dirtyoffset:]
534 533 if not data:
535 534 return
536 535
537 536 repo = self._repo
538 537
539 538 try:
540 539 lock = repo.wlock(wait=False)
541 540 except error.LockError:
542 541 repo.ui.log('tagscache',
543 542 'not writing .hg/%s because lock cannot be acquired\n' %
544 543 (_fnodescachefile))
545 544 return
546 545
547 546 try:
548 547 f = repo.vfs.open(_fnodescachefile, 'ab')
549 548 try:
550 549 # if the file has been truncated
551 550 actualoffset = f.tell()
552 551 if actualoffset < self._dirtyoffset:
553 552 self._dirtyoffset = actualoffset
554 553 data = self._raw[self._dirtyoffset:]
555 554 f.seek(self._dirtyoffset)
556 555 f.truncate()
557 556 repo.ui.log('tagscache',
558 557 'writing %d bytes to %s\n' % (
559 558 len(data), _fnodescachefile))
560 559 f.write(data)
561 560 self._dirtyoffset = None
562 561 finally:
563 562 f.close()
564 563 except (IOError, OSError) as inst:
565 564 repo.ui.log('tagscache',
566 565 "couldn't write %s: %s\n" % (
567 566 _fnodescachefile, inst))
568 567 finally:
569 568 lock.release()