upstream/mercurial-mirror Files · mercurial/branching/rev_cache.py

wirepeer: subclass new `repository.ipeer{,legacy}commands` Proctocol classes...

wirepeer: subclass new `repository.ipeer{,legacy}commands` Proctocol classes This is the same transformation as did for dirstate, but the CamelCase naming was already cleaned up here. See for the benefits of explicit subclassing. PyCharm is flagging the `wirepeer.getbundle` function with: Type of 'getbundle' is incompatible with 'ipeercommands' I've no idea why- maybe it's because it can infer a `unbundle20 | cg1unpacker` return there, or maybe it's the kwargs. Something to keep an eye on, but pytype doesn't complain. Since we're direct subclassing here and there are only a few methods on these interfaces, also make them abstract like was done in ef119f914fc1.

Matt Harbison - - Load All Authors

File last commit:

r53010:db1980a3 default


                r53394:61f70a6a

default

Download file

             rev_cache.py
        
                    455 lines
            
             | 16.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / branching / rev_cache.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
      # rev_cache.py - caching branch information per revision

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import annotations

        marmoute
    
rev-branch-cache: stop truncating cache file...

              r52798
            
      import os

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
      import struct

      from ..node import (

          nullrev,

      )

      from .. import (

          encoding,

          error,

        Matt Harbison
    
rev-branch-cache: disable mmapping by default on Windows...

              r53010
            
          pycompat,

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
          util,

      )

      from ..utils import (

          stringutil,

      )

      calcsize = struct.calcsize

      pack_into = struct.pack_into

      unpack_from = struct.unpack_from

      # Revision branch info cache

        marmoute
    
rev-branch-cache: increment the version to "v2"...

              r52799
            
      # The "V2" version uses the same format as the "V1" but guarantees it won't be

      # truncated, preventing SIGBUS when it is mmap-ed

      _rbcversion = b'-v2'

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
      _rbcnames = b'rbc-names' + _rbcversion

      _rbcrevs = b'rbc-revs' + _rbcversion

        marmoute
    
rev-branch-cache: fallback on "v1" data if no v2 is found...

              r52800
            
      _rbc_legacy_version = b'-v1'

      _rbc_legacy_names = b'rbc-names' + _rbc_legacy_version

      _rbc_legacy_revs = b'rbc-revs' + _rbc_legacy_version

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
      # [4 byte hash prefix][4 byte branch name number with high bit indicating close]

      _rbcrecfmt = b'>4sI'

      _rbcrecsize = calcsize(_rbcrecfmt)

      _rbcmininc = 64 * _rbcrecsize

      _rbcnodelen = 4

      _rbcbranchidxmask = 0x7FFFFFFF

      _rbccloseflag = 0x80000000

        marmoute
    
rev-branch-cache: stop truncating cache file...

              r52798
            
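      # Fraction of the on-disk rbc-revs size: when a write affects at least that
      # much data, the whole file is rewritten with atomic replacement.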

      REWRITE_RATIO = 0.2

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
      class rbcrevs:
          """A byte string made of an immutable prefix and a mutable suffix.

          ``_prefix`` holds the data handed to the constructor and is never
          modified in place; ``_rest`` is a ``bytearray`` that receives every
          change.  Only the part of the prefix made of whole records (a multiple
          of ``_rbcrecsize`` bytes) is taken into account, unaligned trailing
          bytes are ignored.  Every offset handled by this class is a byte
          offset into the logical ``prefix + rest`` string.
          """

          def __init__(self, revs):
              """Wrap ``revs`` (a bytes-like object) as the immutable prefix."""
              self._prefix = revs
              self._rest = bytearray()

          @property
          def len_prefix(self):
              """Size of the prefix, rounded down to a whole number of records."""
              size = len(self._prefix)
              return size - (size % _rbcrecsize)

          def __len__(self):
              """Total usable size in bytes: aligned prefix plus mutable suffix."""
              return self.len_prefix + len(self._rest)

          def unpack_record(self, rbcrevidx):
              """Unpack the record stored at byte offset ``rbcrevidx``.

              Returns the tuple produced by ``_rbcrecfmt``.
              """
              if rbcrevidx < self.len_prefix:
                  return unpack_from(
                      _rbcrecfmt, util.buffer(self._prefix), rbcrevidx
                  )
              return unpack_from(
                  _rbcrecfmt,
                  util.buffer(self._rest),
                  rbcrevidx - self.len_prefix,
              )

          def make_mutable(self):
              """Move the aligned prefix into the mutable suffix.

              After this call the prefix is empty and ``_rest`` holds everything.
              """
              if self.len_prefix > 0:
                  entirety = bytearray()
                  entirety[:] = self._prefix[: self.len_prefix]
                  entirety.extend(self._rest)
                  self._rest = entirety
                  self._prefix = bytearray()

          def truncate(self, pos):
              """Drop everything located at or after byte offset ``pos``."""
              self.make_mutable()
              del self._rest[pos:]

          def pack_into(self, rbcrevidx, node, branchidx):
              """Store the record ``(node, branchidx)`` at byte offset ``rbcrevidx``.

              The data is made mutable first when the offset lies in the prefix,
              and the suffix is grown when it is too short to hold the record.
              """
              if rbcrevidx < self.len_prefix:
                  self.make_mutable()
              buf = self._rest
              start_offset = rbcrevidx - self.len_prefix
              end_offset = start_offset + _rbcrecsize

              if len(self._rest) < end_offset:
                  # bytearray doesn't allocate extra space at least in Python 3.7.
                  # When multiple changesets are added in a row, precise resize would
                  # result in quadratic complexity. Overallocate to compensate by
                  # using the classic doubling technique for dynamic arrays instead.
                  # If there was a gap in the map before, less space will be reserved.
                  self._rest.extend(b'\0' * end_offset)
              return pack_into(
                  _rbcrecfmt,
                  buf,
                  start_offset,
                  node,
                  branchidx,
              )

          def extend(self, extension):
              """Append ``extension`` to the mutable suffix."""
              return self._rest.extend(extension)

          def slice(self, begin, end):
              """Return a ``bytearray`` copy of the bytes in ``[begin, end)``.

              The range may sit in the prefix, in the suffix, or span both.
              """
              len_prefix = self.len_prefix
              # Offsets relative to the suffix are clamped at zero: a negative
              # value would be read by Python as "count from the end" and would
              # select the wrong part of the suffix when the requested range
              # starts or ends inside the prefix.
              rest_begin = max(begin - len_prefix, 0)
              rest_end = max(end - len_prefix, 0)
              if begin < len_prefix:
                  acc = bytearray()
                  acc[:] = self._prefix[begin : min(end, len_prefix)]
                  acc.extend(self._rest[rest_begin:rest_end])
                  return acc
              return self._rest[rest_begin:rest_end]

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
      class revbranchcache:

          """Persistent cache, mapping from revision number to branch name and close.

          This is a low level cache, independent of filtering.

          Branch names are stored in rbc-names in internal encoding separated by 0.

          rbc-names is append-only, and each branch name is only stored once and will

          thus have a unique index.

          The branch info for each revision is stored in rbc-revs as constant size

          records. The whole file is read into memory, but it is only 'parsed' on

          demand. The file is usually append-only but will be truncated if repo

          modification is detected.

          The record for each revision contains the first 4 bytes of the

          corresponding node hash, and the record is only used if it still matches.

          Even a completely trashed rbc-revs file will thus still give the right result

          while converging towards full recovery ... assuming no incorrectly matching

          node hashes.

          The record also contains 4 bytes where 31 bits contain the index of the

          branch and the last bit indicates that it is a branch close commit.

          The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i

          and will grow with it but be 1/8th of its size.

          """

          def __init__(self, repo, readonly=True):
              """Load the cached names and records for the unfiltered ``repo``.

              The "v2" names file is tried first, then the "v1" one.  When only
              "v1" data is found, a full rewrite is scheduled.  When no names
              file can be read and ``readonly`` is true, ``branchinfo`` is
              replaced on the instance by the uncached ``_branchinfo``.
              """
              assert repo.filtername is None
              self._repo = repo
              self._names = []  # branch names in local encoding with static index
              self._rbcrevs = rbcrevs(bytearray())
              self._rbcsnameslen = 0  # size of the names data that was read
              self._force_overwrite = False
              legacy = False
              try:
                  try:
                      raw_names = repo.cachevfs.read(_rbcnames)
                  except (IOError, OSError):
                      # No "v2" data: "v1" data might still be worth using.
                      #
                      # consider stop doing this many version after hg-6.9 release
                      raw_names = repo.cachevfs.read(_rbc_legacy_names)
                      legacy = True
                      self._force_overwrite = True
                  # kept for verification before writing
                  self._rbcsnameslen = len(raw_names)
                  if raw_names:
                      self._names = list(
                          map(encoding.tolocal, raw_names.split(b'\0'))
                      )
              except (IOError, OSError):
                  if readonly:
                      # don't try to use cache - fall back to the slow path
                      self.branchinfo = self._branchinfo

              if self._names:
                  try:
                      # In order to rename the atomictempfile in _writerevs(), the
                      # existing file needs to be removed.  The Windows code
                      # (successfully) renames it to a temp file first, before
                      # moving the temp file into its place.  But the removal of
                      # the original file then fails, because it's still mapped.
                      # The mmap object needs to be closed in order to remove the
                      # file, but in order to do that, the memoryview returned by
                      # util.buffer needs to be released.
                      mmap_enabled = repo.ui.configbool(
                          b'storage',
                          b'revbranchcache.mmap',
                          default=not pycompat.iswindows,
                      )
                      if legacy:
                          # Reading the "v1" records, same remark as for the
                          # names: consider dropping this many versions after
                          # the hg-6.9 release.
                          with repo.cachevfs(_rbc_legacy_revs) as fp:
                              payload = fp.read()
                      else:
                          with repo.cachevfs(_rbcrevs) as fp:
                              if mmap_enabled and repo.cachevfs.is_mmap_safe(
                                  _rbcrevs
                              ):
                                  payload = util.buffer(util.mmapread(fp))
                              else:
                                  payload = fp.read()
                      self._rbcrevs = rbcrevs(payload)
                  except (IOError, OSError) as exc:
                      repo.ui.debug(
                          b"couldn't read revision branch cache: %s\n"
                          % stringutil.forcebytestr(exc)
                      )
              # remember number of good records on disk
              good_records = min(
                  len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
              )
              self._rbcrevslen = good_records
              if good_records == 0:
                  self._names = []
              # number of names that were read from disk
              self._rbcnamescount = len(self._names)

          def _clear(self):
              """Forget all cached names and records and schedule a full rewrite.

              The records are replaced by zero-filled entries, one per revision
              of the changelog.
              """
              self._rbcsnameslen = 0
              self._names.clear()
              self._rbcnamescount = 0
              rev_count = len(self._repo.changelog)
              self._rbcrevslen = rev_count
              self._rbcrevs = rbcrevs(bytearray(rev_count * _rbcrecsize))
              util.clearcachedproperty(self, b'_namesreverse')
              self._force_overwrite = True

          def invalidate(self, rev=0):
              """Drop the cached records from revision ``rev`` onward.

              The records of revisions below ``rev`` are kept in memory and a
              full rewrite of the cache is scheduled for the next ``write()``.
              """
              self._rbcrevslen = rev
              # rbcrevs.truncate() expects a byte offset, not a revision number
              self._rbcrevs.truncate(rev * _rbcrecsize)
              self._force_overwrite = True

        marmoute
    
rev-branch-cache: move the code in a dedicated module...

              r52794
            
          @util.propertycache
          def _namesreverse(self):
              """Mapping from each known branch name to its index in ``_names``."""
              index_by_name = {}
              for idx, name in enumerate(self._names):
                  index_by_name[name] = idx
              return index_by_name

          def branchinfo(self, rev):
              """Return branch name and close flag for rev, using and updating
              persistent cache.

              The cached record is only trusted when its node prefix matches the
              changelog; in every other case the information is read from the
              changelog through ``_branchinfo`` and the cache is updated.
              """
              changelog = self._repo.changelog
              rbcrevidx = rev * _rbcrecsize

              # avoid negative index, changelog.read(nullrev) is fast without cache
              if rev == nullrev:
                  return changelog.branchinfo(rev)

              # if requested rev isn't allocated, grow and cache the rev info
              if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
                  return self._branchinfo(rev)

              # fast path: extract data from cache, use it if node is matching
              reponode = changelog.node(rev)[:_rbcnodelen]
              cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
              close = bool(branchidx & _rbccloseflag)
              if close:
                  branchidx &= _rbcbranchidxmask
              if cachenode == b'\0\0\0\0':
                  # empty record: nothing cached for this revision yet
                  pass
              elif cachenode == reponode:
                  try:
                      return self._names[branchidx], close
                  except IndexError:
                      # recover from invalid reference to unknown branch
                      self._repo.ui.debug(
                          b"referenced branch names not found"
                          b" - rebuilding revision branch cache from scratch\n"
                      )
                      self._clear()
              else:
                  # rev/node map has changed, invalidate the cache from here up
                  self._repo.ui.debug(
                      b"history modification detected - truncating "
                      b"revision branch cache to revision %d\n" % rev
                  )
                  # keep the slot of ``rev`` itself, it is rewritten just below
                  self._rbcrevs.truncate(rbcrevidx + _rbcrecsize)
                  # _rbcrevslen counts records, so compare it with a revision
                  # number and not with a byte offset
                  self._rbcrevslen = min(self._rbcrevslen, rev)

              # fall back to slow path and make sure it will be written to disk
              return self._branchinfo(rev)

          def _branchinfo(self, rev):
              """Read the branch info of ``rev`` from the changelog and cache it.

              Returns the ``(branch name, close flag)`` pair.
              """
              cl = self._repo.changelog
              name, closed = cl.branchinfo(rev)
              idx = self._namesreverse.get(name)
              if idx is None:
                  # first time this branch is seen: give it the next index
                  idx = len(self._names)
                  self._names.append(name)
                  self._namesreverse[name] = idx
              node = cl.node(rev)
              if closed:
                  idx |= _rbccloseflag
              self._setcachedata(rev, node, idx)
              return name, closed

          def setdata(self, rev, changelogrevision):
              """Record in the cache the branch info carried by ``changelogrevision``
              for revision ``rev``."""
              name, closed = changelogrevision.branchinfo
              idx = self._namesreverse.get(name)
              if idx is None:
                  # first time this branch is seen: give it the next index
                  idx = len(self._names)
                  self._names.append(name)
                  self._namesreverse[name] = idx
              if closed:
                  idx |= _rbccloseflag
              self._setcachedata(rev, self._repo.changelog.node(rev), idx)
              # When no cache data could be read (missing, bad permission, etc),
              # the constructor made the cache bypass itself with:
              #
              #   self.branchinfo = self._branchinfo
              #
              # The cache now holds data, so that instance-level override must go.
              self.__dict__.pop('branchinfo', None)

          def _setcachedata(self, rev, node, branchidx):
              """Store the branch data of ``rev`` in the in-memory cache.

              Nothing is done for ``nullrev``.  When a transaction is running, a
              write of the cache is registered as one of its finalizers.
              """
              if rev != nullrev:
                  self._rbcrevs.pack_into(rev * _rbcrecsize, node, branchidx)
                  # records from ``rev`` onward are not on disk anymore
                  if rev < self._rbcrevslen:
                      self._rbcrevslen = rev

                  transaction = self._repo.currenttransaction()
                  if transaction:
                      transaction.addfinalize(b'write-revbranchcache', self.write)

          def write(self, tr=None):
              """Write the names and the records to disk when they changed.

              The wlock is taken without waiting.  I/O, abort and lock errors are
              reported through ``ui.debug`` and are not propagated.
              """
              repo = self._repo
              lock = None
              step = b''
              try:
                  # names first
                  if self._force_overwrite or len(self._names) > self._rbcnamescount:
                      lock = repo.wlock(wait=False)
                      step = b' names'
                      self._writenames(repo)

                  # then the records; the offset is computed only now because
                  # writing the names may reset the number of valid records
                  start = self._rbcrevslen * _rbcrecsize
                  if self._force_overwrite or len(self._rbcrevs) != start:
                      step = b''
                      if lock is None:
                          lock = repo.wlock(wait=False)
                      self._writerevs(repo, start)

              except (IOError, OSError, error.Abort, error.LockError) as exc:
                  reason = stringutil.forcebytestr(exc)
                  repo.ui.debug(
                      b"couldn't write revision branch cache%s: %s\n"
                      % (step, reason)
                  )
              finally:
                  if lock is not None:
                      lock.release()

          def _writenames(self, repo):
              """Write the new branch names to the names file of the cache.

              New names are appended when the file still has the size seen when
              it was read.  Otherwise, or when a full overwrite was requested, the
              records file is removed and the names file is rewritten from
              scratch.
              """
              f = None
              if self._force_overwrite:
                  self._rbcsnameslen = 0
                  self._rbcnamescount = 0
              try:
                  if self._force_overwrite or self._rbcnamescount != 0:
                      f = repo.cachevfs.open(_rbcnames, b'ab')
                      current_size = f.tell()
                      if current_size == self._rbcsnameslen:
                          # The separator is only needed when appending after
                          # existing names; when the file is about to be
                          # rewritten it would be a stray byte.
                          if self._rbcnamescount != 0:
                              f.write(b'\0')
                      else:
                          f.close()
                          f = None
                          if self._force_overwrite:
                              dbg = b"resetting content of %s\n"
                          elif current_size > 0:
                              dbg = b"%s changed - rewriting it\n"
                          else:
                              dbg = b"%s is missing - rewriting it\n"
                          repo.ui.debug(dbg % _rbcnames)
                          self._rbcnamescount = 0
                          self._rbcrevslen = 0
                  if self._rbcnamescount == 0:
                      # Release the append handle before reopening the same
                      # file, instead of leaking it by rebinding ``f``.
                      if f is not None:
                          f.close()
                          f = None
                      # before rewriting names, make sure references are removed
                      repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                      f = repo.cachevfs.open(_rbcnames, b'wb')
                  names = self._names[self._rbcnamescount :]
                  from_local = encoding.fromlocal
                  data = b'\0'.join(from_local(b) for b in names)
                  f.write(data)
                  self._rbcsnameslen = f.tell()
              finally:
                  if f is not None:
                      f.close()
              self._rbcnamescount = len(self._names)

          def _writerevs(self, repo, start):
              """Write the records located from byte offset ``start`` to disk.

              The file is updated in place when the write starts after its
              beginning; otherwise it is fully rewritten through an atomic
              temporary file.
              """
              record_count = min(
                  len(repo.changelog), len(self._rbcrevs) // _rbcrecsize
              )
              end = record_count * _rbcrecsize
              if self._force_overwrite:
                  start = 0

              # round start down to a record boundary
              start -= start % _rbcrecsize

              # opening in append mode makes sure the file exists
              with repo.cachevfs.open(_rbcrevs, b'a+b'):
                  pass
              with repo.cachevfs.open(_rbcrevs, b'r+b') as f:
                  f.seek(0, os.SEEK_END)
                  on_disk = f.tell()
                  if on_disk < start:
                      start = 0
                  if on_disk != start:
                      threshold = on_disk * REWRITE_RATIO
                      overwritten = min(end, on_disk) - start
                      affected = max(end, on_disk) - start
                      if affected >= threshold:
                          start = 0
                          repo.ui.debug(
                              b"resetting content of cache/%s\n" % _rbcrevs
                          )
                      elif overwritten > 0:
                          # existing data is affected, overwrite the bad values
                          msg = b"overwriting %d bytes from %d in cache/%s" % (
                              on_disk - start,
                              start,
                              _rbcrevs,
                          )
                          if end < on_disk:
                              msg += b" leaving (%d trailing bytes)" % (
                                  on_disk - end
                              )
                          repo.ui.debug(msg + b'\n')
                      else:
                          # untouched extra data at the end, just report it
                          assert start == end  # since don't write anything
                          repo.ui.debug(
                              b"cache/%s contains %d unknown trailing bytes\n"
                              % (_rbcrevs, on_disk - start)
                          )

                  if start > 0:
                      f.seek(start)
                      f.write(self._rbcrevs.slice(start, end))
                  else:
                      f.close()
                      with repo.cachevfs.open(
                          _rbcrevs,
                          b'wb',
                          atomictemp=True,
                      ) as atomic_file:
                          atomic_file.write(self._rbcrevs.slice(start, end))
              self._rbcrevslen = record_count
              self._force_overwrite = False

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marmoute rev-branch-cache: move the code in a dedicated module...	r52794	# rev_cache.py - caching branch information per revision
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.
		from __future__ import annotations

marmoute rev-branch-cache: stop truncating cache file...	r52798	import os
marmoute rev-branch-cache: move the code in a dedicated module...	r52794	import struct

		from ..node import (
		nullrev,
		)

		from .. import (
		encoding,
		error,
Matt Harbison rev-branch-cache: disable mmapping by default on Windows...	r53010	pycompat,
marmoute rev-branch-cache: move the code in a dedicated module...	r52794	util,
		)

		from ..utils import (
		stringutil,
		)

		calcsize = struct.calcsize
		pack_into = struct.pack_into
		unpack_from = struct.unpack_from


		# Revision branch info cache

marmoute rev-branch-cache: increment the version to "v2"...	r52799	# The "V2" version uses the same format as "V1" but guarantees it won't be
		# truncated, preventing SIGBUS when it is mmap-ed
		_rbcversion = b'-v2'
marmoute rev-branch-cache: move the code in a dedicated module...	r52794	_rbcnames = b'rbc-names' + _rbcversion
		_rbcrevs = b'rbc-revs' + _rbcversion
marmoute rev-branch-cache: fallback on "v1" data if no v2 is found...	r52800	_rbc_legacy_version = b'-v1'
		_rbc_legacy_names = b'rbc-names' + _rbc_legacy_version
		_rbc_legacy_revs = b'rbc-revs' + _rbc_legacy_version
marmoute rev-branch-cache: move the code in a dedicated module...	r52794	# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
		_rbcrecfmt = b'>4sI'
		_rbcrecsize = calcsize(_rbcrecfmt)
		_rbcmininc = 64 * _rbcrecsize
		_rbcnodelen = 4
		_rbcbranchidxmask = 0x7FFFFFFF
		_rbccloseflag = 0x80000000


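marmoute rev-branch-cache: stop truncating cache file...	r52798	# When the part of the rbc-revs file that has to be written reaches this
		# fraction of the file's current size, the file is rewritten from scratch
		# with atomic replacement.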
		REWRITE_RATIO = 0.2


class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix

    The prefix is the buffer handed to the constructor; only its leading
    part that is a whole number of records (``_rbcrecsize`` bytes each) is
    ever used. The suffix is a ``bytearray`` receiving every modification.
    All offsets taken by the methods are byte offsets into the logical
    concatenation ``prefix[:len_prefix] + rest``.
    """

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    @property
    def len_prefix(self):
        """Size in bytes of the usable prefix, unaligned trailing bytes ignored."""
        size = len(self._prefix)
        return size - (size % _rbcrecsize)

    def __len__(self):
        return self.len_prefix + len(self._rest)

    def unpack_record(self, rbcrevidx):
        """Decode the record starting at byte offset ``rbcrevidx``."""
        if rbcrevidx < self.len_prefix:
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - self.len_prefix,
            )

    def make_mutable(self):
        """Copy the usable prefix into the mutable part and drop the prefix."""
        if self.len_prefix > 0:
            entirety = bytearray()
            entirety[:] = self._prefix[: self.len_prefix]
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        """Drop every byte from byte offset ``pos`` onward."""
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """Encode ``(node, branchidx)`` as the record at byte offset ``rbcrevidx``."""
        if rbcrevidx < self.len_prefix:
            # the record lives in the immutable part: make everything mutable
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - self.len_prefix
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        """Append raw bytes to the mutable part."""
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """Return a copy of the bytes in ``[begin, end)`` as a ``bytearray``.

        The range may start in the prefix and continue into the mutable
        part.
        """
        prefix_len = self.len_prefix
        # Offsets into the mutable part are clamped at 0: a negative value
        # would be interpreted by Python relative to the *end* of ``_rest``
        # and select the wrong bytes.
        rest_begin = max(begin - prefix_len, 0)
        rest_end = max(end - prefix_len, 0)
        if begin < prefix_len:
            acc = bytearray()
            acc[:] = self._prefix[begin : min(end, prefix_len)]
            acc.extend(self._rest[rest_begin:rest_end])
            return acc
        return self._rest[rest_begin:rest_end]
marmoute rev-branch-cache: move the code in a dedicated module...	r52794

		class revbranchcache:
		"""Persistent cache, mapping from revision number to branch name and close.
		This is a low level cache, independent of filtering.

		Branch names are stored in rbc-names in internal encoding separated by 0.
		rbc-names is append-only, and each branch name is only stored once and will
		thus have a unique index.

		The branch info for each revision is stored in rbc-revs as constant size
		records. The whole file is read into memory, but it is only 'parsed' on
		demand. The file is usually append-only but will be truncated if repo
		modification is detected.
		The record for each revision contains the first 4 bytes of the
		corresponding node hash, and the record is only used if it still matches.
		Even a completely trashed rbc-revs will thus still give the right result
		while converging towards full recovery ... assuming no incorrectly matching
		node hashes.
		The record also contains 4 bytes where 31 bits contains the index of the
		branch and the last bit indicate that it is a branch close commit.
		The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
		and will grow with it but be 1/8th of its size.
		"""

		def __init__(self, repo, readonly=True):
		    """Load the persisted cache of the unfiltered ``repo``.

		    The "v2" names file is tried first, then the legacy "v1" one; when
		    the legacy data is used, ``_force_overwrite`` is set. If no names
		    file can be read and ``readonly`` is true, ``branchinfo`` is replaced
		    on the instance by the uncached ``_branchinfo``.
		    """
		    assert repo.filtername is None
		    self._repo = repo
		    # branch names in local encoding with static index
		    self._names = []
		    self._rbcrevs = rbcrevs(bytearray())
		    # size in bytes of the names data read from disk
		    self._rbcsnameslen = 0
		    self._force_overwrite = False
		    used_legacy = False
		    try:
		        try:
		            raw_names = repo.cachevfs.read(_rbcnames)
		        except (IOError, OSError):
		            # No "v2" data: there might be "v1" data worth using.
		            #
		            # Consider dropping this fallback many versions after the
		            # hg-6.9 release.
		            raw_names = repo.cachevfs.read(_rbc_legacy_names)
		            used_legacy = True
		            self._force_overwrite = True
		        # kept for verification before writing
		        self._rbcsnameslen = len(raw_names)
		        if raw_names:
		            self._names = [
		                encoding.tolocal(name) for name in raw_names.split(b'\0')
		            ]
		    except (IOError, OSError):
		        if readonly:
		            # don't try to use cache - fall back to the slow path
		            self.branchinfo = self._branchinfo

		    if self._names:
		        try:
		            # _writerevs() renames an atomictempfile over the existing
		            # file, which requires removing that file. On Windows the
		            # removal fails while the file is still mapped, and the
		            # mapping can only be closed once the memoryview returned by
		            # util.buffer is released. Hence mmap is off by default there.
		            usemmap = repo.ui.configbool(
		                b'storage',
		                b'revbranchcache.mmap',
		                default=not pycompat.iswindows,
		            )
		            if used_legacy:
		                # Names came from "v1", so read the matching "v1" revs.
		                with repo.cachevfs(_rbc_legacy_revs) as rev_fp:
		                    raw_revs = rev_fp.read()
		            else:
		                with repo.cachevfs(_rbcrevs) as rev_fp:
		                    if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs):
		                        raw_revs = util.buffer(util.mmapread(rev_fp))
		                    else:
		                        raw_revs = rev_fp.read()
		            self._rbcrevs = rbcrevs(raw_revs)
		        except (IOError, OSError) as err:
		            repo.ui.debug(
		                b"couldn't read revision branch cache: %s\n"
		                % stringutil.forcebytestr(err)
		            )
		    # remember number of good records on disk
		    records_read = len(self._rbcrevs) // _rbcrecsize
		    self._rbcrevslen = min(records_read, len(repo.changelog))
		    if self._rbcrevslen == 0:
		        # without any record the names are of no use
		        self._names = []
		    # number of names matching the data counted in _rbcsnameslen
		    self._rbcnamescount = len(self._names)

		def _clear(self):
		    """Forget all cached names and records and schedule a full rewrite.

		    The record storage is replaced by zero-filled room for every
		    revision currently in the changelog.
		    """
		    self._rbcsnameslen = 0
		    self._names.clear()
		    self._rbcnamescount = 0
		    rev_count = len(self._repo.changelog)
		    self._rbcrevslen = rev_count
		    self._rbcrevs = rbcrevs(bytearray(rev_count * _rbcrecsize))
		    # the name -> index mapping has to be rebuilt from the emptied list
		    util.clearcachedproperty(self, b'_namesreverse')
		    self._force_overwrite = True

		def invalidate(self, rev=0):
		    """Discard the cached records from revision ``rev`` onward.

		    The next write is forced to overwrite the on-disk data.
		    """
		    self._rbcrevslen = rev
		    # rbcrevs.truncate() works on byte offsets, not on revision numbers
		    self._rbcrevs.truncate(rev * _rbcrecsize)
		    self._force_overwrite = True
marmoute rev-branch-cache: move the code in a dedicated module...	r52794
		@util.propertycache
		def _namesreverse(self):
		    """Mapping from branch name to its index in ``self._names``."""
		    index_by_name = {}
		    for position, name in enumerate(self._names):
		        index_by_name[name] = position
		    return index_by_name

		def branchinfo(self, rev):
		    """Return branch name and close flag for rev, using and updating
		    persistent cache.

		    The cached record is only trusted when its node prefix matches the
		    changelog; otherwise the information is recomputed through
		    ``_branchinfo``.
		    """
		    changelog = self._repo.changelog
		    rbcrevidx = rev * _rbcrecsize

		    # avoid negative index, changelog.read(nullrev) is fast without cache
		    if rev == nullrev:
		        return changelog.branchinfo(rev)

		    # if requested rev isn't allocated, grow and cache the rev info
		    if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
		        return self._branchinfo(rev)

		    # fast path: extract data from cache, use it if node is matching
		    reponode = changelog.node(rev)[:_rbcnodelen]
		    cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
		    close = bool(branchidx & _rbccloseflag)
		    if close:
		        branchidx &= _rbcbranchidxmask
		    if cachenode == b'\0\0\0\0':
		        # all-zero node prefix: not usable, take the slow path
		        pass
		    elif cachenode == reponode:
		        try:
		            return self._names[branchidx], close
		        except IndexError:
		            # recover from invalid reference to unknown branch
		            self._repo.ui.debug(
		                b"referenced branch names not found"
		                b" - rebuilding revision branch cache from scratch\n"
		            )
		            self._clear()
		    else:
		        # rev/node map has changed, invalidate the cache from here up
		        self._repo.ui.debug(
		            b"history modification detected - truncating "
		            b"revision branch cache to revision %d\n" % rev
		        )
		        # keep room for this very record, it is rewritten just below
		        self._rbcrevs.truncate(rbcrevidx + _rbcrecsize)
		        # _rbcrevslen counts records, so clamp it with the revision
		        # number rather than with a byte offset
		        self._rbcrevslen = min(self._rbcrevslen, rev)

		    # fall back to slow path and make sure it will be written to disk
		    return self._branchinfo(rev)

		def _branchinfo(self, rev):
		    """Retrieve branch info from changelog and update _rbcrevs

		    Returns the ``(branch name, close flag)`` pair reported by the
		    changelog for ``rev``. A branch name not seen before is appended to
		    ``self._names`` and registered in the reverse mapping.
		    """
		    changelog = self._repo.changelog
		    b, close = changelog.branchinfo(rev)
		    if b in self._namesreverse:
		        branchidx = self._namesreverse[b]
		    else:
		        branchidx = len(self._names)
		        self._names.append(b)
		        self._namesreverse[b] = branchidx
		    reponode = changelog.node(rev)
		    if close:
		        # the high bit of the stored index marks a branch closing commit
		        branchidx |= _rbccloseflag
		    self._setcachedata(rev, reponode, branchidx)
		    return b, close

		def setdata(self, rev, changelogrevision):
		    """add new data information to the cache

		    The branch name and close flag are taken from
		    ``changelogrevision.branchinfo``; the node is looked up in the
		    changelog of the repository.
		    """
		    branch, close = changelogrevision.branchinfo

		    if branch in self._namesreverse:
		        branchidx = self._namesreverse[branch]
		    else:
		        branchidx = len(self._names)
		        self._names.append(branch)
		        self._namesreverse[branch] = branchidx
		    if close:
		        # the high bit of the stored index marks a branch closing commit
		        branchidx |= _rbccloseflag
		    self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
		    # If no cache data were readable (non exists, bad permission, etc)
		    # the cache was bypassing itself by setting:
		    #
		    #   self.branchinfo = self._branchinfo
		    #
		    # Since we now have data in the cache, we need to drop this bypassing.
		    if 'branchinfo' in vars(self):
		        del self.branchinfo

		def _setcachedata(self, rev, node, branchidx):
		    """Store the record of ``rev`` in the in-memory cache data.

		    Nothing is stored for ``nullrev``. When a transaction is running,
		    ``self.write`` is registered as one of its finalizers.
		    """
		    if rev != nullrev:
		        offset = rev * _rbcrecsize
		        self._rbcrevs.pack_into(offset, node, branchidx)
		        # records from this revision on are no longer in sync with the disk
		        if rev < self._rbcrevslen:
		            self._rbcrevslen = rev

		        transaction = self._repo.currenttransaction()
		        if transaction:
		            transaction.addfinalize(b'write-revbranchcache', self.write)

		def write(self, tr=None):
		    """Save branch cache if it is dirty.

		    The working-copy lock is requested without waiting. I/O, abort and
		    lock errors are reported through ``ui.debug`` and do not propagate.
		    ``tr`` is accepted but not used by this method.
		    """
		    repo = self._repo
		    wlock = None
		    step = b''
		    try:
		        # write the new names
		        names_dirty = (
		            self._force_overwrite
		            or self._rbcnamescount < len(self._names)
		        )
		        if names_dirty:
		            wlock = repo.wlock(wait=False)
		            step = b' names'
		            self._writenames(repo)

		        # write the new revs; the offset is computed only now since
		        # writing the names may reset _rbcrevslen
		        start = self._rbcrevslen * _rbcrecsize
		        revs_dirty = self._force_overwrite or start != len(self._rbcrevs)
		        if revs_dirty:
		            step = b''
		            if wlock is None:
		                wlock = repo.wlock(wait=False)
		            self._writerevs(repo, start)

		    except (IOError, OSError, error.Abort, error.LockError) as err:
		        repo.ui.debug(
		            b"couldn't write revision branch cache%s: %s\n"
		            % (step, stringutil.forcebytestr(err))
		        )
		    finally:
		        if wlock is not None:
		            wlock.release()

		def _writenames(self, repo):
		    """write the new branch names to revbranchcache

		    New names are appended when the on-disk file still has the size
		    recorded at load time. Otherwise (or when an overwrite is forced) the
		    rbc-revs file is unlinked and the names file is rewritten in full.
		    """
		    f = None
		    if self._force_overwrite:
		        self._rbcsnameslen = 0
		        self._rbcnamescount = 0
		    try:
		        if self._force_overwrite or self._rbcnamescount != 0:
		            f = repo.cachevfs.open(_rbcnames, b'ab')
		            current_size = f.tell()
		            if current_size == self._rbcsnameslen:
		                # separator between the existing names and the new ones
		                f.write(b'\0')
		            else:
		                f.close()
		                if self._force_overwrite:
		                    dbg = b"resetting content of %s\n"
		                elif current_size > 0:
		                    dbg = b"%s changed - rewriting it\n"
		                else:
		                    dbg = b"%s is missing - rewriting it\n"
		                repo.ui.debug(dbg % _rbcnames)
		                self._rbcnamescount = 0
		                self._rbcrevslen = 0
		        if self._rbcnamescount == 0:
		            # before rewriting names, make sure references are removed
		            repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
		            if f is not None:
		                # Release the append handle before reopening the file:
		                # it may still be open (forced overwrite of an empty
		                # file) and hold a buffered separator that must not be
		                # flushed after the truncation below.
		                f.close()
		            f = repo.cachevfs.open(_rbcnames, b'wb')
		        names = self._names[self._rbcnamescount :]
		        from_local = encoding.fromlocal
		        data = b'\0'.join(from_local(b) for b in names)
		        f.write(data)
		        self._rbcsnameslen = f.tell()
		    finally:
		        if f is not None:
		            f.close()
		    self._rbcnamescount = len(self._names)

		def _writerevs(self, repo, start):
		    """write the new revs to revbranchcache

		    ``start`` is the byte offset from which the in-memory records differ
		    from the file. Depending on the current size of the file, the
		    records are either written in place from that offset, or the whole
		    file is rewritten through an atomic temporary file.
		    """
		    revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
		    end = revs * _rbcrecsize
		    if self._force_overwrite:
		        start = 0

		    # align start on entry boundary
		    start -= start % _rbcrecsize

		    # opening in append mode makes sure the file exists
		    with repo.cachevfs.open(_rbcrevs, b'a+b'):
		        pass
		    with repo.cachevfs.open(_rbcrevs, b'r+b') as fp:
		        fp.seek(0, os.SEEK_END)
		        current_size = fp.tell()
		        if current_size < start:
		            # the file is shorter than what we expect to extend
		            start = 0
		        if current_size != start:
		            threshold = current_size * REWRITE_RATIO
		            overwritten = min(end, current_size) - start
		            affected = max(end, current_size) - start
		            if affected >= threshold:
		                start = 0
		                repo.ui.debug(b"resetting content of cache/%s\n" % _rbcrevs)
		            elif overwritten > 0:
		                # end affected, let us overwrite the bad value
		                msg = b"overwriting %d bytes from %d in cache/%s" % (
		                    current_size - start,
		                    start,
		                    _rbcrevs,
		                )
		                if end < current_size:
		                    msg += b" leaving (%d trailing bytes)" % (
		                        current_size - end
		                    )
		                msg += b'\n'
		                repo.ui.debug(msg)
		            else:
		                # extra untouched data at the end, lets warn about them
		                assert start == end  # since don't write anything
		                repo.ui.debug(
		                    b"cache/%s contains %d unknown trailing bytes\n"
		                    % (_rbcrevs, current_size - start)
		                )

		        if start > 0:
		            fp.seek(start)
		            fp.write(self._rbcrevs.slice(start, end))
		        else:
		            # full rewrite: release the handle, then replace the file
		            fp.close()
		            with repo.cachevfs.open(
		                _rbcrevs,
		                b'wb',
		                atomictemp=True,
		            ) as rev_file:
		                rev_file.write(self._rbcrevs.slice(start, end))
		    self._rbcrevslen = revs
		    self._force_overwrite = False