upstream/mercurial-mirror Files · mercurial/revlogutils/nodemap.py

test-grep: document that trivially moved lines might not be detected...

test-grep: document that trivially moved lines might not be detected I'm not going to fix this. I just learned "grep --diff" does clever thing than a simple grep(patch.diff(context=0)), and I think it's better to document how things work.

marmoute - - Load All Authors

File last commit:

r45296:5e3c7186 default


                r45998:9c8d2cf7

default

Download file

             nodemap.py
        
                    644 lines
            
             | 20.6 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / revlogutils / nodemap.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marmoute
    
revlogutils: move the NodeMap class in a dedicated nodemap module...

              r44486
            
      # nodemap.py - nodemap related code and utilities

      #

      # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>

      # Copyright 2019 George Racinet <georges.racinet@octobus.net>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
        marmoute
    
nodemap: introduce an option to use mmap to read the nodemap mapping...

              r44843
            
      import errno

        marmoute
    
nodemap: use an intermediate "docket" file to carry small metadata...

              r44792
            
      import os

        marmoute
    
nodemap: delete older raw data file when creating a new ones...

              r44793
            
      import re

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
      import struct

        marmoute
    
nodemap: add a new mode option, with an optional "warn" value...

              r45292
            
      from ..i18n import _

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
      from .. import (

          error,

          node as nodemod,

        marmoute
    
nodemap: add a optional `nodemap_add_full` method on indexes...

              r44795
            
          util,

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
      )

        marmoute
    
revlogutils: move the NodeMap class in a dedicated nodemap module...

              r44486
            
      class NodeMap(dict):
          """Dictionary whose failed lookups raise a `RevlogError`."""

          def __missing__(self, x):
              # called by `dict.__getitem__` when `x` is not a key
              message = b'unknown node: %s' % x
              raise error.RevlogError(message)

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
        marmoute
    
nodemap: add a function to read the data from disk...

              r44790
            
      def persisted_data(revlog):
          """Read the persisted nodemap of `revlog` from disk.

          Returns a `(docket, data)` pair, or None when nothing usable is found:
          the revlog has no nodemap file, the docket yields no content, the docket
          was written with another on-disk version, or the raw data file is
          missing or shorter than the length recorded in the docket.

          Errors other than a missing raw data file are propagated.
          """
          if revlog.nodemap_file is None:
              return None
          pdata = revlog.opener.tryread(revlog.nodemap_file)
          if not pdata:
              return None

          # docket layout: version, fixed size header, uid, tip node
          offset = 0
          (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
          if version != ONDISK_VERSION:
              return None
          offset += S_VERSION.size
          headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
          uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
          offset += S_HEADER.size
          docket = NodeMapDocket(pdata[offset : offset + uid_size])
          offset += uid_size
          docket.tip_rev = tip_rev
          docket.tip_node = pdata[offset : offset + tip_node_size]
          docket.data_length = data_length
          docket.data_unused = data_unused

          filename = _rawdata_filepath(revlog, docket)
          use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
          try:
              with revlog.opener(filename) as fd:
                  # never read more than the amount of data the docket vouches for
                  if use_mmap:
                      data = util.buffer(util.mmapread(fd, data_length))
                  else:
                      data = fd.read(data_length)
          except (IOError, OSError) as e:
              # `IOError` is only distinct from `OSError` on Python 2, where
              # opening a missing file raises the former.
              if e.errno != errno.ENOENT:
                  raise
              # the raw data file is gone: there is no nodemap to return (falling
              # through would hit an unbound `data` below)
              return None
          if len(data) < data_length:
              return None
          return docket, data

        marmoute
    
nodemap: add a function to read the data from disk...

              r44790
            
        marmoute
    
nodemap: write nodemap data on disk...

              r44789
            
      def setup_persistent_nodemap(tr, revlog):
          """Register on `tr` the callbacks persisting the nodemap of `revlog`

          Nothing is registered for an inlined revlog, for a revlog without a
          nodemap file, or when the finalize callback is already registered.
          """
          # inlined revlog are too small for this to be relevant, and a revlog
          # without a nodemap file does not use the persistent nodemap
          if revlog._inline or revlog.nodemap_file is None:
              return

          # This needs to happen after the changelog finalization, whose callbacks
          # use a "cl-" prefix (presumably callbacks run in id order, hence the
          # "nm-" prefix -- TODO confirm against the transaction code).
          cb_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
          if tr.hasfinalize(cb_id):
              return  # no need to register again

          def write_pending(tr):
              return _persist_nodemap(tr, revlog, pending=True)

          def write_final(tr):
              return _persist_nodemap(tr, revlog)

          tr.addpending(cb_id, write_pending)
          tr.addfinalize(cb_id, write_final)

      class _NoTransaction(object):
          """Transaction-like object to update the nodemap outside a transaction

          Post-close callbacks are recorded in `_postclose`; every other method
          accepts its arguments and does nothing.
          """

          def __init__(self):
              # callback id -> callback function
              self._postclose = dict()

          def addpostclose(self, callback_id, callback_func):
              """Record `callback_func` under `callback_id`."""
              self._postclose[callback_id] = callback_func

          def registertmp(self, *args, **kwargs):
              """Do nothing."""

          def addbackup(self, *args, **kwargs):
              """Do nothing."""

          def add(self, *args, **kwargs):
              """Do nothing."""

          def addabort(self, *args, **kwargs):
              """Do nothing."""

          def _report(self, *args):
              """Do nothing."""

        marmoute
    
nodemap: write nodemap data on disk...

              r44789
            
        marmoute
    
nodemap: warm the persistent nodemap on disk with debugupdatecache...

              r44932
            
      def update_persistent_nodemap(revlog):
          """Persist the nodemap of `revlog` right now, without a transaction

          To be used for updating the nodemap on disk outside of a normal
          transaction setup (eg, `debugupdatecache`). Does nothing for an inlined
          revlog or a revlog without a nodemap file.
          """
          # inlined revlog are too small for this to be relevant, and a revlog
          # without a nodemap file does not use the persistent nodemap
          if revlog._inline or revlog.nodemap_file is None:
              return

          fake_tr = _NoTransaction()
          _persist_nodemap(fake_tr, revlog)
          # no real transaction will close: run the recorded post-close callbacks
          # by hand, in callback id order
          callbacks = fake_tr._postclose
          for cid in sorted(callbacks):
              callbacks[cid](None)

        marmoute
    
nodemap: warm the persistent nodemap on disk with debugupdatecache...

              r44932
            
        marmoute
    
nodemap: make sure hooks have access to an up-to-date version...

              r45003
            
      def _persist_nodemap(tr, revlog, pending=False):
          """Write nodemap data on disk for a given revlog

          The raw data file is either extended in place (incremental update) or
          written from scratch under a new uid; the docket describing it is then
          written. With `pending` set, the docket goes to a `.a` suffixed file
          registered as temporary on `tr`; otherwise the regular docket file is
          registered for backup on `tr` and replaced.

          Raises `ProgrammingError` for a filtered revlog or a revlog without a
          nodemap file, and `Abort` when the mode option is "strict" and the index
          has no `nodemap_data_incremental` method.
          """
          if getattr(revlog, 'filteredrevs', ()):
              raise error.ProgrammingError(
                  "cannot persist nodemap of a filtered changelog"
              )
          if revlog.nodemap_file is None:
              msg = "calling persist nodemap on a revlog without the feature enabled"
              raise error.ProgrammingError(msg)

          can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
          ondisk_docket = revlog._nodemap_docket
          feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
          use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
          mode = revlog.opener.options.get(b"persistent-nodemap.mode")
          if not can_incremental:
              msg = _(b"persistent nodemap in strict mode without efficient method")
              if mode == b'warn':
                  tr._report(b"%s\n" % msg)
              elif mode == b'strict':
                  raise error.Abort(msg)

          data = None
          # first attempt an incremental update of the data
          if can_incremental and ondisk_docket is not None:
              target_docket = revlog._nodemap_docket.copy()
              (
                  src_docket,
                  data_changed_count,
                  data,
              ) = revlog.index.nodemap_data_incremental()
              new_length = target_docket.data_length + len(data)
              new_unused = target_docket.data_unused + data_changed_count
              if src_docket != target_docket:
                  # the increment was not computed from the docket we hold, it
                  # cannot be appended to our data file
                  data = None
              elif new_length <= (new_unused * 10):
                  # 10% or more of the file would be unused data: "vacuum" it by
                  # falling back to a full export
                  data = None
              else:
                  datafile = _rawdata_filepath(revlog, target_docket)
                  # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                  # store vfs
                  tr.add(datafile, target_docket.data_length)
                  with revlog.opener(datafile, b'r+') as fd:
                      # write from the expected current end of the valid data
                      fd.seek(target_docket.data_length)
                      fd.write(data)
                      if feed_data:
                          # NOTE(review): a plain read is used when mmap is
                          # requested and a mmap otherwise, the opposite of what
                          # `persisted_data` does -- confirm this is intended.
                          if use_mmap:
                              fd.seek(0)
                              new_data = fd.read(new_length)
                          else:
                              fd.flush()
                              new_data = util.buffer(util.mmapread(fd, new_length))
                  target_docket.data_length = new_length
                  target_docket.data_unused = new_unused

          if data is None:
              # otherwise fallback to a full new export
              target_docket = NodeMapDocket()
              datafile = _rawdata_filepath(revlog, target_docket)
              if util.safehasattr(revlog.index, "nodemap_data_all"):
                  data = revlog.index.nodemap_data_all()
              else:
                  data = persistent_data(revlog.index)
              # EXP-TODO: if this is a cache, this should use a cache vfs, not a
              # store vfs

              tryunlink = revlog.opener.tryunlink

              def abortck(tr):
                  tryunlink(datafile)

              callback_id = b"delete-%s" % datafile

              # some flavor of the transaction abort does not cleanup new file, it
              # simply empty them.
              tr.addabort(callback_id, abortck)
              with revlog.opener(datafile, b'w+') as fd:
                  fd.write(data)
                  if feed_data:
                      # NOTE(review): same remark as in the incremental branch
                      # about which case ends up using a mmap.
                      if use_mmap:
                          new_data = data
                      else:
                          fd.flush()
                          new_data = util.buffer(util.mmapread(fd, len(data)))
              target_docket.data_length = len(data)
          target_docket.tip_rev = revlog.tiprev()
          target_docket.tip_node = revlog.node(target_docket.tip_rev)
          # EXP-TODO: if this is a cache, this should use a cache vfs, not a
          # store vfs
          file_path = revlog.nodemap_file
          if pending:
              file_path += b'.a'
              tr.registertmp(file_path)
          else:
              tr.addbackup(file_path)

          with revlog.opener(file_path, b'w', atomictemp=True) as fp:
              fp.write(target_docket.serialize())
          revlog._nodemap_docket = target_docket
          if feed_data:
              revlog.index.update_nodemap_data(target_docket, new_data)

          # search for old index file in all cases, some older process might have
          # left one behind.
          olds = _other_rawdata_filepath(revlog, target_docket)
          if olds:
              realvfs = getattr(revlog, '_realopener', revlog.opener)

              def cleanup(tr):
                  for oldfile in olds:
                      realvfs.tryunlink(oldfile)

              callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
              tr.addpostclose(callback_id, cleanup)

        marmoute
    
nodemap: use an intermediate "docket" file to carry small metadata...

              r44792
            
      ### Nodemap docket file

      #

      # The nodemap data are stored on disk using 2 files:

      #

      # * a raw data file containing a persistent nodemap

      #   (see `Nodemap Trie` section)

      #

      # * a small "docket" file containing metadata

      #

      # While the nodemap data can be multiple tens of megabytes, the "docket" is

      # small, it is easy to update it atomically or to duplicate its content

      # during a transaction.

      #

      # Multiple raw data can exist at the same time (The currently valid one and a

      # new one being used by an in progress transaction). To accommodate this, the

      # filename hosting the raw data has a variable part. The exact filename is

      # specified inside the "docket" file.

      #

      # The docket file contains information to find, qualify and validate the raw

      # data. Its content is currently very light, but it will expand as the on disk

      # nodemap gains the necessary features to be used in production.

        marmoute
    
nodemap: move on disk file to version 1...

              r45294
            
      # version number written at the start of the docket by `serialize`;
      # `persisted_data` ignores a docket carrying any other value
      ONDISK_VERSION = 1

        marmoute
    
nodemap: use an intermediate "docket" file to carry small metadata...

              r44792
            
      # the docket starts with its format version, a single unsigned byte
      S_VERSION = struct.Struct(">B")

        marmoute
    
nodemap: track the tip_node for validation...

              r45002
            
      # fixed size docket header, big endian: the uid size (unsigned byte), then
      # tip_rev, data_length, data_unused and the tip_node size (unsigned 64bit
      # integers)
      S_HEADER = struct.Struct(">BQQQQ")

        marmoute
    
nodemap: use an intermediate "docket" file to carry small metadata...

              r44792
            
      # number of random bytes drawn by `_make_uid` to build a uid
      ID_SIZE = 8

      def _make_uid():
          """Return a fresh identifier.

          `ID_SIZE` random bytes are drawn and passed through `nodemod.hex`, so the
          identifier is random and composed of ascii characters.
          """
          random_bytes = os.urandom(ID_SIZE)
          return nodemod.hex(random_bytes)

        marmoute
    
nodemap: introduce an explicit class/object for the docket...

              r44803
            
      class NodeMapDocket(object):
          """metadata associated with persistent nodemap data

          The persistent data may come from disk or be on their way to disk.

          Two dockets compare equal when they share the same `uid` and the same
          `data_length`.
          """

          def __init__(self, uid=None):
              if uid is None:
                  uid = _make_uid()
              # a unique identifier for the data file:
              #   - When new data are appended, it is preserved.
              #   - When a new data file is created, a new identifier is generated.
              self.uid = uid
              # the tipmost revision stored in the data file. This revision and all
              # revision before it are expected to be encoded in the data file.
              self.tip_rev = None
              # the node of that tipmost revision, if it mismatches the current
              # index data the docket is not valid for the current index and should
              # be discarded.
              #
              # note: this method is not perfect as some destructive operation could
              # preserve the same tip_rev + tip_node while altering lower revision.
              # However multiple other caches have the same vulnerability (eg:
              # branchmap cache).
              self.tip_node = None
              # the size (in bytes) of the persisted data to encode the nodemap valid
              # for `tip_rev`.
              #   - data file shorter than this are corrupted,
              #   - any extra data should be ignored.
              self.data_length = None
              # the amount (in bytes) of "dead" data, still in the data file but no
              # longer used for the nodemap.
              self.data_unused = 0

          def copy(self):
              """return a new docket with the same uid and attribute values"""
              new = NodeMapDocket(uid=self.uid)
              new.tip_rev = self.tip_rev
              new.tip_node = self.tip_node
              new.data_length = self.data_length
              new.data_unused = self.data_unused
              return new

          def __cmp__(self, other):
              # Only used by Python 2: order by uid, then by data_length.
              try:
                  other_uid = other.uid
                  other_length = other.data_length
              except AttributeError:
                  # not a docket-like object, let Python use its default ordering
                  return NotImplemented
              if self.uid < other_uid:
                  return -1
              if self.uid > other_uid:
                  return 1
              elif self.data_length < other_length:
                  return -1
              elif self.data_length > other_length:
                  return 1
              return 0

          def __eq__(self, other):
              try:
                  return (
                      self.uid == other.uid
                      and self.data_length == other.data_length
                  )
              except AttributeError:
                  # comparing with something that is not a docket (eg: None) must
                  # not crash, let Python fall back to its default comparison
                  return NotImplemented

          def __ne__(self, other):
              # Python 2 does not derive `!=` from `__eq__`
              equal = self.__eq__(other)
              if equal is NotImplemented:
                  return NotImplemented
              return not equal

          def serialize(self):
              """return the serialized bytes for this docket

              The result is the version, the fixed size header, the uid and the tip
              node, in that order.
              """
              data = []
              data.append(S_VERSION.pack(ONDISK_VERSION))
              headers = (
                  len(self.uid),
                  self.tip_rev,
                  self.data_length,
                  self.data_unused,
                  len(self.tip_node),
              )
              data.append(S_HEADER.pack(*headers))
              data.append(self.uid)
              data.append(self.tip_node)
              return b''.join(data)

        marmoute
    
nodemap: use an intermediate "docket" file to carry small metadata...

              r44792
            
        marmoute
    
nodemap: introduce an explicit class/object for the docket...

              r44803
            
      def _rawdata_filepath(revlog, docket):
          """The (vfs relative) nodemap's rawdata file for a given docket

          The name is built from the nodemap file name, stripped of its `.n` (or
          `.n.a`) suffix, and from the uid of `docket`.
          """
          docket_path = revlog.nodemap_file
          # number of trailing bytes to drop: len(b'.n.a') or len(b'.n')
          suffix_size = 4 if docket_path.endswith(b'.n.a') else 2
          return b"%s-%s.nd" % (docket_path[:-suffix_size], docket.uid)

        marmoute
    
nodemap: write nodemap data on disk...

              r44789
            
        marmoute
    
nodemap: introduce an explicit class/object for the docket...

              r44803
            
      def _other_rawdata_filepath(revlog, docket):
          """List the nodemap rawdata files of `revlog` other than `docket`'s one

          The entries of the directory holding the rawdata file of `docket` are
          matched against the `<prefix>-<hex digits>.nd` naming scheme. The
          matching entries are returned as listed by the vfs, minus the file of
          `docket` itself.
          """
          prefix = revlog.nodemap_file[:-2]
          # the prefix comes from a file name: escape it so that any character
          # special to regular expressions is matched literally
          pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % re.escape(prefix))
          new_file_path = _rawdata_filepath(revlog, docket)
          new_file_name = revlog.opener.basename(new_file_path)
          dirpath = revlog.opener.dirname(new_file_path)
          return [
              f
              for f in revlog.opener.listdir(dirpath)
              if pattern.match(f) and f != new_file_name
          ]

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
      ### Nodemap Trie

      #

      # This is a simple reference implementation to compute and persist a nodemap

      # trie. This reference implementation is write only. The python version of this

      # is not expected to be actually used, since it won't provide performance

      # improvement over existing non-persistent C implementation.

      #

      # The nodemap is persisted as a Trie using 4bits-address/16-entries blocks. Each

      # revision can be addressed using its node's shortest prefix.

      #

      # The trie is stored as a sequence of block. Each block contains 16 entries

      # (signed 32bit integer, big endian). Each entry can be one of the following:

      #

      #  * value >=  0 -> index of sub-block

      #  * value == -1 -> no value

      #  * value <  -1 -> a revision value: rev = -(value+2)

      #

      # The implementation focuses on simplicity, not on performance. A Rust

      # implementation should provide an efficient version of the same binary

      # persistence. This reference python implementation is never meant to be

      # extensively used in production.

      def persistent_data(index):
          """Serialize the nodemap of ``index`` into its persistent binary form.

          A trie is first built from the index with ``_build_trie``; that trie is
          then encoded with ``_persist_trie`` and the encoded data is returned.
          """
          return _persist_trie(_build_trie(index))

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
      def update_persistent_data(index, root, max_idx, last_rev):

          """return the incremental update for persistent nodemap from a given index

          """

        marmoute
    
nodemap: track the total and unused amount of data in the rawdata file...

              r44808
            
          changed_block, trie = _update_trie(index, root, last_rev)

          return (

              changed_block * S_BLOCK.size,

              _persist_trie(trie, existing_idx=max_idx),

          )

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
      # Binary layout of a single trie block: 16 signed integers ("l") in big
      # endian order (">"). With the standard sizes selected by ">", each "l"
      # takes 4 bytes, so one block is 64 bytes long.
      S_BLOCK = struct.Struct(">" + ("l" * 16))

      # Value written in a block slot that holds neither a sub-block nor a revision.
      NO_ENTRY = -1

      # Revisions are stored as negative values shifted by REV_OFFSET, so rev 0 is
      # stored as -2: values >= 0 are taken by sub-block indexes and -1 is the
      # special NO_ENTRY value.

      REV_OFFSET = 2

      def _transform_rev(rev):
          """Convert a revision number to its in-trie encoding, or back again.

          The value is shifted by ``REV_OFFSET`` and negated. Applying the
          function twice gives back the original value (it is an involution),
          so the same helper is used for both encoding and decoding.
          """
          shifted = rev + REV_OFFSET
          return -shifted

      def _to_int(hex_digit):

          """turn an hexadecimal digit into a proper integer"""

          return int(hex_digit, 16)

        marmoute
    
nodemap: use an explicit "Block" object in the reference implementation...

              r44796
            
      class Block(dict):

          """represent a block of the Trie

          contains up to 16 entry indexed from 0 to 15"""

        marmoute
    
nodemap: keep track of the ondisk id of nodemap blocks...

              r44802
            
          def __init__(self):

              super(Block, self).__init__()

              # If this block exist on disk, here is its ID

              self.ondisk_id = None

        marmoute
    
nodemap: move the iteratio inside the Block object...

              r44797
            
          def __iter__(self):

              return iter(self.get(i) for i in range(16))

        marmoute
    
nodemap: use an explicit "Block" object in the reference implementation...

              r44796
            
        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
      def _build_trie(index):

          """build a nodemap trie

          The nodemap stores revision number for each unique prefix.

          Each block is a dictionary with keys in `[0, 15]`. Values are either

          another block or a revision number.

          """

        marmoute
    
nodemap: use an explicit "Block" object in the reference implementation...

              r44796
            
          root = Block()

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
          for rev in range(len(index)):

              hex = nodemod.hex(index[rev][7])

              _insert_into_block(index, 0, root, rev, hex)

          return root

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
      def _update_trie(index, root, last_rev):

          """consume"""

        marmoute
    
nodemap: track the total and unused amount of data in the rawdata file...

              r44808
            
          changed = 0

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
          for rev in range(last_rev + 1, len(index)):

              hex = nodemod.hex(index[rev][7])

        marmoute
    
nodemap: track the total and unused amount of data in the rawdata file...

              r44808
            
              changed += _insert_into_block(index, 0, root, rev, hex)

          return changed, root

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
      def _insert_into_block(index, level, block, current_rev, current_hex):

          """insert a new revision in a block

          index: the index we are adding revision for

          level: the depth of the current block in the trie

          block: the block currently being considered

          current_rev: the revision number we are adding

          current_hex: the hexadecimal representation of the node of that revision

          """

        marmoute
    
nodemap: track the total and unused amount of data in the rawdata file...

              r44808
            
          changed = 1

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
          if block.ondisk_id is not None:

              block.ondisk_id = None

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
          hex_digit = _to_int(current_hex[level : level + 1])

          entry = block.get(hex_digit)

          if entry is None:

              # no entry, simply store the revision number

              block[hex_digit] = current_rev

          elif isinstance(entry, dict):

              # need to recurse to an underlying block

        marmoute
    
nodemap: track the total and unused amount of data in the rawdata file...

              r44808
            
              changed += _insert_into_block(

                  index, level + 1, entry, current_rev, current_hex

              )

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
          else:

              # collision with a previously unique prefix, inserting new

              # vertices to fit both entry.

              other_hex = nodemod.hex(index[entry][7])

              other_rev = entry

        marmoute
    
nodemap: use an explicit "Block" object in the reference implementation...

              r44796
            
              new = Block()

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
              block[hex_digit] = new

              _insert_into_block(index, level + 1, new, other_rev, other_hex)

              _insert_into_block(index, level + 1, new, current_rev, current_hex)

        marmoute
    
nodemap: track the total and unused amount of data in the rawdata file...

              r44808
            
          return changed

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
      def _persist_trie(root, existing_idx=None):

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
          """turn a nodemap trie into persistent binary data

          See `_build_trie` for nodemap trie structure"""

          block_map = {}

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
          if existing_idx is not None:

              base_idx = existing_idx + 1

          else:

              base_idx = 0

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
          chunks = []

          for tn in _walk_trie(root):

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
              if tn.ondisk_id is not None:

                  block_map[id(tn)] = tn.ondisk_id

              else:

                  block_map[id(tn)] = len(chunks) + base_idx

                  chunks.append(_persist_block(tn, block_map))

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
          return b''.join(chunks)

      def _walk_trie(block):

          """yield all the block in a trie

          Children blocks are always yielded before their parent block.

          """

        marmoute
    
nodemap: rename a variable to prevent shawoding "_"...

              r45186
            
          for (__, item) in sorted(block.items()):

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
              if isinstance(item, dict):

                  for sub_block in _walk_trie(item):

                      yield sub_block

          yield block

      def _persist_block(block_node, block_map):

          """produce persistent binary data for a single block

          Children block are assumed to be already persisted and present in

          block_map.

          """

        marmoute
    
nodemap: move the iteratio inside the Block object...

              r44797
            
          data = tuple(_to_value(v, block_map) for v in block_node)

        marmoute
    
nodemap: have some python code writing a nodemap in persistent binary form...

              r44788
            
          return S_BLOCK.pack(*data)

      def _to_value(item, block_map):

          """persist any value as an integer"""

          if item is None:

              return NO_ENTRY

          elif isinstance(item, dict):

              return block_map[id(item)]

          else:

              return _transform_rev(item)

        marmoute
    
nodemap: code to parse the persistent binary nodemap data...

              r44798
            
      def parse_data(data):

          """parse parse nodemap data into a nodemap Trie"""

          if (len(data) % S_BLOCK.size) != 0:

              msg = "nodemap data size is not a multiple of block size (%d): %d"

              raise error.Abort(msg % (S_BLOCK.size, len(data)))

          if not data:

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
              return Block(), None

        marmoute
    
nodemap: code to parse the persistent binary nodemap data...

              r44798
            
          block_map = {}

          new_blocks = []

          for i in range(0, len(data), S_BLOCK.size):

              block = Block()

        marmoute
    
nodemap: keep track of the ondisk id of nodemap blocks...

              r44802
            
              block.ondisk_id = len(block_map)

              block_map[block.ondisk_id] = block

        marmoute
    
nodemap: code to parse the persistent binary nodemap data...

              r44798
            
              block_data = data[i : i + S_BLOCK.size]

              values = S_BLOCK.unpack(block_data)

              new_blocks.append((block, values))

          for b, values in new_blocks:

              for idx, v in enumerate(values):

                  if v == NO_ENTRY:

                      continue

                  elif v >= 0:

                      b[idx] = block_map[v]

                  else:

                      b[idx] = _transform_rev(v)

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
          return block, i // S_BLOCK.size

        marmoute
    
nodemap: add basic checking of the on disk nodemap content...

              r44799
            
      # debug utility

      def check_data(ui, index, data):

          """verify that the provided nodemap data are valid for the given idex"""

          ret = 0

          ui.status((b"revision in index:   %d\n") % len(index))

        marmoute
    
nodemap: introduce append-only incremental update of the persistent data...

              r44805
            
          root, __ = parse_data(data)

        marmoute
    
nodemap: add basic checking of the on disk nodemap content...

              r44799
            
          all_revs = set(_all_revisions(root))

          ui.status((b"revision in nodemap: %d\n") % len(all_revs))

          for r in range(len(index)):

              if r not in all_revs:

                  msg = b"  revision missing from nodemap: %d\n" % r

                  ui.write_err(msg)

                  ret = 1

              else:

                  all_revs.remove(r)

        marmoute
    
nodemap: all check that revision and nodes match in the nodemap...

              r44800
            
              nm_rev = _find_node(root, nodemod.hex(index[r][7]))

              if nm_rev is None:

                  msg = b"  revision node does not match any entries: %d\n" % r

                  ui.write_err(msg)

                  ret = 1

              elif nm_rev != r:

                  msg = (

                      b"  revision node does not match the expected revision: "

                      b"%d != %d\n" % (r, nm_rev)

                  )

                  ui.write_err(msg)

                  ret = 1

        marmoute
    
nodemap: add basic checking of the on disk nodemap content...

              r44799
            
          if all_revs:

              for r in sorted(all_revs):

                  msg = b"  extra revision in  nodemap: %d\n" % r

                  ui.write_err(msg)

              ret = 1

          return ret

      def _all_revisions(root):
          """Yield every revision stored in the trie starting at ``root``.

          Each block returned by ``_walk_trie`` is scanned; slots that are empty
          (``None``) or that hold a sub-block are ignored.
          """
          for node in _walk_trie(root):
              for slot in node:
                  if slot is not None and not isinstance(slot, Block):
                      yield slot

        marmoute
    
nodemap: all check that revision and nodes match in the nodemap...

              r44800
            
      def _find_node(block, node):
          """Look up the revision stored for ``node`` in the trie under ``block``.

          One hexadecimal digit of ``node`` is consumed per trie level. The walk
          stops at the first slot that is not a sub-block and returns its
          content: a revision number, or ``None`` when the slot is empty.
          """
          remaining = node
          entry = block.get(_to_int(remaining[0:1]))
          while isinstance(entry, dict):
              # descend one level and move on to the next digit
              remaining = remaining[1:]
              entry = entry.get(_to_int(remaining[0:1]))
          return entry

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages