upstream/mercurial-mirror Files · mercurial/revlogutils/sidedata.py

nodemap: also use persistent nodemap for manifest...

nodemap: also use persistent nodemap for manifest The manifest has a different usage pattern than the changelog. First, while the lookups in the changelog are not guaranteed to match, the lookups in the manifest nodemap come from the changelog and will exist in the manifest. In addition, looking up a manifest almost always results in unpacking a manifest, an operation that rarely comes cheap. Nevertheless, using a persistent nodemap provides a significant gain for some operations. For our measurements, we use `hg cat --rev REV FILE` on our reference mozilla-try repository. On this repository the persistent nodemap cache is about 29 MB in size for a total store size of 11,988 MB. File with large history (file: b2g/config/gaia.json, revision: 195a1146daa0) no optimisation: 0.358s using mmap for index: 0.297s (-0.061s) persistent nodemap for changelog only: 0.275s (-0.024s) persistent nodemap for manifest too: 0.258s (-0.017s) File with small history (file: .hgignore, revision: 195a1146daa0) no optimisation: 0.377s using mmap for index: 0.296s (-0.061s) persistent nodemap for changelog only: 0.274s (-0.022s) persistent nodemap for manifest too: 0.257s (-0.017s) Same file but using a revision (8ba995b74e18) with a smaller manifest (3944829 bytes vs 10 bytes) no optimisation: 0.192s (-0.185s) using mmap for index: 0.131s (-0.061s) persistent nodemap for changelog only: 0.106s (-0.025s) persistent nodemap for manifest too: 0.087s (-0.019s) Differential Revision: https://phab.mercurial-scm.org/D8410

Augie Fackler - - Load All Authors

File last commit:

r44512:4ebd162f default


                r45290:640d5b3b

default

Download file

             sidedata.py
        
                    106 lines
            
             | 3.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / revlogutils / sidedata.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # sidedata.py - Logic around store extra data alongside revlog revisions

      #

      # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      """core code for "sidedata" support

      The "sidedata" are stored alongside the revision without actually being part of

      its content and not affecting its hash. Its main use case is to cache

      important information related to a changeset.

      The current implementation is experimental and subject to changes. Do not rely

      on it in production.

      Sidedata are stored in the revlog itself, within the revision rawtext. They

      are inserted, removed from it using the flagprocessors mechanism. The following

      format is currently used::

          initial header:

              <number of sidedata; 2 bytes>

          sidedata index (repeated N times):

              <sidedata-key; 2 bytes>

              <sidedata-entry-length; 4 bytes>

              <sidedata-content-sha1-digest; 20 bytes>

          sidedata content (repeated N times, in index order):

              <sidedata-content; X bytes>

          normal raw text:

              <all bytes remaining in the rawtext>

      This is a simple and effective format. It should be enough to experiment with

      the concept.

      """

      from __future__ import absolute_import

      import struct

      from .. import error

      from ..utils import hashutil

      ## sidedata type constant

      # NOTE(review): the SD_* values below look like the integer keys used in the
      # sidedata mapping (packed as the 2-byte key field) -- confirm with callers.

      # reserve a block for testing purposes.

      SD_TEST1 = 1

      SD_TEST2 = 2

      SD_TEST3 = 3

      SD_TEST4 = 4

      SD_TEST5 = 5

      SD_TEST6 = 6

      SD_TEST7 = 7

      # key to store copies related information

      SD_P1COPIES = 8

      SD_P2COPIES = 9

      SD_FILESADDED = 10

      SD_FILESREMOVED = 11

      # internal format constant

      # SIDEDATA_HEADER: big-endian unsigned 16-bit integer; the writer packs the
      # number of sidedata entries into it.

      SIDEDATA_HEADER = struct.Struct('>H')

      # SIDEDATA_ENTRY: big-endian (key: unsigned 16-bit, content length: unsigned
      # 32-bit, sha1 digest of the content: 20 bytes), one record per entry.

      SIDEDATA_ENTRY = struct.Struct('>HL20s')

      def sidedatawriteprocessor(rl, text, sidedata):
          """Serialize ``sidedata`` in front of ``text``.

          The result is built from, in order: the packed number of entries, one
          packed index record per entry (key, content length, sha1 digest of the
          content) following the sorted order of the items, the entry contents
          in that same order, and finally ``bytes(text)``.

          ``rl`` is not used by this function.

          Returns a ``(rawtext, False)`` tuple.
          """
          entries = sorted(sidedata.items())
          chunks = [SIDEDATA_HEADER.pack(len(entries))]
          # all index records come first ...
          chunks.extend(
              SIDEDATA_ENTRY.pack(key, len(value), hashutil.sha1(value).digest())
              for key, value in entries
          )
          # ... followed by all the contents, in the same order
          chunks.extend(value for _key, value in entries)
          chunks.append(bytes(text))
          return b''.join(chunks), False

      def sidedatareadprocessor(rl, text):
          """Split the sidedata stored at the start of ``text`` from the rest.

          The entry count is read from the header, then each index record (key,
          content length, stored sha1 digest) is unpacked and the matching
          content is sliced out of the area that follows the index.

          ``rl`` is not used by this function.

          Returns ``(remaining text, True, {key: content})``.

          Raises ``error.SidedataHashError`` when the sha1 of a content does not
          match the digest stored in its index record.
          """
          headersize = SIDEDATA_HEADER.size
          entrysize = SIDEDATA_ENTRY.size
          (count,) = SIDEDATA_HEADER.unpack(text[:headersize])
          # the contents start right after the last index record
          indexend = headersize + entrysize * count
          datapos = indexend
          found = {}
          for indexpos in range(headersize, indexend, entrysize):
              record = text[indexpos : indexpos + entrysize]
              key, size, expected = SIDEDATA_ENTRY.unpack(record)
              # read the data associated with that entry
              dataend = datapos + size
              content = text[datapos:dataend]
              actual = hashutil.sha1(content).digest()
              if expected != actual:
                  raise error.SidedataHashError(key, expected, actual)
              found[key] = content
              datapos = dataend
          return text[datapos:], True, found

      def sidedatarawprocessor(rl, text):
          """Raw-side processor for sidedata; unconditionally returns False.

          Neither ``rl`` nor ``text`` is inspected.
          """
          # side data modifies rawtext and prevent rawtext hash validation
          rawhashusable = False
          return rawhashusable

      # The three sidedata processors bundled as one tuple, in the order
      # (read, write, raw).

      processors = (

          sidedatareadprocessor,

          sidedatawriteprocessor,

          sidedatarawprocessor,

      )

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# sidedata.py - Logic around store extra data alongside revlog revisions
				#
				# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.
				"""core code for "sidedata" support

				The "sidedata" are stored alongside the revision without actually being part of
				its content and not affecting its hash. Its main use case is to cache
				important information related to a changeset.

				The current implementation is experimental and subject to changes. Do not rely
				on it in production.

				Sidedata are stored in the revlog itself, within the revision rawtext. They
				are inserted, removed from it using the flagprocessors mechanism. The following
				format is currently used::

				    initial header:
				        <number of sidedata; 2 bytes>
				    sidedata index (repeated N times):
				        <sidedata-key; 2 bytes>
				        <sidedata-entry-length; 4 bytes>
				        <sidedata-content-sha1-digest; 20 bytes>
				    sidedata content (repeated N times, in index order):
				        <sidedata-content; X bytes>
				    normal raw text:
				        <all bytes remaining in the rawtext>

				This is a simple and effective format. It should be enough to experiment with
				the concept.
				"""

				from __future__ import absolute_import

				import struct

				from .. import error
				from ..utils import hashutil

				## sidedata type constant
				# NOTE(review): the SD_* values below look like the integer keys used in the
				# sidedata mapping (packed as the 2-byte key field) -- confirm with callers.
				# reserve a block for testing purposes.
				SD_TEST1 = 1
				SD_TEST2 = 2
				SD_TEST3 = 3
				SD_TEST4 = 4
				SD_TEST5 = 5
				SD_TEST6 = 6
				SD_TEST7 = 7

				# key to store copies related information
				SD_P1COPIES = 8
				SD_P2COPIES = 9
				SD_FILESADDED = 10
				SD_FILESREMOVED = 11

				# internal format constant
				# SIDEDATA_HEADER: big-endian unsigned 16-bit integer; the writer packs the
				# number of sidedata entries into it.
				SIDEDATA_HEADER = struct.Struct('>H')
				# SIDEDATA_ENTRY: big-endian (key: unsigned 16-bit, content length: unsigned
				# 32-bit, sha1 digest of the content: 20 bytes), one record per entry.
				SIDEDATA_ENTRY = struct.Struct('>HL20s')


				def sidedatawriteprocessor(rl, text, sidedata):
				    """Serialize ``sidedata`` in front of ``text``.

				    The result is built from, in order: the packed number of entries, one
				    packed index record per entry (key, content length, sha1 digest of the
				    content) following the sorted order of the items, the entry contents
				    in that same order, and finally ``bytes(text)``.

				    ``rl`` is not used by this function.

				    Returns a ``(rawtext, False)`` tuple.
				    """
				    entries = sorted(sidedata.items())
				    chunks = [SIDEDATA_HEADER.pack(len(entries))]
				    # all index records come first ...
				    chunks.extend(
				        SIDEDATA_ENTRY.pack(key, len(value), hashutil.sha1(value).digest())
				        for key, value in entries
				    )
				    # ... followed by all the contents, in the same order
				    chunks.extend(value for _key, value in entries)
				    chunks.append(bytes(text))
				    return b''.join(chunks), False


				def sidedatareadprocessor(rl, text):
				    """Split the sidedata stored at the start of ``text`` from the rest.

				    The entry count is read from the header, then each index record (key,
				    content length, stored sha1 digest) is unpacked and the matching
				    content is sliced out of the area that follows the index.

				    ``rl`` is not used by this function.

				    Returns ``(remaining text, True, {key: content})``.

				    Raises ``error.SidedataHashError`` when the sha1 of a content does not
				    match the digest stored in its index record.
				    """
				    headersize = SIDEDATA_HEADER.size
				    entrysize = SIDEDATA_ENTRY.size
				    (count,) = SIDEDATA_HEADER.unpack(text[:headersize])
				    # the contents start right after the last index record
				    indexend = headersize + entrysize * count
				    datapos = indexend
				    found = {}
				    for indexpos in range(headersize, indexend, entrysize):
				        record = text[indexpos : indexpos + entrysize]
				        key, size, expected = SIDEDATA_ENTRY.unpack(record)
				        # read the data associated with that entry
				        dataend = datapos + size
				        content = text[datapos:dataend]
				        actual = hashutil.sha1(content).digest()
				        if expected != actual:
				            raise error.SidedataHashError(key, expected, actual)
				        found[key] = content
				        datapos = dataend
				    return text[datapos:], True, found


				def sidedatarawprocessor(rl, text):
				    """Raw-side processor for sidedata; unconditionally returns False.

				    Neither ``rl`` nor ``text`` is inspected.
				    """
				    # side data modifies rawtext and prevent rawtext hash validation
				    rawhashusable = False
				    return rawhashusable


				# The three sidedata processors bundled as one tuple, in the order
				# (read, write, raw).
				processors = (
				sidedatareadprocessor,
				sidedatawriteprocessor,
				sidedatarawprocessor,
				)