upstream/mercurial-mirror Commit - r44795:7f4f7ef3

nodemap: add a optional `nodemap_add_full` method on indexes...

marmoute -

r44795:7f4f7ef3 default

parent child

mercurial/pure/parsers.py

0 +7 0

             # parsers.py - Python implementation of parsers.c
             #
             # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import struct
             import zlib
             from ..node import nullid, nullrev
             from .. import (
                 pycompat,
                 util,
             )
             from ..revlogutils import nodemap as nodemaputil
             # Short module-level aliases for the helpers used by the code below.
             stringio = pycompat.bytesio
             _pack = struct.pack
             _unpack = struct.unpack
             _compress = zlib.compress
             _decompress = zlib.decompress
             # Some code below makes tuples directly because it's more convenient. However,
             # code outside this module should always use dirstatetuple.
             def dirstatetuple(*x):
                 """Return the positional arguments unchanged, as a plain tuple.
                 No validation or conversion is performed.
                 """
                 # ``*x`` has already gathered the arguments into a tuple
                 return x
             # Big-endian layout of one index entry: one unsigned 64-bit field, six
             # signed 32-bit integers, a 20-byte string and 12 padding bytes.
             indexformatng = b">Qiiiiii20s12x"
             # size of a single 'Q' field
             indexfirst = struct.calcsize(b'Q')
             # size of a single 'i' field
             sizeint = struct.calcsize(b'i')
             # size in bytes of one packed index entry
             indexsize = struct.calcsize(indexformatng)
             def gettype(q):
                 """Return, as an int, the low 16 bits of ``q``.
                 These are the bits ``offset_type`` fills with its ``type`` argument.
                 """
                 low_bits = q & 0xFFFF
                 return int(low_bits)
             def offset_type(offset, type):
                 """Pack ``offset`` and ``type`` into a single integer.
                 ``offset`` is converted to int, shifted left by 16 bits and OR-ed
                 with ``type``.
                 """
                 shifted = int(offset) << 16
                 return int(shifted | type)
             class BaseIndexObject(object):
                 """Behaviour shared by the pure-Python index objects.
                 The methods below rely on ``_data``, ``_lgt``, ``_extra`` and a
                 ``_calculate_index(i)`` method (byte offset of entry ``i`` inside
                 ``_data``), none of which is defined by this class.
                 """
                 @property
                 def nodemap(self):
                     """Deprecated access to the node -> rev mapping (emits a warning)."""
                     warning = (
                         b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
                     )
                     util.nouideprecwarn(warning, b'5.3', stacklevel=2)
                     return self._nodemap
                 @util.propertycache
                 def _nodemap(self):
                     """Build the node -> rev mapping from every entry of the index."""
                     mapping = nodemaputil.NodeMap({nullid: nullrev})
                     for rev in range(len(self)):
                         node = self[rev][7]
                         mapping[node] = rev
                     return mapping
                 def has_node(self, node):
                     """return True if the node exist in the index"""
                     known = self._nodemap
                     return node in known
                 def rev(self, node):
                     """return a revision for a node
                     If the node is unknown, raise a RevlogError"""
                     known = self._nodemap
                     return known[node]
                 def get_rev(self, node):
                     """return a revision for a node
                     If the node is unknown, return None"""
                     known = self._nodemap
                     return known.get(node)
                 def _stripnodes(self, start):
                     """Drop the nodes of revisions ``start`` and above from the mapping.
                     Nothing happens when the mapping has not been computed yet.
                     """
                     if '_nodemap' not in vars(self):
                         return
                     for rev in range(start, len(self)):
                         node = self[rev][7]
                         del self._nodemap[node]
                 def clearcaches(self):
                     """Discard the cached node -> rev mapping, if any."""
                     self.__dict__.pop('_nodemap', None)
                 def __len__(self):
                     appended = len(self._extra)
                     return self._lgt + appended
                 def append(self, tup):
                     """Add entry ``tup`` at the end, updating the mapping when cached."""
                     if '_nodemap' in vars(self):
                         # the new entry gets the next revision number
                         self._nodemap[tup[7]] = len(self)
                     self._extra.append(tup)
                 def _check_index(self, i):
                     """Raise TypeError for non-int ``i``, IndexError when out of range."""
                     if not isinstance(i, int):
                         raise TypeError(b"expecting int indexes")
                     if not (0 <= i < len(self)):
                         raise IndexError
                 def __getitem__(self, i):
                     if i == -1:
                         # synthetic entry carrying nullid
                         return (0, 0, 0, -1, -1, -1, -1, nullid)
                     self._check_index(i)
                     if i >= self._lgt:
                         # entries added through append() live in ``_extra``
                         return self._extra[i - self._lgt]
                     start = self._calculate_index(i)
                     entry = struct.unpack(
                         indexformatng, self._data[start : start + indexsize]
                     )
                     if i != 0:
                         return entry
                     # first entry: report an offset of 0, keeping only the type bits
                     first_field = offset_type(0, gettype(entry[0]))
                     return (first_field,) + entry[1:]
             class IndexObject(BaseIndexObject):
                 """Index whose fixed-size entries are stored back to back in ``data``."""
                 def __init__(self, data):
                     entries, leftover = divmod(len(data), indexsize)
                     assert leftover == 0
                     self._data = data
                     self._lgt = entries
                     self._extra = []
                 def _calculate_index(self, i):
                     """Byte offset of entry ``i`` inside ``_data``."""
                     return indexsize * i
                 def __delitem__(self, i):
                     """Truncate the index; only ``del index[a:-1]`` is supported."""
                     supported = (
                         isinstance(i, slice) and i.stop == -1 and i.step is None
                     )
                     if not supported:
                         raise ValueError(b"deleting slices only supports a:-1 with step 1")
                     first = i.start
                     self._check_index(first)
                     # must run before truncation: it reads the entries being removed
                     self._stripnodes(first)
                     if first >= self._lgt:
                         # only appended entries are affected
                         self._extra = self._extra[: first - self._lgt]
                         return
                     self._data = self._data[: first * indexsize]
                     self._lgt = first
                     self._extra = []
             class PersistentNodeMapIndexObject(IndexObject):
                 """a Debug oriented class to test persistent nodemap
                 We need a simple python object to test API and higher level behavior. See
                 the Rust implementation for  more serious usage. This should be used only
                 through the dedicated `devel.persistent-nodemap` config.
                 """
+                def nodemap_data_all(self):
+                    """Return bytes containing a full serialization of a nodemap
+                    The nodemap should be valid for the full set of revisions in the
+                    index."""
+                    return nodemaputil.persistent_data(self)
             class InlinedIndexObject(BaseIndexObject):
                 """Index whose entries are each followed by a variable amount of data.
                 Entry offsets are discovered by scanning ``data`` once to count the
                 entries and a second time to record where each one starts.
                 """
                 def __init__(self, data, inline=0):
                     self._data = data
                     # first pass counts, second pass fills ``_offsets``
                     self._lgt = self._inline_scan(None)
                     self._inline_scan(self._lgt)
                     self._extra = []
                 def _inline_scan(self, lgt):
                     """Walk the entries of ``_data`` and return how many there are.
                     When ``lgt`` is not None, ``_offsets`` is (re)created with ``lgt``
                     slots and filled with the start offset of each entry.
                     Raises ValueError if the walk does not end exactly at the end of
                     the data.
                     """
                     recording = lgt is not None
                     if recording:
                         self._offsets = [0] * lgt
                     total = len(self._data)
                     position = 0
                     seen = 0
                     while position <= total - indexsize:
                         # the int right after the leading 'Q' field is the number of
                         # bytes to skip past the entry
                         size_at = position + indexfirst
                         (skip,) = struct.unpack(
                             b'>i', self._data[size_at : size_at + sizeint]
                         )
                         if recording:
                             self._offsets[seen] = position
                         seen += 1
                         position += indexsize + skip
                     if position != total:
                         raise ValueError(b"corrupted data")
                     return seen
                 def __delitem__(self, i):
                     """Truncate the index; only ``del index[a:-1]`` is supported."""
                     supported = (
                         isinstance(i, slice) and i.stop == -1 and i.step is None
                     )
                     if not supported:
                         raise ValueError(b"deleting slices only supports a:-1 with step 1")
                     first = i.start
                     self._check_index(first)
                     # must run before truncation: it reads the entries being removed
                     self._stripnodes(first)
                     if first >= self._lgt:
                         # only appended entries are affected
                         self._extra = self._extra[: first - self._lgt]
                         return
                     self._offsets = self._offsets[:first]
                     self._lgt = first
                     self._extra = []
                 def _calculate_index(self, i):
                     """Byte offset of entry ``i`` inside ``_data``."""
                     offsets = self._offsets
                     return offsets[i]
             def parse_index2(data, inline):
                 """Build an index object for ``data`` and return it in a 2-tuple.
                 For inline data the result is ``(InlinedIndexObject, (0, data))``;
                 otherwise it is ``(IndexObject, None)``.
                 """
                 if inline:
                     return InlinedIndexObject(data, inline), (0, data)
                 return IndexObject(data), None
             def parse_index_devel_nodemap(data, inline):
                 """like parse_index2, but always return a PersistentNodeMapIndexObject
                 The ``inline`` argument is accepted but not used.
                 """
                 index = PersistentNodeMapIndexObject(data)
                 return index, None
             def parse_dirstate(dmap, copymap, st):
                 """Fill ``dmap`` and ``copymap`` from the serialized bytes ``st``.
                 The first 40 bytes are returned as a list of two 20-byte parents.
                 Each following record is a ``>cllll`` header whose last field is the
                 length of the name that comes next. When the name contains a NUL
                 byte, the part after it is stored in ``copymap`` under the part
                 before it. ``dmap`` maps each name to the first four header fields.
                 """
                 parents = [st[:20], st[20:40]]
                 header_size = struct.calcsize(b">cllll")
                 end = len(st)
                 cursor = 40
                 while cursor < end:
                     name_start = cursor + header_size
                     # a literal here is faster
                     entry = _unpack(b">cllll", st[cursor:name_start])
                     cursor = name_start + entry[4]
                     name = st[name_start:cursor]
                     if b'\0' in name:
                         name, source = name.split(b'\0')
                         copymap[name] = source
                     dmap[name] = entry[:4]
                 return parents
             def pack_dirstate(dmap, copymap, pl, now):
                 """Serialize ``dmap`` and ``copymap`` and return the resulting bytes.
                 The output starts with the items of ``pl`` joined together, followed
                 by one ``>cllll`` header plus name per entry of ``dmap``. Names found
                 in ``copymap`` are written as ``name NUL copymap[name]``.
                 Side effect: entries in state ``b'n'`` whose fourth field equals
                 ``int(now)`` are rewritten in ``dmap`` with that field set to -1.
                 """
                 timestamp = int(now)
                 out = stringio()
                 emit = out.write
                 emit(b"".join(pl))
                 for path, entry in pycompat.iteritems(dmap):
                     if entry[0] == b'n' and entry[3] == timestamp:
                         # The file was last modified "simultaneously" with the current
                         # write to dirstate (i.e. within the same second for file-
                         # systems with a granularity of 1 sec). This commonly happens
                         # for at least a couple of files on 'update'.
                         # The user could change the file without changing its size
                         # within the same second. Invalidate the file's mtime in
                         # dirstate, forcing future 'status' calls to compare the
                         # contents of the file if the size is the same. This prevents
                         # mistakenly treating such files as clean.
                         entry = dirstatetuple(entry[0], entry[1], entry[2], -1)
                         dmap[path] = entry
                     name = path
                     if path in copymap:
                         name = b"%s\0%s" % (path, copymap[path])
                     header = _pack(
                         b">cllll", entry[0], entry[1], entry[2], entry[3], len(name)
                     )
                     emit(header)
                     emit(name)
                 return out.getvalue()

mercurial/revlogutils/nodemap.py

0 +5 -2

             # nodemap.py - nodemap related code and utilities
             #
             # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
             # Copyright 2019 George Racinet <georges.racinet@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import os
             import re
             import struct
             from .. import (
                 error,
                 node as nodemod,
-                pycompat,
+                util,
             )
             class NodeMap(dict):
                 """dict that raises RevlogError when a missing key is looked up."""
                 def __missing__(self, x):
                     message = b'unknown node: %s' % x
                     raise error.RevlogError(message)
             def persisted_data(revlog):
                 """read the nodemap for a revlog from disk
                 Returns None when the revlog has no nodemap file, when the docket is
                 missing or empty, or when its version is not ONDISK_VERSION.
                 Otherwise returns whatever ``tryread`` gives for the raw data file
                 named by the docket.
                 """
                 docket_name = revlog.nodemap_file
                 if docket_name is None:
                     return None
                 docket = revlog.opener.tryread(docket_name)
                 if not docket:
                     return None
                 version_end = S_VERSION.size
                 (version,) = S_VERSION.unpack(docket[:version_end])
                 if version != ONDISK_VERSION:
                     return None
                 header_end = version_end + S_HEADER.size
                 (uuid_size,) = S_HEADER.unpack(docket[version_end:header_end])
                 # the uid sits right after the two fixed-size fields
                 uid = docket[header_end : header_end + uuid_size]
                 return revlog.opener.tryread(_rawdata_filepath(revlog, uid))
             def setup_persistent_nodemap(tr, revlog):
                 """Register a transaction finalizer that persists the nodemap on disk
                 Nothing is registered for inline revlogs, for revlogs without a
                 nodemap file, or when the finalizer is already registered.
                 """
                 # inlined revlog are too small for this to be relevant, and a revlog
                 # without nodemap file does not use the persistent nodemap at all
                 if revlog._inline or revlog.nodemap_file is None:
                     return
                 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
                 def persist(tr):
                     return _persist_nodemap(tr, revlog)
                 # no need to register again if it is already there
                 if not tr.hasfinalize(callback_id):
                     tr.addfinalize(callback_id, persist)
             def _persist_nodemap(tr, revlog):
                 """Write nodemap data on disk for a given revlog
                 """
                 if getattr(revlog, 'filteredrevs', ()):
                     raise error.ProgrammingError(
                         "cannot persist nodemap of a filtered changelog"
                     )
                 if revlog.nodemap_file is None:
                     msg = "calling persist nodemap on a revlog without the feature enableb"
                     raise error.ProgrammingError(msg)
-                data = persistent_data(revlog.index)
+                if util.safehasattr(revlog.index, "nodemap_data_all"):
+                    data = revlog.index.nodemap_data_all()
+                else:
+                    data = persistent_data(revlog.index)
                 uid = _make_uid()
                 datafile = _rawdata_filepath(revlog, uid)
                 olds = _other_rawdata_filepath(revlog, uid)
                 if olds:
                     realvfs = getattr(revlog, '_realopener', revlog.opener)
                     def cleanup(tr):
                         for oldfile in olds:
                             realvfs.tryunlink(oldfile)
                     callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
                     tr.addpostclose(callback_id, cleanup)
                 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                 # store vfs
                 with revlog.opener(datafile, b'w') as fd:
                     fd.write(data)
                 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                 # store vfs
                 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
                     fp.write(_serialize_docket(uid))
                 # EXP-TODO: if the transaction abort, we should remove the new data and
                 # reinstall the old one.
             ### Nodemap docket file
             #
             # The nodemap data are stored on disk using 2 files:
             #
             # * a raw data file containing a persistent nodemap
             #   (see `Nodemap Trie` section)
             #
             # * a small "docket" file containing metadata
             #
             # While the nodemap data can be multiple tens of megabytes, the "docket" is
             # small, so it is easy to update it automatically or to duplicate its
             # content during a transaction.
             #
             # Multiple raw data files can exist at the same time (the currently valid
             # one and a new one being used by an in-progress transaction). To
             # accommodate this, the filename hosting the raw data has a variable part.
             # The exact filename is specified inside the "docket" file.
             #
             # The docket file contains information to find, qualify and validate the raw
             # data. Its content is currently very light, but it will expand as the on disk
             # nodemap gains the necessary features to be used in production.
             # version 0 is experimental, no BC guarantee, do not use outside of tests.
             ONDISK_VERSION = 0
             # the docket starts with the version, stored as one unsigned byte
             S_VERSION = struct.Struct(">B")
             # then one unsigned byte giving the length of the uid that follows
             S_HEADER = struct.Struct(">B")
             # number of random bytes drawn for a uid, before hex encoding
             ID_SIZE = 8
             def _make_uid():
                 """return a new unique identifier.
                 The identifier is the hexadecimal form of ID_SIZE random bytes."""
                 random_bytes = os.urandom(ID_SIZE)
                 return nodemod.hex(random_bytes)
             def _serialize_docket(uid):
                 """return serialized bytes for a docket using the passed uid
                 Layout: version byte, uid length byte, then the uid itself."""
                 version = S_VERSION.pack(ONDISK_VERSION)
                 uid_length = S_HEADER.pack(len(uid))
                 return b''.join((version, uid_length, uid))
             def _rawdata_filepath(revlog, uid):
                 """The (vfs relative) nodemap's rawdata file for a given uid"""
                 prefix = revlog.nodemap_file[:-2]
                 return b"%s-%s.nd" % (prefix, uid)
             def _other_rawdata_filepath(revlog, uid):
                 """Return the nodemap raw data files other than the one for ``uid``.
                 The directory holding the raw data file for ``uid`` is listed through
                 ``revlog.opener``; every name matching ``<prefix>-<hex>.nd`` is
                 returned, except the file for ``uid`` itself.
                 """
                 prefix = revlog.nodemap_file[:-2]
                 # Raw literal so that "\." reaches the regex engine without relying on
                 # an invalid string escape. The prefix is escaped so that characters
                 # such as "." or "+" in the file name are matched literally.
                 pattern = re.compile(
                     br"(^|/)%s-[0-9a-f]+\.nd$" % re.escape(prefix)
                 )
                 new_file_path = _rawdata_filepath(revlog, uid)
                 new_file_name = revlog.opener.basename(new_file_path)
                 dirpath = revlog.opener.dirname(new_file_path)
                 return [
                     f
                     for f in revlog.opener.listdir(dirpath)
                     if pattern.match(f) and f != new_file_name
                 ]
             ### Nodemap Trie
             #
             # This is a simple reference implementation to compute and persist a nodemap
             # trie. This reference implementation is write only. The python version of this
             # is not expected to be actually used, since it won't provide any performance
             # improvement over the existing non-persistent C implementation.
             #
             # The nodemap is persisted as a Trie using 4bits-address/16-entries blocks.
             # Each revision can be addressed using the shortest unique prefix of its node.
             #
             # The trie is stored as a sequence of blocks. Each block contains 16 entries
             # (signed 32bit integer, big endian). Each entry can be one of the following:
             #
             #  * value >=  0 -> index of sub-block
             #  * value == -1 -> no value
             #  * value <  -1 -> a revision value: rev = -(value+2)
             #
             # The implementation focuses on simplicity, not on performance. A Rust
             # implementation should provide an efficient version of the same binary
             # persistence. This reference python implementation is never meant to be
             # extensively used in production.
             def persistent_data(index):
                 """return the persistent binary form for a nodemap for a given index
                 The trie is built from ``index`` first, then serialized.
                 """
                 return _persist_trie(_build_trie(index))
             # one block: 16 big-endian signed integers ("l" is 4 bytes in this mode)
             S_BLOCK = struct.Struct(">" + ("l" * 16))
             # value stored in a block slot that holds nothing
             NO_ENTRY = -1
             # rev 0 needs to be -2 because 0 is used by block, -1 is a special value.
             REV_OFFSET = 2
             def _transform_rev(rev):
                 """Return the number used to represent the rev in the tree.
                 (or retrieve a rev number from such representation)
                 The mapping is ``rev -> -(rev + REV_OFFSET)``, which is its own
                 inverse: applying it twice gives back the original value.
                 """
                 shifted = rev + REV_OFFSET
                 return -shifted
             def _to_int(hex_digit):
                 """turn an hexadecimal digit into a proper integer"""
                 return int(hex_digit, 16)
             def _build_trie(index):
                 """build a nodemap trie
                 The nodemap stores revision number for each unique prefix.
                 Each block is a dictionary with keys in `[0, 15]`. Values are either
                 another block or a revision number.
                 Revisions are inserted in increasing order, from 0 to len(index) - 1.
                 """
                 trie = {}
                 for rev in range(len(index)):
                     # item 7 of an index entry is the node
                     node_hex = nodemod.hex(index[rev][7])
                     _insert_into_block(index, 0, trie, rev, node_hex)
                 return trie
             def _insert_into_block(index, level, block, current_rev, current_hex):
                 """insert a new revision in a block
                 index: the index we are adding revision for
                 level: the depth of the current block in the trie
                 block: the block currently being considered
                 current_rev: the revision number we are adding
                 current_hex: the hexadecimal representation of the node of that revision
                 """
                 slot = _to_int(current_hex[level : level + 1])
                 occupant = block.get(slot)
                 if occupant is None:
                     # free slot: the revision is stored directly
                     block[slot] = current_rev
                     return
                 deeper = level + 1
                 if isinstance(occupant, dict):
                     # the slot already leads to a sub-block, descend into it
                     _insert_into_block(index, deeper, occupant, current_rev, current_hex)
                     return
                 # The slot holds another revision sharing this prefix: replace it
                 # with a fresh sub-block and push both revisions one level down.
                 other_hex = nodemod.hex(index[occupant][7])
                 child = {}
                 block[slot] = child
                 _insert_into_block(index, deeper, child, occupant, other_hex)
                 _insert_into_block(index, deeper, child, current_rev, current_hex)
             def _persist_trie(root):
                 """turn a nodemap trie into persistent binary data
                 See `_build_trie` for nodemap trie structure
                 Blocks are written in the order given by `_walk_trie`, so a block's
                 position is known before any block referring to it is serialized.
                 """
                 positions = {}
                 serialized = []
                 for position, node in enumerate(_walk_trie(root)):
                     # blocks are keyed by identity: dicts are not hashable
                     positions[id(node)] = position
                     serialized.append(_persist_block(node, positions))
                 return b''.join(serialized)
             def _walk_trie(block):
                 """yield all the block in a trie
                 Children blocks are always yield before their parent block.
                 """
                 for (_, item) in sorted(block.items()):
                     if isinstance(item, dict):
                         for sub_block in _walk_trie(item):
                             yield sub_block
                 yield block
             def _persist_block(block_node, block_map):
                 """produce persistent binary data for a single block
                 Children block are assumed to be already persisted and present in
                 block_map.
                 """
                 # slots 0..15 in order; absent keys come back as None from get()
                 values = [_to_value(block_node.get(slot), block_map) for slot in range(16)]
                 return S_BLOCK.pack(*values)
             def _to_value(item, block_map):
                 """persist any value as an integer
                 None becomes NO_ENTRY, a block becomes its position taken from
                 ``block_map`` and anything else is treated as a revision number.
                 """
                 if item is None:
                     return NO_ENTRY
                 if isinstance(item, dict):
                     # blocks are registered in block_map under their id()
                     return block_map[id(item)]
                 return _transform_rev(item)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages