# HG changeset patch # User Maciej Fijalkowski # Date 2016-04-24 11:21:38 # Node ID 255274719dc1900af535627913310dd10b7fc9d0 # Parent 12769703d4ba37e1f0e98fa1cebacda9950f68fb pure: write a really lazy version of pure indexObject On PyPy this version performs reasonably well compared to C version. Example command is "hg id" which gets faster, depending on details of your operating system and hard drive (it's bottlenecked on stat mostly) There is potential for improvements by storing extra as a condensed struct too. diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -25,49 +25,111 @@ def dirstatetuple(*x): # x is a tuple return x -def parse_index2(data, inline): - def gettype(q): - return int(q & 0xFFFF) +indexformatng = ">Qiiiiii20s12x" +indexfirst = struct.calcsize('Q') +sizeint = struct.calcsize('i') +indexsize = struct.calcsize(indexformatng) + +def gettype(q): + return int(q & 0xFFFF) - def offset_type(offset, type): - return long(long(offset) << 16 | type) +def offset_type(offset, type): + return long(long(offset) << 16 | type) + +class BaseIndexObject(object): + def __len__(self): + return self._lgt + len(self._extra) + 1 + + def insert(self, i, tup): + assert i == -1 + self._extra.append(tup) - indexformatng = ">Qiiiiii20s12x" + def _fix_index(self, i): + if not isinstance(i, int): + raise TypeError("expecting int indexes") + if i < 0: + i = len(self) + i + if i < 0 or i >= len(self): + raise IndexError + return i - s = struct.calcsize(indexformatng) - index = [] - cache = None - off = 0 + def __getitem__(self, i): + i = self._fix_index(i) + if i == len(self) - 1: + return (0, 0, 0, -1, -1, -1, -1, nullid) + if i >= self._lgt: + return self._extra[i - self._lgt] + index = self._calculate_index(i) + r = struct.unpack(indexformatng, self._data[index:index + indexsize]) + if i == 0: + e = list(r) + type = gettype(e[0]) + e[0] = offset_type(0, type) + return tuple(e) + return r + +class IndexObject(BaseIndexObject): + def __init__(self, data): + assert len(data) % indexsize == 0 + self._data = data + self._lgt = len(data) // indexsize + self._extra = [] + + def _calculate_index(self, i): + return i * indexsize - l = len(data) - s - append = index.append - if inline: - cache = (0, data) - while off <= l: - e = _unpack(indexformatng, data[off:off + s]) - append(e) - if e[1] < 0: - break - off += e[1] + s - else: - while off <= l: - e = _unpack(indexformatng, data[off:off + s]) - append(e) - off += s + def __delitem__(self, i): + if not isinstance(i, slice) or not i.stop == -1 or not i.step is None: + raise ValueError("deleting slices only supports a:-1 with step 1") + i = self._fix_index(i.start) + if i < self._lgt: + self._data = self._data[:i * indexsize] + self._lgt = i + self._extra = [] + else: + self._extra = self._extra[:i - self._lgt] + +class InlinedIndexObject(BaseIndexObject): + def __init__(self, data, inline=0): + self._data = data + self._lgt = self._inline_scan(None) + self._inline_scan(self._lgt) + self._extra = [] - if off != len(data): - raise ValueError('corrupt index file') + def _inline_scan(self, lgt): + off = 0 + if lgt is not None: + self._offsets = [0] * lgt + count = 0 + while off <= len(self._data) - indexsize: + s, = struct.unpack('>i', + self._data[off + indexfirst:off + sizeint + indexfirst]) + if lgt is not None: + self._offsets[count] = off + count += 1 + off += indexsize + s + if off != len(self._data): + raise ValueError("corrupted data") + return count - if index: - e = list(index[0]) - type = gettype(e[0]) - e[0] = offset_type(0, type) - index[0] = tuple(e) + def __delitem__(self, i): + if not isinstance(i, slice) or not i.stop == -1 or not i.step is None: + raise ValueError("deleting slices only supports a:-1 with step 1") + i = self._fix_index(i.start) + if i < self._lgt: + self._offsets = self._offsets[:i] + self._lgt = i + self._extra = [] + else: + self._extra = self._extra[:i - self._lgt] - # add the magic null revision at -1 - index.append((0, 0, 0, -1, -1, -1, -1, nullid)) + def _calculate_index(self, i): + return self._offsets[i] - return index, cache +def parse_index2(data, inline): + if not inline: + return IndexObject(data), None + return InlinedIndexObject(data, inline), (0, data) def parse_dirstate(dmap, copymap, st): parents = [st[:20], st[20: 40]]