parsers.py
364 lines
| 11.5 KiB
| text/x-python
|
PythonLexer
Martin Geisler
|
# parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Martin Geisler
|
r7700 | |||
Gregory Szorc
|
r27339 | from __future__ import absolute_import | ||
import struct | ||||
import zlib | ||||
r43925 | from ..node import nullid, nullrev | |||
from .. import ( | ||||
pycompat, | ||||
util, | ||||
) | ||||
Augie Fackler
|
r43346 | |||
r44486 | from ..revlogutils import nodemap as nodemaputil | |||
Gregory Szorc
|
# In-memory bytes buffer type, taken from the project's pycompat layer.
stringio = pycompat.bytesio


# Short module-level aliases for the struct and zlib entry points.
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
Siddharth Agarwal
|
# Some code below makes tuples directly because it's more convenient. However,
# code outside this module should always use dirstatetuple.
def dirstatetuple(*x):
    """Return the positional arguments as a plain tuple."""
    # `x` already is the tuple of arguments; hand it back untouched
    fields = x
    return fields
Augie Fackler
|
r43346 | |||
Maciej Fijalkowski
|
def gettype(q):
    """Return the low 16 bits of ``q`` as an int."""
    low_16_bits = 0xFFFF
    return int(q & low_16_bits)
Matt Mackall
|
r7945 | |||
Augie Fackler
|
r43346 | |||
Maciej Fijalkowski
|
def offset_type(offset, type):
    """Combine ``offset`` and ``type`` into a single int.

    ``offset`` is converted to an int and shifted left by 16 bits; ``type``
    is then OR-ed into the result.
    """
    shifted = int(offset) << 16
    return int(shifted | type)
Maciej Fijalkowski
|
r29133 | |||
Augie Fackler
|
r43346 | |||
Maciej Fijalkowski
|
class BaseIndexObject(object):
    """Common pure-Python behaviour shared by the index objects below.

    The methods here rely on attributes supplied by subclasses: ``_data``
    (raw index bytes), ``_lgt`` (number of entries held in ``_data``),
    ``_extra`` (list of packed entries appended afterwards) and
    ``_calculate_index(i)`` (byte offset of entry ``i`` inside ``_data``).
    """

    # Format of an index entry according to Python's `struct` language
    index_format = b">Qiiiiii20s12x"
    # Size of a C unsigned long long int, platform independent
    big_int_size = struct.calcsize(b'>Q')
    # Size of a C long int, platform independent
    int_size = struct.calcsize(b'>i')
    # Size of the entire index format
    index_size = struct.calcsize(index_format)
    # An empty index entry, used as a default value to be overridden, or nullrev
    null_item = (0, 0, 0, -1, -1, -1, -1, nullid)

    @property
    def nodemap(self):
        """Deprecated access to the node map; emits a deprecation warning."""
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        """Build the node -> revision map from every entry of the index."""
        mapping = nodemaputil.NodeMap({nullid: nullrev})
        for rev in range(len(self)):
            # field 7 of an entry is its node
            mapping[self[rev][7]] = rev
        return mapping

    def has_node(self, node):
        """return True if the node exist in the index"""
        known = self._nodemap
        return node in known

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, the lookup in the node map fails (the
        historical docstring names RevlogError; presumably raised by
        NodeMap, which is defined outside this module)."""
        known = self._nodemap
        return known[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        known = self._nodemap
        return known.get(node)

    def _stripnodes(self, start):
        """Drop the cached node entries of revisions ``start`` and above."""
        if '_nodemap' not in vars(self):
            # the cache was never built, so there is nothing to prune
            return
        for rev in range(start, len(self)):
            del self._nodemap[self[rev][7]]

    def clearcaches(self):
        """Discard the cached node map, if one was built."""
        self.__dict__.pop('_nodemap', None)

    def __len__(self):
        appended = len(self._extra)
        return self._lgt + appended

    def append(self, tup):
        """Pack ``tup`` with ``index_format`` and add it to the index."""
        if '_nodemap' in vars(self):
            # keep an already-built node map in sync with the new entry
            self._nodemap[tup[7]] = len(self)
        self._extra.append(_pack(self.index_format, *tup))

    def _check_index(self, i):
        """Raise TypeError for non-int ``i``, IndexError when out of range."""
        if not isinstance(i, int):
            raise TypeError(b"expecting int indexes")
        if not 0 <= i < len(self):
            raise IndexError

    def __getitem__(self, i):
        if i == -1:
            return self.null_item
        self._check_index(i)
        stored = self._lgt
        if i < stored:
            begin = self._calculate_index(i)
            raw = self._data[begin : begin + self.index_size]
        else:
            # entries added through append() live in `_extra`
            raw = self._extra[i - stored]
        entry = _unpack(self.index_format, raw)
        if stored and i == 0:
            # for the first entry, the offset part of field 0 is reported
            # as 0 and only its type bits are kept
            entry = (offset_type(0, gettype(entry[0])),) + entry[1:]
        return entry
Augie Fackler
|
r43346 | |||
Maciej Fijalkowski
|
class IndexObject(BaseIndexObject):
    """Index over a data blob made only of fixed-size entries."""

    def __init__(self, data):
        entry_size = self.index_size
        assert len(data) % entry_size == 0
        self._data = data
        self._lgt = len(data) // entry_size
        self._extra = []

    def _calculate_index(self, i):
        """Return the byte offset of entry ``i`` inside ``_data``."""
        return i * self.index_size

    def __delitem__(self, i):
        """Truncate the index; only ``del index[a:-1]`` is supported."""
        wellformed = isinstance(i, slice) and i.stop == -1 and i.step is None
        if not wellformed:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        first = i.start
        self._check_index(first)
        self._stripnodes(first)
        if first >= self._lgt:
            # only appended entries are removed
            self._extra = self._extra[: first - self._lgt]
            return
        # the cut falls inside the stored data: shorten it and drop all
        # appended entries
        self._data = self._data[: first * self.index_size]
        self._lgt = first
        self._extra = []
Maciej Fijalkowski
|
r29133 | |||
class PersistentNodeMapIndexObject(IndexObject):
    """a Debug oriented class to test persistent nodemap

    We need a simple python object to test API and higher level behavior. See
    the Rust implementation for more serious usage. This should be used only
    through the dedicated `devel.persistent-nodemap` config.
    """

    # State recorded by `update_nodemap_data`. Declared at class level so that
    # `nodemap_data_incremental` returns None, rather than failing with
    # AttributeError, on an index that never received persisted data.
    _nm_root = None
    _nm_max_idx = None
    _nm_docket = None

    def nodemap_data_all(self):
        """Return bytes containing a full serialization of a nodemap

        The nodemap should be valid for the full set of revisions in the
        index."""
        return nodemaputil.persistent_data(self)

    def nodemap_data_incremental(self):
        """Return bytes containing an incremental update to persistent nodemap

        This contains the data for an append-only update of the data provided
        in the last call to `update_nodemap_data`.

        Returns None when no usable persisted data is recorded, otherwise a
        `(docket, changed, data)` tuple. The recorded state is consumed by
        the call.
        """
        if self._nm_root is None:
            return None
        docket = self._nm_docket
        changed, data = nodemaputil.update_persistent_data(
            self, self._nm_root, self._nm_max_idx, docket.tip_rev
        )

        # the recorded state is single-use: reset it once consumed
        self._nm_root = self._nm_max_idx = self._nm_docket = None
        return docket, changed, data

    def update_nodemap_data(self, docket, nm_data):
        """provide full block of persisted binary data for a nodemap

        The data are expected to come from disk. See `nodemap_data_all` for a
        producer of such data.

        When `nm_data` is None the recorded state is left untouched."""
        if nm_data is not None:
            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
            if self._nm_root:
                self._nm_docket = docket
            else:
                # parsing produced no root: forget everything
                self._nm_root = self._nm_max_idx = self._nm_docket = None
r44801 | ||||
r44794 | ||||
Maciej Fijalkowski
|
class InlinedIndexObject(BaseIndexObject):
    """Index whose entries are interleaved with other data in ``_data``."""

    def __init__(self, data, inline=0):
        self._data = data
        # the first scan only counts entries, the second records offsets
        self._lgt = self._inline_scan(None)
        self._inline_scan(self._lgt)
        self._extra = []

    def _inline_scan(self, lgt):
        """Walk ``_data`` entry by entry and return the number of entries.

        When ``lgt`` is not None, ``_offsets`` is rebuilt with ``lgt`` slots
        and filled with the byte offset of each entry. Raises ValueError if
        the walk does not end exactly at the end of ``_data``.
        """
        record = lgt is not None
        if record:
            self._offsets = [0] * lgt
        total = len(self._data)
        last_start = total - self.index_size
        pos = 0
        seen = 0
        while pos <= last_start:
            # the 4-byte integer right after the leading 8-byte field gives
            # the number of bytes that follow this entry
            field = pos + self.big_int_size
            (chunk_len,) = struct.unpack(
                b'>i', self._data[field : field + self.int_size]
            )
            if record:
                self._offsets[seen] = pos
            seen += 1
            pos += self.index_size + chunk_len
        if pos != total:
            raise ValueError(b"corrupted data")
        return seen

    def __delitem__(self, i):
        """Truncate the index; only ``del index[a:-1]`` is supported."""
        wellformed = isinstance(i, slice) and i.stop == -1 and i.step is None
        if not wellformed:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        first = i.start
        self._check_index(first)
        self._stripnodes(first)
        if first >= self._lgt:
            # only appended entries are removed
            self._extra = self._extra[: first - self._lgt]
            return
        self._offsets = self._offsets[:first]
        self._lgt = first
        self._extra = []

    def _calculate_index(self, i):
        """Return the recorded byte offset of entry ``i``."""
        return self._offsets[i]
Martin Geisler
|
r7700 | |||
Augie Fackler
|
r43346 | |||
Raphaël Gomès
|
def parse_index2(data, inline, revlogv2=False):
    """Build the index object matching ``inline`` and ``revlogv2``.

    Returns ``(index, None)`` for non-inline data and
    ``(index, (0, data))`` for inline data.
    """
    if inline:
        index_cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
        return index_cls(data, inline), (0, data)
    index_cls = IndexObject2 if revlogv2 else IndexObject
    return index_cls(data), None
class Index2Mixin(object):
    # 6 bytes: offset
    # 2 bytes: flags
    # 4 bytes: compressed length
    # 4 bytes: uncompressed length
    # 4 bytes: base rev
    # 4 bytes: link rev
    # 4 bytes: parent 1 rev
    # 4 bytes: parent 2 rev
    # 32 bytes: nodeid
    # 8 bytes: sidedata offset
    # 4 bytes: sidedata compressed length
    # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
    index_format = b">Qiiiiii20s12xQi20x"
    index_size = struct.calcsize(index_format)
    assert index_size == 96, index_size
    null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)

    def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
        """
        Replace an existing index entry's sidedata offset and length with new
        ones.
        This cannot be used outside of the context of sidedata rewriting,
        inside the transaction that creates the revision `i`.

        Raises KeyError for a negative `i` and for entries that are not
        among the appended ones.
        """
        if i < 0:
            raise KeyError
        self._check_index(i)
        if i < self._lgt:
            msg = b"cannot rewrite entries outside of this transaction"
            raise KeyError(msg)
        sidedata_format = b">Qi"
        packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
        slot = i - self._lgt
        old = self._extra[slot]
        # the sidedata fields start at byte 64 of a packed entry
        tail_start = 64 + struct.calcsize(sidedata_format)
        self._extra[slot] = old[:64] + packed + old[tail_start:]
Raphaël Gomès
|
r47438 | |||
class IndexObject2(Index2Mixin, IndexObject):
    """IndexObject using the entry layout declared by Index2Mixin."""
class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
    """InlinedIndexObject using the entry layout declared by Index2Mixin."""

    def _inline_scan(self, lgt):
        """Walk ``_data`` entry by entry and return the number of entries.

        Each entry is followed by its data and its sidedata. When ``lgt`` is
        not None, ``_offsets`` is rebuilt with ``lgt`` slots and filled with
        the byte offset of each entry. Raises ValueError if the walk does
        not end exactly at the end of ``_data``.
        """
        # byte position of the sidedata length field inside an entry
        sidedata_length_pos = 72
        record = lgt is not None
        if record:
            self._offsets = [0] * lgt
        total = len(self._data)
        last_start = total - self.index_size
        pos = 0
        seen = 0
        while pos <= last_start:
            field = pos + self.big_int_size
            (data_size,) = struct.unpack(
                b'>i', self._data[field : field + self.int_size]
            )
            field = pos + sidedata_length_pos
            (side_data_size,) = struct.unpack(
                b'>i', self._data[field : field + self.int_size]
            )
            if record:
                self._offsets[seen] = pos
            seen += 1
            pos += self.index_size + data_size + side_data_size
        if pos != total:
            raise ValueError(b"corrupted data")
        return seen
Martin Geisler
|
r7700 | |||
Augie Fackler
|
r43346 | |||
def parse_index_devel_nodemap(data, inline):
    """like parse_index2, but always return a PersistentNodeMapIndexObject"""
    index = PersistentNodeMapIndexObject(data)
    return index, None
Martin Geisler
|
def parse_dirstate(dmap, copymap, st):
    """Fill ``dmap`` and ``copymap`` from the dirstate bytes ``st``.

    The first 40 bytes hold the two 20-byte parents, which are returned as
    a list. Every following record is a ``>cllll`` header whose last field
    is the length of the name that comes right after it. A name containing
    a NUL byte is split into the file name and its copy source.
    """
    parents = [st[:20], st[20:40]]
    # bind what the loop needs to locals
    unpack = struct.unpack
    header_size = struct.calcsize(b">cllll")
    end = len(st)
    cursor = 40
    while cursor < end:
        name_start = cursor + header_size
        # a literal format here is faster
        entry = unpack(b">cllll", st[cursor:name_start])
        cursor = name_start + entry[4]
        name = st[name_start:cursor]
        if b'\0' in name:
            name, source = name.split(b'\0')
            copymap[name] = source
        dmap[name] = entry[:4]
    return parents
Siddharth Agarwal
|
r18567 | |||
Augie Fackler
|
r43346 | |||
Siddharth Agarwal
|
def pack_dirstate(dmap, copymap, pl, now):
    """Serialize ``dmap`` and ``copymap`` and return the resulting bytes.

    The output starts with the concatenation of ``pl``, followed by one
    ``>cllll`` header plus name per entry of ``dmap``. Entries in state
    ``n`` whose mtime equals ``now`` are rewritten, in ``dmap`` as well,
    with an mtime of -1.
    """
    now = int(now)
    out = stringio()
    emit = out.write
    emit(b"".join(pl))
    for fname, entry in pycompat.iteritems(dmap):
        if entry[0] == b'n' and entry[3] == now:
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            entry = dirstatetuple(entry[0], entry[1], entry[2], -1)
            dmap[fname] = entry
        record_name = fname
        if fname in copymap:
            # a copied file is stored as "name NUL copy-source"
            record_name = b"%s\0%s" % (fname, copymap[fname])
        header = _pack(
            b">cllll", entry[0], entry[1], entry[2], entry[3], len(record_name)
        )
        emit(header)
        emit(record_name)
    return out.getvalue()