##// END OF EJS Templates
changing-files: add the ability to track merged files too...
changing-files: add the ability to track merged files too. The set of merged files is used when doing changeset centric copy tracing (cf `is_merged` in `mercurial/copies.py`). So tracking (and persisting) this set will be useful. We start with adding the attribute on the new object. Differential Revision: https://phab.mercurial-scm.org/D9087

File last commit:

r44809:1d2b37de default
r46186:e5578dbe default
Show More
parsers.py
286 lines | 8.6 KiB | text/x-python | PythonLexer
Martin Geisler
pure Python implementation of parsers.c
r7700 # parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Martin Geisler
pure Python implementation of parsers.c
r7700
Gregory Szorc
parsers: use absolute_import
r27339 from __future__ import absolute_import
import struct
import zlib
revlog: move the nodemap into the index object (for pure)...
r43925 from ..node import nullid, nullrev
from .. import (
pycompat,
util,
)
Augie Fackler
formatting: blacken the codebase...
r43346
revlogutils: move the NodeMap class in a dedicated nodemap module...
r44486 from ..revlogutils import nodemap as nodemaputil
Gregory Szorc
util: prefer "bytesio" to "stringio"...
r36976 stringio = pycompat.bytesio
Martin Geisler
pure Python implementation of parsers.c
r7700
Pulkit Goyal
parsers: alias long to int on Python 3
r31220
Martin Geisler
pure Python implementation of parsers.c
r7700 _pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 # Some code below makes tuples directly because it's more convenient. However,
# code outside this module should always use dirstatetuple.
def dirstatetuple(*x):
    """Return the positional arguments as a tuple.

    In this pure-Python module a dirstate entry is a plain tuple, so the
    arguments are handed back unchanged.
    """
    # ``x`` already is the tuple of positional arguments.
    return x
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 indexformatng = b">Qiiiiii20s12x"
indexfirst = struct.calcsize(b'Q')
sizeint = struct.calcsize(b'i')
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
r29133 indexsize = struct.calcsize(indexformatng)
Augie Fackler
formatting: blacken the codebase...
r43346
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
def gettype(q):
    """Return the low 16 bits of ``q`` as an int."""
    low_bits = q & 0xFFFF
    return int(low_bits)
Matt Mackall
pure/parsers: fix circular imports, import mercurial modules properly
r7945
Augie Fackler
formatting: blacken the codebase...
r43346
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
def offset_type(offset, type):
    """Combine ``offset`` and ``type`` into a single integer.

    ``offset`` is converted to int and shifted left by 16 bits; the result
    is OR-ed with ``type``.
    """
    shifted = int(offset) << 16
    return int(shifted | type)
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
r29133
Augie Fackler
formatting: blacken the codebase...
r43346
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
class BaseIndexObject(object):
    """Behaviour shared by the pure-Python index classes.

    The methods here rely on attributes and a method supplied by
    subclasses: ``_data`` (the raw index bytes), ``_lgt`` (number of
    entries read from ``_data``), ``_extra`` (list of appended entries) and
    ``_calculate_index(i)`` (byte offset of entry ``i`` inside ``_data``).

    An entry is a tuple whose item 7 is the node.
    """

    @property
    def nodemap(self):
        """Deprecated access to the node -> rev mapping (emits a warning)."""
        util.nouideprecwarn(
            b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]",
            b'5.3',
            stacklevel=2,
        )
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        """Build the node -> rev mapping from every entry of the index."""
        mapping = nodemaputil.NodeMap({nullid: nullrev})
        for rev in range(len(self)):
            mapping[self[rev][7]] = rev
        return mapping

    def has_node(self, node):
        """return True if the node exist in the index"""
        known = self._nodemap
        return node in known

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        known = self._nodemap
        return known[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        known = self._nodemap
        return known.get(node)

    def _stripnodes(self, start):
        """Drop the nodes of revisions >= ``start`` from the built nodemap.

        Nothing happens when the nodemap has not been built yet.
        """
        if '_nodemap' not in vars(self):
            return
        for rev in range(start, len(self)):
            node = self[rev][7]
            del self._nodemap[node]

    def clearcaches(self):
        """Forget the nodemap stored on the instance, if any."""
        vars(self).pop('_nodemap', None)

    def __len__(self):
        """Number of entries: those in ``_data`` plus the appended ones."""
        return len(self._extra) + self._lgt

    def append(self, tup):
        """Add entry ``tup`` at the end of the index.

        When the nodemap is already built, it is updated as well.
        """
        if '_nodemap' in vars(self):
            # the new entry gets the next revision number
            new_rev = len(self)
            self._nodemap[tup[7]] = new_rev
        self._extra.append(tup)

    def _check_index(self, i):
        """Raise TypeError for a non-int ``i``, IndexError when out of range."""
        if not isinstance(i, int):
            raise TypeError(b"expecting int indexes")
        if not 0 <= i < len(self):
            raise IndexError

    def __getitem__(self, i):
        """Return the entry tuple for revision ``i``.

        ``-1`` yields a fixed entry whose node is ``nullid``.
        """
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        self._check_index(i)
        if i >= self._lgt:
            # entry appended after the initial data was parsed
            return self._extra[i - self._lgt]
        start = self._calculate_index(i)
        raw = self._data[start : start + indexsize]
        entry = struct.unpack(indexformatng, raw)
        if i != 0:
            return entry
        # For the first entry, item 0 is rebuilt with an offset of zero,
        # keeping only its type bits.
        first_field = offset_type(0, gettype(entry[0]))
        return (first_field,) + entry[1:]
Augie Fackler
formatting: blacken the codebase...
r43346
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
class IndexObject(BaseIndexObject):
    """Index over ``data`` made of back-to-back ``indexsize``-byte entries."""

    def __init__(self, data):
        """Wrap ``data``; its length must be a multiple of ``indexsize``."""
        entry_count, leftover = divmod(len(data), indexsize)
        assert leftover == 0
        self._data = data
        self._lgt = entry_count
        self._extra = []

    def _calculate_index(self, i):
        """Return the byte offset of entry ``i`` inside ``_data``."""
        return indexsize * i

    def __delitem__(self, i):
        """Remove every entry from ``i.start`` onward.

        Only slices of the form ``a:-1`` without a step are accepted; any
        other argument raises ValueError.
        """
        is_tail_slice = (
            isinstance(i, slice) and i.stop == -1 and i.step is None
        )
        if not is_tail_slice:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        start = i.start
        self._check_index(start)
        self._stripnodes(start)
        if start >= self._lgt:
            # only appended entries are affected
            self._extra = self._extra[: start - self._lgt]
            return
        # the cut falls inside the raw data: truncate it and drop all
        # appended entries
        self._data = self._data[: start * indexsize]
        self._lgt = start
        self._extra = []
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
r29133
nodemap: add a (python) index class for persistent nodemap testing...
class PersistentNodeMapIndexObject(IndexObject):
    """a Debug oriented class to test persistent nodemap

    We need a simple python object to test API and higher level behavior. See
    the Rust implementation for more serious usage. This should be used only
    through the dedicated `devel.persistent-nodemap` config.
    """

    # Class-level defaults: `update_nodemap_data` is the only place that
    # assigns these, and only when it receives data. Without defaults,
    # calling `nodemap_data_incremental` first would raise AttributeError
    # instead of reporting that no incremental update is available.
    _nm_root = None
    _nm_max_idx = None
    _nm_docket = None

    def nodemap_data_all(self):
        """Return bytes containing a full serialization of a nodemap

        The nodemap should be valid for the full set of revisions in the
        index."""
        return nodemaputil.persistent_data(self)

    def nodemap_data_incremental(self):
        """Return bytes containing an incremental update to persistent nodemap

        This contains the data for an append-only update of the data provided
        in the last call to `update_nodemap_data`.

        Returns None when no usable data was provided; otherwise a
        ``(docket, changed, data)`` tuple. The stored state is cleared after
        a successful call.
        """
        if self._nm_root is None:
            return None
        docket = self._nm_docket
        changed, data = nodemaputil.update_persistent_data(
            self, self._nm_root, self._nm_max_idx, docket.tip_rev
        )

        # the stored data is consumed: a new `update_nodemap_data` call is
        # needed before the next incremental update
        self._nm_root = self._nm_max_idx = self._nm_docket = None
        return docket, changed, data

    def update_nodemap_data(self, docket, nm_data):
        """provide full block of persisted binary data for a nodemap

        The data are expected to come from disk. See `nodemap_data_all` for a
        producer of such data.

        Nothing is changed when ``nm_data`` is None."""
        if nm_data is not None:
            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
            if self._nm_root:
                self._nm_docket = docket
            else:
                # parsing produced nothing usable: reset all stored state
                self._nm_root = self._nm_max_idx = self._nm_docket = None
nodemap: provide the on disk data to indexes who support it...
r44801
nodemap: add a (python) index class for persistent nodemap testing...
r44794
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
class InlinedIndexObject(BaseIndexObject):
    """Index over ``data`` where each entry is followed by a variable
    amount of extra bytes.

    Entry positions cannot be computed arithmetically, so they are
    recorded in ``_offsets`` by scanning the data.
    """

    def __init__(self, data, inline=0):
        """Scan ``data`` and record the offset of every entry.

        ``inline`` is accepted but not used here.
        """
        self._data = data
        # First pass counts the entries, second pass records their offsets
        # in a list of exactly that size.
        self._lgt = self._inline_scan(None)
        self._inline_scan(self._lgt)
        self._extra = []

    def _inline_scan(self, lgt):
        """Walk the data entry by entry and return the number of entries.

        When ``lgt`` is not None, ``_offsets`` is (re)created with ``lgt``
        slots and filled with the byte offset of each entry.

        Raises ValueError when the walk does not end exactly at the end of
        the data, or when an entry declares a negative length.
        """
        data = self._data
        end = len(data)
        off = 0
        if lgt is not None:
            self._offsets = [0] * lgt
        count = 0
        while off <= end - indexsize:
            # signed big-endian int located ``indexfirst`` bytes into the
            # entry: the number of bytes to skip after the entry itself
            (s,) = struct.unpack(
                b'>i', data[off + indexfirst : off + sizeint + indexfirst]
            )
            if s < 0:
                # A negative length would move the cursor backwards (or not
                # at all), which could loop forever on corrupted input.
                raise ValueError(b"corrupted data")
            if lgt is not None:
                self._offsets[count] = off
            count += 1
            off += indexsize + s
        if off != end:
            raise ValueError(b"corrupted data")
        return count

    def __delitem__(self, i):
        """Remove every entry from ``i.start`` onward.

        Only slices of the form ``a:-1`` without a step are accepted; any
        other argument raises ValueError.
        """
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        i = i.start
        self._check_index(i)
        self._stripnodes(i)
        if i < self._lgt:
            # forget the offsets of the removed entries and drop all
            # appended entries
            self._offsets = self._offsets[:i]
            self._lgt = i
            self._extra = []
        else:
            # only appended entries are affected
            self._extra = self._extra[: i - self._lgt]

    def _calculate_index(self, i):
        """Return the recorded byte offset of entry ``i``."""
        return self._offsets[i]
Martin Geisler
pure Python implementation of parsers.c
r7700
Augie Fackler
formatting: blacken the codebase...
r43346
Maciej Fijalkowski
pure: write a really lazy version of pure indexObject...
def parse_index2(data, inline):
    """Build an index object for ``data`` and return it in a 2-tuple.

    With a truthy ``inline`` the result is
    ``(InlinedIndexObject, (0, data))``; otherwise it is
    ``(IndexObject, None)``.
    """
    if inline:
        return InlinedIndexObject(data, inline), (0, data)
    return IndexObject(data), None
Martin Geisler
pure Python implementation of parsers.c
r7700
Augie Fackler
formatting: blacken the codebase...
r43346
nodemap: add a (python) index class for persistent nodemap testing...
def parse_index_devel_nodemap(data, inline):
    """like parse_index2, but always return a PersistentNodeMapIndexObject

    ``inline`` is accepted but not used.
    """
    index = PersistentNodeMapIndexObject(data)
    return index, None
Martin Geisler
pure Python implementation of parsers.c
def parse_dirstate(dmap, copymap, st):
    """Parse serialized dirstate data ``st`` into ``dmap`` and ``copymap``.

    The first 40 bytes hold the two 20-byte parents. They are followed by
    entries, each made of a ``>cllll`` header and a name whose length is
    the last header field. A name containing a NUL byte is split at the
    first NUL: the part before is the file name and the part after is
    stored in ``copymap`` under that file name.

    ``dmap`` maps each file name to the first four header fields.

    Returns the list of the two parents. ``struct.error`` is raised when an
    entry header is truncated.
    """
    parents = [st[:20], st[20:40]]
    # Compile the header layout once; `unpack_from` reads straight out of
    # `st` without slicing a copy of every header.
    header = struct.Struct(b">cllll")
    unpack_from = header.unpack_from
    header_size = header.size
    pos = 40
    end = len(st)
    # the inner loop
    while pos < end:
        name_start = pos + header_size
        e = unpack_from(st, pos)
        pos = name_start + e[4]
        f = st[name_start:pos]
        if b'\0' in f:
            # Split on the first NUL only, so that a name holding several
            # NULs does not fail with an unrelated tuple-unpacking error.
            f, c = f.split(b'\0', 1)
            copymap[f] = c
        dmap[f] = e[:4]
    return parents
Siddharth Agarwal
dirstate: move pure python dirstate packing to pure/parsers.py
r18567
Augie Fackler
formatting: blacken the codebase...
r43346
Siddharth Agarwal
dirstate: move pure python dirstate packing to pure/parsers.py
def pack_dirstate(dmap, copymap, pl, now):
    """Serialize the dirstate and return the resulting bytes.

    The output starts with the concatenation of ``pl``, followed by one
    record per item of ``dmap``: a ``>cllll`` header (the four entry fields
    plus the length of the name) and then the name. When the file has an
    entry in ``copymap``, the name written is ``file + NUL + copy source``.

    Entries in state ``n`` whose fourth field equals ``int(now)`` are
    replaced in ``dmap`` by a copy with that field set to -1, and are
    written out that way.
    """
    now = int(now)
    out = stringio()
    emit = out.write
    emit(b"".join(pl))
    for fname, entry in pycompat.iteritems(dmap):
        if entry[0] == b'n' and entry[3] == now:
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            entry = dirstatetuple(entry[0], entry[1], entry[2], -1)
            dmap[fname] = entry
        record_name = fname
        if fname in copymap:
            record_name = b"%s\0%s" % (fname, copymap[fname])
        emit(
            _pack(
                b">cllll",
                entry[0],
                entry[1],
                entry[2],
                entry[3],
                len(record_name),
            )
        )
        emit(record_name)
    return out.getvalue()