##// END OF EJS Templates
grep: move readfile() to grepsearcher class
grep: move readfile() to grepsearcher class

File last commit:

r44742:c443b9ba stable
r46291:888e633f default
Show More
linelog.py
467 lines | 14.9 KiB | text/x-python | PythonLexer
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 # linelog - efficient cache for annotate data
#
# Copyright 2018 Google LLC.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""linelog is an efficient cache for annotate data inspired by SCCS Weaves.
SCCS Weaves are an implementation of
https://en.wikipedia.org/wiki/Interleaved_deltas. See
Augie Fackler
cleanup: update references to /help/ that should now be /helptext/...
r44135 mercurial/helptext/internals/linelog.txt for an exploration of SCCS weaves
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 and how linelog works in detail.
Here's a hacker's summary: a linelog is a program which is executed in
the context of a revision. Executing the program emits information
about lines, including the revision that introduced them and the line
number in the file at the introducing revision. When an insertion or
deletion is performed on the file, a jump instruction is used to patch
in a new body of annotate information.
"""
from __future__ import absolute_import, print_function
import abc
import struct
Augie Fackler
formatting: blacken the codebase...
r43346 from .thirdparty import attr
from . import pycompat
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 _llentry = struct.Struct(b'>II')
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 class LineLogError(Exception):
"""Error raised when something bad happens internally in linelog."""
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 @attr.s
class lineinfo(object):
# Introducing revision of this line.
rev = attr.ib()
# Line number for this line in its introducing revision.
linenum = attr.ib()
# Private. Offset in the linelog program of this line. Used internally.
_offset = attr.ib()
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 @attr.s
class annotateresult(object):
rev = attr.ib()
lines = attr.ib()
_eof = attr.ib()
def __iter__(self):
return iter(self.lines)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: suppress annoying pytype warning about an ignored metaclass...
r43773 class _llinstruction(object): # pytype: disable=ignored-metaclass
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def __init__(self, op1, op2):
pass
@abc.abstractmethod
def __str__(self):
pass
def __repr__(self):
return str(self)
@abc.abstractmethod
def __eq__(self, other):
pass
@abc.abstractmethod
def encode(self):
"""Encode this instruction to the binary linelog format."""
@abc.abstractmethod
def execute(self, rev, pc, emit):
"""Execute this instruction.
Args:
rev: The revision we're annotating.
pc: The current offset in the linelog program.
emit: A function that accepts a single lineinfo object.
Returns:
The new value of pc. Returns None if exeuction should stop
(that is, we've found the end of the file.)
"""
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 class _jge(_llinstruction):
"""If the current rev is greater than or equal to op1, jump to op2."""
def __init__(self, op1, op2):
self._cmprev = op1
self._target = op2
def __str__(self):
Augie Fackler
cleanup: remove pointless r-prefixes on single-quoted strings...
r43906 return 'JGE %d %d' % (self._cmprev, self._target)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def __eq__(self, other):
Augie Fackler
formatting: blacken the codebase...
r43346 return (
type(self) == type(other)
and self._cmprev == other._cmprev
and self._target == other._target
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def encode(self):
return _llentry.pack(self._cmprev << 2, self._target)
def execute(self, rev, pc, emit):
if rev >= self._cmprev:
return self._target
return pc + 1
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 class _jump(_llinstruction):
"""Unconditional jumps are expressed as a JGE with op1 set to 0."""
def __init__(self, op1, op2):
if op1 != 0:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 self._target = op2
def __str__(self):
Augie Fackler
cleanup: remove pointless r-prefixes on single-quoted strings...
r43906 return 'JUMP %d' % (self._target)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def __eq__(self, other):
Augie Fackler
formatting: blacken the codebase...
r43346 return type(self) == type(other) and self._target == other._target
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def encode(self):
return _llentry.pack(0, self._target)
def execute(self, rev, pc, emit):
return self._target
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 class _eof(_llinstruction):
"""EOF is expressed as a JGE that always jumps to 0."""
def __init__(self, op1, op2):
if op1 != 0:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 if op2 != 0:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def __str__(self):
Augie Fackler
linelog: port to Python 3...
r38845 return r'EOF'
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def __eq__(self, other):
return type(self) == type(other)
def encode(self):
return _llentry.pack(0, 0)
def execute(self, rev, pc, emit):
return None
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 class _jl(_llinstruction):
"""If the current rev is less than op1, jump to op2."""
def __init__(self, op1, op2):
self._cmprev = op1
self._target = op2
def __str__(self):
Augie Fackler
cleanup: remove pointless r-prefixes on single-quoted strings...
r43906 return 'JL %d %d' % (self._cmprev, self._target)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def __eq__(self, other):
Augie Fackler
formatting: blacken the codebase...
r43346 return (
type(self) == type(other)
and self._cmprev == other._cmprev
and self._target == other._target
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def encode(self):
return _llentry.pack(1 | (self._cmprev << 2), self._target)
def execute(self, rev, pc, emit):
if rev < self._cmprev:
return self._target
return pc + 1
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 class _line(_llinstruction):
"""Emit a line."""
def __init__(self, op1, op2):
# This line was introduced by this revision number.
self._rev = op1
# This line had the specified line number in the introducing revision.
self._origlineno = op2
def __str__(self):
Augie Fackler
cleanup: remove pointless r-prefixes on single-quoted strings...
r43906 return 'LINE %d %d' % (self._rev, self._origlineno)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def __eq__(self, other):
Augie Fackler
formatting: blacken the codebase...
r43346 return (
type(self) == type(other)
and self._rev == other._rev
and self._origlineno == other._origlineno
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def encode(self):
return _llentry.pack(2 | (self._rev << 2), self._origlineno)
def execute(self, rev, pc, emit):
emit(lineinfo(self._rev, self._origlineno, pc))
return pc + 1
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 def _decodeone(data, offset):
"""Decode a single linelog instruction from an offset in a buffer."""
try:
op1, op2 = _llentry.unpack_from(data, offset)
except struct.error as e:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise LineLogError(b'reading an instruction failed: %r' % e)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 opcode = op1 & 0b11
op1 = op1 >> 2
if opcode == 0:
if op1 == 0:
if op2 == 0:
return _eof(op1, op2)
return _jump(op1, op2)
return _jge(op1, op2)
elif opcode == 1:
return _jl(op1, op2)
elif opcode == 2:
return _line(op1, op2)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise NotImplementedError(b'Unimplemented opcode %r' % opcode)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 class linelog(object):
"""Efficient cache for per-line history information."""
def __init__(self, program=None, maxrev=0):
if program is None:
# We pad the program with an extra leading EOF so that our
# offsets will match the C code exactly. This means we can
# interoperate with the C code.
program = [_eof(0, 0), _eof(0, 0)]
self._program = program
self._lastannotate = None
self._maxrev = maxrev
def __eq__(self, other):
Augie Fackler
formatting: blacken the codebase...
r43346 return (
type(self) == type(other)
and self._program == other._program
and self._maxrev == other._maxrev
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def __repr__(self):
Kyle Lippincott
py3: __repr__ needs to return str, not bytes...
r44742 return '<linelog at %s: maxrev=%d size=%d>' % (
Augie Fackler
formatting: blacken the codebase...
r43346 hex(id(self)),
self._maxrev,
len(self._program),
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def debugstr(self):
Augie Fackler
cleanup: remove pointless r-prefixes on single-quoted strings...
r43906 fmt = '%%%dd %%s' % len(str(len(self._program)))
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return pycompat.sysstr(b'\n').join(
Augie Fackler
formatting: blacken the codebase...
r43346 fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1)
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
@classmethod
def fromdata(cls, buf):
if len(buf) % _llentry.size != 0:
raise LineLogError(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b"invalid linelog buffer size %d (must be a multiple of %d)"
Augie Fackler
formatting: blacken the codebase...
r43346 % (len(buf), _llentry.size)
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 expected = len(buf) / _llentry.size
fakejge = _decodeone(buf, 0)
if isinstance(fakejge, _jump):
maxrev = 0
Augie Fackler
linelog: be more careful about types before looking for _target attribute...
r43774 elif isinstance(fakejge, (_jge, _jl)):
maxrev = fakejge._cmprev
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 else:
Augie Fackler
linelog: be more careful about types before looking for _target attribute...
r43774 raise LineLogError(
'Expected one of _jump, _jge, or _jl. Got %s.'
% type(fakejge).__name__
)
assert isinstance(fakejge, (_jump, _jge, _jl)) # help pytype
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 numentries = fakejge._target
if expected != numentries:
Augie Fackler
formatting: blacken the codebase...
r43346 raise LineLogError(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b"corrupt linelog data: claimed"
b" %d entries but given data for %d entries"
Augie Fackler
formatting: blacken the codebase...
r43346 % (expected, numentries)
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 instructions = [_eof(0, 0)]
for offset in pycompat.xrange(1, numentries):
instructions.append(_decodeone(buf, offset * _llentry.size))
return cls(instructions, maxrev=maxrev)
def encode(self):
hdr = _jge(self._maxrev, len(self._program)).encode()
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return hdr + b''.join(i.encode() for i in self._program[1:])
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831
def clear(self):
self._program = []
self._maxrev = 0
self._lastannotate = None
Augie Fackler
linelog: add replacelines_vec for fastannotate...
r38960 def replacelines_vec(self, rev, a1, a2, blines):
Augie Fackler
formatting: blacken the codebase...
r43346 return self.replacelines(
rev, a1, a2, 0, len(blines), _internal_blines=blines
)
Augie Fackler
linelog: add replacelines_vec for fastannotate...
r38960
def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 """Replace lines [a1, a2) with lines [b1, b2)."""
if self._lastannotate:
# TODO(augie): make replacelines() accept a revision at
# which we're editing as well as a revision to mark
# responsible for the edits. In hg-experimental it's
# stateful like this, so we're doing the same thing to
# retain compatibility with absorb until that's imported.
ar = self._lastannotate
else:
ar = self.annotate(rev)
# ar = self.annotate(self._maxrev)
if a1 > len(ar.lines):
raise LineLogError(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b'%d contains %d lines, tried to access line %d'
Augie Fackler
formatting: blacken the codebase...
r43346 % (rev, len(ar.lines), a1)
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 elif a1 == len(ar.lines):
# Simulated EOF instruction since we're at EOF, which
# doesn't have a "real" line.
a1inst = _eof(0, 0)
a1info = lineinfo(0, 0, ar._eof)
else:
a1info = ar.lines[a1]
a1inst = self._program[a1info._offset]
Jun Wu
linelog: extract `len(self._program)` to a local function...
r39006 programlen = self._program.__len__
oldproglen = programlen()
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 appendinst = self._program.append
# insert
Jun Wu
linelog: optimize replacelines...
r39007 blineinfos = []
bappend = blineinfos.append
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 if b1 < b2:
# Determine the jump target for the JGE at the start of
# the new block.
tgt = oldproglen + (b2 - b1 + 1)
# Jump to skip the insert if we're at an older revision.
appendinst(_jl(rev, tgt))
for linenum in pycompat.xrange(b1, b2):
Augie Fackler
linelog: add replacelines_vec for fastannotate...
r38960 if _internal_blines is None:
Jun Wu
linelog: optimize replacelines...
r39007 bappend(lineinfo(rev, linenum, programlen()))
Augie Fackler
linelog: add replacelines_vec for fastannotate...
r38960 appendinst(_line(rev, linenum))
else:
Jun Wu
linelog: optimize replacelines...
r39007 newrev, newlinenum = _internal_blines[linenum]
bappend(lineinfo(newrev, newlinenum, programlen()))
appendinst(_line(newrev, newlinenum))
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 # delete
if a1 < a2:
if a2 > len(ar.lines):
raise LineLogError(
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b'%d contains %d lines, tried to access line %d'
Augie Fackler
formatting: blacken the codebase...
r43346 % (rev, len(ar.lines), a2)
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 elif a2 == len(ar.lines):
endaddr = ar._eof
else:
endaddr = ar.lines[a2]._offset
if a2 > 0 and rev < self._maxrev:
# If we're here, we're deleting a chunk of an old
# commit, so we need to be careful and not touch
# invisible lines between a2-1 and a2 (IOW, lines that
# are added later).
endaddr = ar.lines[a2 - 1]._offset + 1
appendinst(_jge(rev, endaddr))
# copy instruction from a1
Jun Wu
linelog: optimize replacelines...
r39007 a1instpc = programlen()
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 appendinst(a1inst)
# if a1inst isn't a jump or EOF, then we need to add an unconditional
# jump back into the program here.
if not isinstance(a1inst, (_jump, _eof)):
appendinst(_jump(0, a1info._offset + 1))
# Patch instruction at a1, which makes our patch live.
self._program[a1info._offset] = _jump(0, oldproglen)
Jun Wu
linelog: optimize replacelines...
r39007
# Update self._lastannotate in place. This serves as a cache to avoid
# expensive "self.annotate" in this function, when "replacelines" is
# used continuously.
if len(self._lastannotate.lines) > a1:
self._lastannotate.lines[a1]._offset = a1instpc
else:
assert isinstance(a1inst, _eof)
self._lastannotate._eof = a1instpc
self._lastannotate.lines[a1:a2] = blineinfos
self._lastannotate.rev = max(self._lastannotate.rev, rev)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 if rev > self._maxrev:
self._maxrev = rev
def annotate(self, rev):
pc = 1
lines = []
Jun Wu
linelog: fix infinite loop vulnerability...
r38970 executed = 0
# Sanity check: if instructions executed exceeds len(program), we
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 # hit an infinite loop in the linelog program somehow and we
# should stop.
Jun Wu
linelog: fix infinite loop vulnerability...
r38970 while pc is not None and executed < len(self._program):
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 inst = self._program[pc]
lastpc = pc
pc = inst.execute(rev, pc, lines.append)
Jun Wu
linelog: fix infinite loop vulnerability...
r38970 executed += 1
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 if pc is not None:
raise LineLogError(
Augie Fackler
formatting: blacken the codebase...
r43346 r'Probably hit an infinite loop in linelog. Program:\n'
+ self.debugstr()
)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 ar = annotateresult(rev, lines, lastpc)
self._lastannotate = ar
return ar
@property
def maxrev(self):
return self._maxrev
# Stateful methods which depend on the value of the last
# annotation run. This API is for compatiblity with the original
# linelog, and we should probably consider refactoring it.
@property
def annotateresult(self):
"""Return the last annotation result. C linelog code exposed this."""
return [(l.rev, l.linenum) for l in self._lastannotate.lines]
def getoffset(self, line):
return self._lastannotate.lines[line]._offset
def getalllines(self, start=0, end=0):
"""Get all lines that ever occurred in [start, end).
Passing start == end == 0 means "all lines ever".
This works in terms of *internal* program offsets, not line numbers.
"""
pc = start or 1
lines = []
# only take as many steps as there are instructions in the
# program - if we don't find an EOF or our stop-line before
# then, something is badly broken.
for step in pycompat.xrange(len(self._program)):
inst = self._program[pc]
nextpc = pc + 1
if isinstance(inst, _jump):
nextpc = inst._target
elif isinstance(inst, _eof):
return lines
elif isinstance(inst, (_jl, _jge)):
pass
elif isinstance(inst, _line):
lines.append((inst._rev, inst._origlineno))
else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise LineLogError(b"Illegal instruction %r" % inst)
Augie Fackler
linelog: add a Python implementation of the linelog datastructure...
r38831 if nextpc == end:
return lines
pc = nextpc
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise LineLogError(b"Failed to perform getalllines")