linelog.py
478 lines
| 15.1 KiB
| text/x-python
|
PythonLexer
/ mercurial / linelog.py
Augie Fackler
|
r38831 | # linelog - efficient cache for annotate data | ||
# | ||||
# Copyright 2018 Google LLC. | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
"""linelog is an efficient cache for annotate data inspired by SCCS Weaves. | ||||
SCCS Weaves are an implementation of | ||||
https://en.wikipedia.org/wiki/Interleaved_deltas. See | ||||
Augie Fackler
|
r44135 | mercurial/helptext/internals/linelog.txt for an exploration of SCCS weaves | ||
Augie Fackler
|
r38831 | and how linelog works in detail. | ||
Here's a hacker's summary: a linelog is a program which is executed in | ||||
the context of a revision. Executing the program emits information | ||||
about lines, including the revision that introduced them and the line | ||||
number in the file at the introducing revision. When an insertion or | ||||
deletion is performed on the file, a jump instruction is used to patch | ||||
in a new body of annotate information. | ||||
""" | ||||
Matt Harbison
|
r52755 | from __future__ import annotations | ||
Augie Fackler
|
r38831 | import abc | ||
import struct | ||||
Matt Harbison
|
r52619 | import typing | ||
from typing import ( | ||||
List, | ||||
) | ||||
Augie Fackler
|
r38831 | |||
Augie Fackler
|
r43346 | from .thirdparty import attr | ||
Matt Harbison
|
r52621 | |||
# Force pytype to use the non-vendored package | ||||
if typing.TYPE_CHECKING: | ||||
# noinspection PyPackageRequirements | ||||
import attr | ||||
Augie Fackler
|
r43346 | from . import pycompat | ||
Augie Fackler
|
r38831 | |||
Augie Fackler
|
r43347 | _llentry = struct.Struct(b'>II') | ||
Augie Fackler
|
r38831 | |||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | class LineLogError(Exception): | ||
"""Error raised when something bad happens internally in linelog.""" | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | @attr.s | ||
Gregory Szorc
|
r49801 | class lineinfo: | ||
Augie Fackler
|
r38831 | # Introducing revision of this line. | ||
rev = attr.ib() | ||||
# Line number for this line in its introducing revision. | ||||
linenum = attr.ib() | ||||
# Private. Offset in the linelog program of this line. Used internally. | ||||
_offset = attr.ib() | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | @attr.s | ||
Gregory Szorc
|
r49801 | class annotateresult: | ||
Augie Fackler
|
r38831 | rev = attr.ib() | ||
Matt Harbison
|
r52619 | lines = attr.ib(type=List[lineinfo]) | ||
Augie Fackler
|
r38831 | _eof = attr.ib() | ||
def __iter__(self): | ||||
return iter(self.lines) | ||||
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
r49801 | class _llinstruction: # pytype: disable=ignored-metaclass | ||
Augie Fackler
|
r38831 | __metaclass__ = abc.ABCMeta | ||
@abc.abstractmethod | ||||
def __init__(self, op1, op2): | ||||
pass | ||||
@abc.abstractmethod | ||||
def __str__(self): | ||||
pass | ||||
def __repr__(self): | ||||
return str(self) | ||||
@abc.abstractmethod | ||||
def __eq__(self, other): | ||||
pass | ||||
@abc.abstractmethod | ||||
def encode(self): | ||||
"""Encode this instruction to the binary linelog format.""" | ||||
@abc.abstractmethod | ||||
def execute(self, rev, pc, emit): | ||||
"""Execute this instruction. | ||||
Args: | ||||
rev: The revision we're annotating. | ||||
pc: The current offset in the linelog program. | ||||
emit: A function that accepts a single lineinfo object. | ||||
Returns: | ||||
The new value of pc. Returns None if exeuction should stop | ||||
(that is, we've found the end of the file.) | ||||
""" | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | class _jge(_llinstruction): | ||
"""If the current rev is greater than or equal to op1, jump to op2.""" | ||||
def __init__(self, op1, op2): | ||||
self._cmprev = op1 | ||||
self._target = op2 | ||||
def __str__(self): | ||||
Augie Fackler
|
r43906 | return 'JGE %d %d' % (self._cmprev, self._target) | ||
Augie Fackler
|
r38831 | |||
def __eq__(self, other): | ||||
Augie Fackler
|
r43346 | return ( | ||
type(self) == type(other) | ||||
and self._cmprev == other._cmprev | ||||
and self._target == other._target | ||||
) | ||||
Augie Fackler
|
r38831 | |||
def encode(self): | ||||
return _llentry.pack(self._cmprev << 2, self._target) | ||||
def execute(self, rev, pc, emit): | ||||
if rev >= self._cmprev: | ||||
return self._target | ||||
return pc + 1 | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | class _jump(_llinstruction): | ||
"""Unconditional jumps are expressed as a JGE with op1 set to 0.""" | ||||
def __init__(self, op1, op2): | ||||
if op1 != 0: | ||||
Augie Fackler
|
r43347 | raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1) | ||
Augie Fackler
|
r38831 | self._target = op2 | ||
def __str__(self): | ||||
Augie Fackler
|
r43906 | return 'JUMP %d' % (self._target) | ||
Augie Fackler
|
r38831 | |||
def __eq__(self, other): | ||||
Augie Fackler
|
r43346 | return type(self) == type(other) and self._target == other._target | ||
Augie Fackler
|
r38831 | |||
def encode(self): | ||||
return _llentry.pack(0, self._target) | ||||
def execute(self, rev, pc, emit): | ||||
return self._target | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | class _eof(_llinstruction): | ||
"""EOF is expressed as a JGE that always jumps to 0.""" | ||||
def __init__(self, op1, op2): | ||||
if op1 != 0: | ||||
Augie Fackler
|
r43347 | raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1) | ||
Augie Fackler
|
r38831 | if op2 != 0: | ||
Augie Fackler
|
r43347 | raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2) | ||
Augie Fackler
|
r38831 | |||
def __str__(self): | ||||
Augie Fackler
|
r38845 | return r'EOF' | ||
Augie Fackler
|
r38831 | |||
def __eq__(self, other): | ||||
return type(self) == type(other) | ||||
def encode(self): | ||||
return _llentry.pack(0, 0) | ||||
def execute(self, rev, pc, emit): | ||||
return None | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | class _jl(_llinstruction): | ||
"""If the current rev is less than op1, jump to op2.""" | ||||
def __init__(self, op1, op2): | ||||
self._cmprev = op1 | ||||
self._target = op2 | ||||
def __str__(self): | ||||
Augie Fackler
|
r43906 | return 'JL %d %d' % (self._cmprev, self._target) | ||
Augie Fackler
|
r38831 | |||
def __eq__(self, other): | ||||
Augie Fackler
|
r43346 | return ( | ||
type(self) == type(other) | ||||
and self._cmprev == other._cmprev | ||||
and self._target == other._target | ||||
) | ||||
Augie Fackler
|
r38831 | |||
def encode(self): | ||||
return _llentry.pack(1 | (self._cmprev << 2), self._target) | ||||
def execute(self, rev, pc, emit): | ||||
if rev < self._cmprev: | ||||
return self._target | ||||
return pc + 1 | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | class _line(_llinstruction): | ||
"""Emit a line.""" | ||||
def __init__(self, op1, op2): | ||||
# This line was introduced by this revision number. | ||||
self._rev = op1 | ||||
# This line had the specified line number in the introducing revision. | ||||
self._origlineno = op2 | ||||
def __str__(self): | ||||
Augie Fackler
|
r43906 | return 'LINE %d %d' % (self._rev, self._origlineno) | ||
Augie Fackler
|
r38831 | |||
def __eq__(self, other): | ||||
Augie Fackler
|
r43346 | return ( | ||
type(self) == type(other) | ||||
and self._rev == other._rev | ||||
and self._origlineno == other._origlineno | ||||
) | ||||
Augie Fackler
|
r38831 | |||
def encode(self): | ||||
return _llentry.pack(2 | (self._rev << 2), self._origlineno) | ||||
def execute(self, rev, pc, emit): | ||||
emit(lineinfo(self._rev, self._origlineno, pc)) | ||||
return pc + 1 | ||||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r38831 | def _decodeone(data, offset): | ||
"""Decode a single linelog instruction from an offset in a buffer.""" | ||||
try: | ||||
op1, op2 = _llentry.unpack_from(data, offset) | ||||
except struct.error as e: | ||||
Augie Fackler
|
r43347 | raise LineLogError(b'reading an instruction failed: %r' % e) | ||
Augie Fackler
|
r38831 | opcode = op1 & 0b11 | ||
op1 = op1 >> 2 | ||||
if opcode == 0: | ||||
if op1 == 0: | ||||
if op2 == 0: | ||||
return _eof(op1, op2) | ||||
return _jump(op1, op2) | ||||
return _jge(op1, op2) | ||||
elif opcode == 1: | ||||
return _jl(op1, op2) | ||||
elif opcode == 2: | ||||
return _line(op1, op2) | ||||
Augie Fackler
|
r43347 | raise NotImplementedError(b'Unimplemented opcode %r' % opcode) | ||
Augie Fackler
|
r38831 | |||
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
r49801 | class linelog: | ||
Augie Fackler
|
r38831 | """Efficient cache for per-line history information.""" | ||
def __init__(self, program=None, maxrev=0): | ||||
if program is None: | ||||
# We pad the program with an extra leading EOF so that our | ||||
# offsets will match the C code exactly. This means we can | ||||
# interoperate with the C code. | ||||
program = [_eof(0, 0), _eof(0, 0)] | ||||
self._program = program | ||||
self._lastannotate = None | ||||
self._maxrev = maxrev | ||||
def __eq__(self, other): | ||||
Augie Fackler
|
r43346 | return ( | ||
type(self) == type(other) | ||||
and self._program == other._program | ||||
and self._maxrev == other._maxrev | ||||
) | ||||
Augie Fackler
|
r38831 | |||
def __repr__(self): | ||||
Kyle Lippincott
|
r44742 | return '<linelog at %s: maxrev=%d size=%d>' % ( | ||
Augie Fackler
|
r43346 | hex(id(self)), | ||
self._maxrev, | ||||
len(self._program), | ||||
) | ||||
Augie Fackler
|
r38831 | |||
def debugstr(self): | ||||
Augie Fackler
|
r43906 | fmt = '%%%dd %%s' % len(str(len(self._program))) | ||
Augie Fackler
|
r43347 | return pycompat.sysstr(b'\n').join( | ||
Augie Fackler
|
r43346 | fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1) | ||
) | ||||
Augie Fackler
|
r38831 | |||
@classmethod | ||||
def fromdata(cls, buf): | ||||
if len(buf) % _llentry.size != 0: | ||||
raise LineLogError( | ||||
Augie Fackler
|
r43347 | b"invalid linelog buffer size %d (must be a multiple of %d)" | ||
Augie Fackler
|
r43346 | % (len(buf), _llentry.size) | ||
) | ||||
Augie Fackler
|
r38831 | expected = len(buf) / _llentry.size | ||
fakejge = _decodeone(buf, 0) | ||||
if isinstance(fakejge, _jump): | ||||
maxrev = 0 | ||||
Augie Fackler
|
r43774 | elif isinstance(fakejge, (_jge, _jl)): | ||
maxrev = fakejge._cmprev | ||||
Augie Fackler
|
r38831 | else: | ||
Augie Fackler
|
r43774 | raise LineLogError( | ||
'Expected one of _jump, _jge, or _jl. Got %s.' | ||||
% type(fakejge).__name__ | ||||
) | ||||
assert isinstance(fakejge, (_jump, _jge, _jl)) # help pytype | ||||
Augie Fackler
|
r38831 | numentries = fakejge._target | ||
if expected != numentries: | ||||
Augie Fackler
|
r43346 | raise LineLogError( | ||
Augie Fackler
|
r43347 | b"corrupt linelog data: claimed" | ||
b" %d entries but given data for %d entries" | ||||
Augie Fackler
|
r43346 | % (expected, numentries) | ||
) | ||||
Augie Fackler
|
r38831 | instructions = [_eof(0, 0)] | ||
Manuel Jacob
|
r50179 | for offset in range(1, numentries): | ||
Augie Fackler
|
r38831 | instructions.append(_decodeone(buf, offset * _llentry.size)) | ||
return cls(instructions, maxrev=maxrev) | ||||
def encode(self): | ||||
hdr = _jge(self._maxrev, len(self._program)).encode() | ||||
Augie Fackler
|
r43347 | return hdr + b''.join(i.encode() for i in self._program[1:]) | ||
Augie Fackler
|
r38831 | |||
def clear(self): | ||||
self._program = [] | ||||
self._maxrev = 0 | ||||
self._lastannotate = None | ||||
Augie Fackler
|
r38960 | def replacelines_vec(self, rev, a1, a2, blines): | ||
Augie Fackler
|
r43346 | return self.replacelines( | ||
rev, a1, a2, 0, len(blines), _internal_blines=blines | ||||
) | ||||
Augie Fackler
|
r38960 | |||
def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None): | ||||
Augie Fackler
|
r38831 | """Replace lines [a1, a2) with lines [b1, b2).""" | ||
if self._lastannotate: | ||||
# TODO(augie): make replacelines() accept a revision at | ||||
# which we're editing as well as a revision to mark | ||||
# responsible for the edits. In hg-experimental it's | ||||
# stateful like this, so we're doing the same thing to | ||||
# retain compatibility with absorb until that's imported. | ||||
ar = self._lastannotate | ||||
else: | ||||
ar = self.annotate(rev) | ||||
# ar = self.annotate(self._maxrev) | ||||
if a1 > len(ar.lines): | ||||
raise LineLogError( | ||||
Augie Fackler
|
r43347 | b'%d contains %d lines, tried to access line %d' | ||
Augie Fackler
|
r43346 | % (rev, len(ar.lines), a1) | ||
) | ||||
Augie Fackler
|
r38831 | elif a1 == len(ar.lines): | ||
# Simulated EOF instruction since we're at EOF, which | ||||
# doesn't have a "real" line. | ||||
a1inst = _eof(0, 0) | ||||
a1info = lineinfo(0, 0, ar._eof) | ||||
else: | ||||
a1info = ar.lines[a1] | ||||
a1inst = self._program[a1info._offset] | ||||
Jun Wu
|
r39006 | programlen = self._program.__len__ | ||
oldproglen = programlen() | ||||
Augie Fackler
|
r38831 | appendinst = self._program.append | ||
# insert | ||||
Jun Wu
|
r39007 | blineinfos = [] | ||
bappend = blineinfos.append | ||||
Augie Fackler
|
r38831 | if b1 < b2: | ||
# Determine the jump target for the JGE at the start of | ||||
# the new block. | ||||
tgt = oldproglen + (b2 - b1 + 1) | ||||
# Jump to skip the insert if we're at an older revision. | ||||
appendinst(_jl(rev, tgt)) | ||||
Manuel Jacob
|
r50179 | for linenum in range(b1, b2): | ||
Augie Fackler
|
r38960 | if _internal_blines is None: | ||
Jun Wu
|
r39007 | bappend(lineinfo(rev, linenum, programlen())) | ||
Augie Fackler
|
r38960 | appendinst(_line(rev, linenum)) | ||
else: | ||||
Jun Wu
|
r39007 | newrev, newlinenum = _internal_blines[linenum] | ||
bappend(lineinfo(newrev, newlinenum, programlen())) | ||||
appendinst(_line(newrev, newlinenum)) | ||||
Augie Fackler
|
r38831 | # delete | ||
if a1 < a2: | ||||
if a2 > len(ar.lines): | ||||
raise LineLogError( | ||||
Augie Fackler
|
r43347 | b'%d contains %d lines, tried to access line %d' | ||
Augie Fackler
|
r43346 | % (rev, len(ar.lines), a2) | ||
) | ||||
Augie Fackler
|
r38831 | elif a2 == len(ar.lines): | ||
endaddr = ar._eof | ||||
else: | ||||
endaddr = ar.lines[a2]._offset | ||||
if a2 > 0 and rev < self._maxrev: | ||||
# If we're here, we're deleting a chunk of an old | ||||
# commit, so we need to be careful and not touch | ||||
# invisible lines between a2-1 and a2 (IOW, lines that | ||||
# are added later). | ||||
endaddr = ar.lines[a2 - 1]._offset + 1 | ||||
appendinst(_jge(rev, endaddr)) | ||||
# copy instruction from a1 | ||||
Jun Wu
|
r39007 | a1instpc = programlen() | ||
Augie Fackler
|
r38831 | appendinst(a1inst) | ||
# if a1inst isn't a jump or EOF, then we need to add an unconditional | ||||
# jump back into the program here. | ||||
if not isinstance(a1inst, (_jump, _eof)): | ||||
appendinst(_jump(0, a1info._offset + 1)) | ||||
# Patch instruction at a1, which makes our patch live. | ||||
self._program[a1info._offset] = _jump(0, oldproglen) | ||||
Jun Wu
|
r39007 | |||
# Update self._lastannotate in place. This serves as a cache to avoid | ||||
# expensive "self.annotate" in this function, when "replacelines" is | ||||
# used continuously. | ||||
if len(self._lastannotate.lines) > a1: | ||||
self._lastannotate.lines[a1]._offset = a1instpc | ||||
else: | ||||
assert isinstance(a1inst, _eof) | ||||
self._lastannotate._eof = a1instpc | ||||
self._lastannotate.lines[a1:a2] = blineinfos | ||||
self._lastannotate.rev = max(self._lastannotate.rev, rev) | ||||
Augie Fackler
|
r38831 | if rev > self._maxrev: | ||
self._maxrev = rev | ||||
def annotate(self, rev): | ||||
pc = 1 | ||||
Matt Harbison
|
r52619 | lines: List[lineinfo] = [] | ||
Jun Wu
|
r38970 | executed = 0 | ||
# Sanity check: if instructions executed exceeds len(program), we | ||||
Augie Fackler
|
r38831 | # hit an infinite loop in the linelog program somehow and we | ||
# should stop. | ||||
Jun Wu
|
r38970 | while pc is not None and executed < len(self._program): | ||
Augie Fackler
|
r38831 | inst = self._program[pc] | ||
lastpc = pc | ||||
pc = inst.execute(rev, pc, lines.append) | ||||
Jun Wu
|
r38970 | executed += 1 | ||
Augie Fackler
|
r38831 | if pc is not None: | ||
raise LineLogError( | ||||
Augie Fackler
|
r43346 | r'Probably hit an infinite loop in linelog. Program:\n' | ||
+ self.debugstr() | ||||
) | ||||
Augie Fackler
|
r38831 | ar = annotateresult(rev, lines, lastpc) | ||
self._lastannotate = ar | ||||
return ar | ||||
@property | ||||
def maxrev(self): | ||||
return self._maxrev | ||||
# Stateful methods which depend on the value of the last | ||||
# annotation run. This API is for compatiblity with the original | ||||
# linelog, and we should probably consider refactoring it. | ||||
@property | ||||
def annotateresult(self): | ||||
"""Return the last annotation result. C linelog code exposed this.""" | ||||
return [(l.rev, l.linenum) for l in self._lastannotate.lines] | ||||
def getoffset(self, line): | ||||
return self._lastannotate.lines[line]._offset | ||||
def getalllines(self, start=0, end=0): | ||||
"""Get all lines that ever occurred in [start, end). | ||||
Passing start == end == 0 means "all lines ever". | ||||
This works in terms of *internal* program offsets, not line numbers. | ||||
""" | ||||
pc = start or 1 | ||||
lines = [] | ||||
# only take as many steps as there are instructions in the | ||||
# program - if we don't find an EOF or our stop-line before | ||||
# then, something is badly broken. | ||||
Manuel Jacob
|
r50179 | for step in range(len(self._program)): | ||
Augie Fackler
|
r38831 | inst = self._program[pc] | ||
nextpc = pc + 1 | ||||
if isinstance(inst, _jump): | ||||
nextpc = inst._target | ||||
elif isinstance(inst, _eof): | ||||
return lines | ||||
elif isinstance(inst, (_jl, _jge)): | ||||
pass | ||||
elif isinstance(inst, _line): | ||||
lines.append((inst._rev, inst._origlineno)) | ||||
else: | ||||
Augie Fackler
|
r43347 | raise LineLogError(b"Illegal instruction %r" % inst) | ||
Augie Fackler
|
r38831 | if nextpc == end: | ||
return lines | ||||
pc = nextpc | ||||
Augie Fackler
|
r43347 | raise LineLogError(b"Failed to perform getalllines") | ||