##// END OF EJS Templates
linelog: fix infinite loop vulnerability...
Jun Wu -
r38970:27a54096 default
parent child Browse files
Show More
@@ -1,421 +1,423 b''
1 # linelog - efficient cache for annotate data
1 # linelog - efficient cache for annotate data
2 #
2 #
3 # Copyright 2018 Google LLC.
3 # Copyright 2018 Google LLC.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
7 """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
8
8
9 SCCS Weaves are an implementation of
9 SCCS Weaves are an implementation of
10 https://en.wikipedia.org/wiki/Interleaved_deltas. See
10 https://en.wikipedia.org/wiki/Interleaved_deltas. See
11 mercurial/help/internals/linelog.txt for an exploration of SCCS weaves
11 mercurial/help/internals/linelog.txt for an exploration of SCCS weaves
12 and how linelog works in detail.
12 and how linelog works in detail.
13
13
14 Here's a hacker's summary: a linelog is a program which is executed in
14 Here's a hacker's summary: a linelog is a program which is executed in
15 the context of a revision. Executing the program emits information
15 the context of a revision. Executing the program emits information
16 about lines, including the revision that introduced them and the line
16 about lines, including the revision that introduced them and the line
17 number in the file at the introducing revision. When an insertion or
17 number in the file at the introducing revision. When an insertion or
18 deletion is performed on the file, a jump instruction is used to patch
18 deletion is performed on the file, a jump instruction is used to patch
19 in a new body of annotate information.
19 in a new body of annotate information.
20 """
20 """
21 from __future__ import absolute_import, print_function
21 from __future__ import absolute_import, print_function
22
22
23 import abc
23 import abc
24 import struct
24 import struct
25
25
26 from .thirdparty import (
26 from .thirdparty import (
27 attr,
27 attr,
28 )
28 )
29 from . import (
29 from . import (
30 pycompat,
30 pycompat,
31 )
31 )
32
32
33 _llentry = struct.Struct('>II')
33 _llentry = struct.Struct('>II')
34
34
35 class LineLogError(Exception):
35 class LineLogError(Exception):
36 """Error raised when something bad happens internally in linelog."""
36 """Error raised when something bad happens internally in linelog."""
37
37
38 @attr.s
38 @attr.s
39 class lineinfo(object):
39 class lineinfo(object):
40 # Introducing revision of this line.
40 # Introducing revision of this line.
41 rev = attr.ib()
41 rev = attr.ib()
42 # Line number for this line in its introducing revision.
42 # Line number for this line in its introducing revision.
43 linenum = attr.ib()
43 linenum = attr.ib()
44 # Private. Offset in the linelog program of this line. Used internally.
44 # Private. Offset in the linelog program of this line. Used internally.
45 _offset = attr.ib()
45 _offset = attr.ib()
46
46
47 @attr.s
47 @attr.s
48 class annotateresult(object):
48 class annotateresult(object):
49 rev = attr.ib()
49 rev = attr.ib()
50 lines = attr.ib()
50 lines = attr.ib()
51 _eof = attr.ib()
51 _eof = attr.ib()
52
52
53 def __iter__(self):
53 def __iter__(self):
54 return iter(self.lines)
54 return iter(self.lines)
55
55
56 class _llinstruction(object):
56 class _llinstruction(object):
57
57
58 __metaclass__ = abc.ABCMeta
58 __metaclass__ = abc.ABCMeta
59
59
60 @abc.abstractmethod
60 @abc.abstractmethod
61 def __init__(self, op1, op2):
61 def __init__(self, op1, op2):
62 pass
62 pass
63
63
64 @abc.abstractmethod
64 @abc.abstractmethod
65 def __str__(self):
65 def __str__(self):
66 pass
66 pass
67
67
68 def __repr__(self):
68 def __repr__(self):
69 return str(self)
69 return str(self)
70
70
71 @abc.abstractmethod
71 @abc.abstractmethod
72 def __eq__(self, other):
72 def __eq__(self, other):
73 pass
73 pass
74
74
75 @abc.abstractmethod
75 @abc.abstractmethod
76 def encode(self):
76 def encode(self):
77 """Encode this instruction to the binary linelog format."""
77 """Encode this instruction to the binary linelog format."""
78
78
79 @abc.abstractmethod
79 @abc.abstractmethod
80 def execute(self, rev, pc, emit):
80 def execute(self, rev, pc, emit):
81 """Execute this instruction.
81 """Execute this instruction.
82
82
83 Args:
83 Args:
84 rev: The revision we're annotating.
84 rev: The revision we're annotating.
85 pc: The current offset in the linelog program.
85 pc: The current offset in the linelog program.
86 emit: A function that accepts a single lineinfo object.
86 emit: A function that accepts a single lineinfo object.
87
87
88 Returns:
88 Returns:
89 The new value of pc. Returns None if execution should stop
89 The new value of pc. Returns None if execution should stop
90 (that is, we've found the end of the file.)
90 (that is, we've found the end of the file.)
91 """
91 """
92
92
93 class _jge(_llinstruction):
93 class _jge(_llinstruction):
94 """If the current rev is greater than or equal to op1, jump to op2."""
94 """If the current rev is greater than or equal to op1, jump to op2."""
95
95
96 def __init__(self, op1, op2):
96 def __init__(self, op1, op2):
97 self._cmprev = op1
97 self._cmprev = op1
98 self._target = op2
98 self._target = op2
99
99
100 def __str__(self):
100 def __str__(self):
101 return r'JGE %d %d' % (self._cmprev, self._target)
101 return r'JGE %d %d' % (self._cmprev, self._target)
102
102
103 def __eq__(self, other):
103 def __eq__(self, other):
104 return (type(self) == type(other)
104 return (type(self) == type(other)
105 and self._cmprev == other._cmprev
105 and self._cmprev == other._cmprev
106 and self._target == other._target)
106 and self._target == other._target)
107
107
108 def encode(self):
108 def encode(self):
109 return _llentry.pack(self._cmprev << 2, self._target)
109 return _llentry.pack(self._cmprev << 2, self._target)
110
110
111 def execute(self, rev, pc, emit):
111 def execute(self, rev, pc, emit):
112 if rev >= self._cmprev:
112 if rev >= self._cmprev:
113 return self._target
113 return self._target
114 return pc + 1
114 return pc + 1
115
115
116 class _jump(_llinstruction):
116 class _jump(_llinstruction):
117 """Unconditional jumps are expressed as a JGE with op1 set to 0."""
117 """Unconditional jumps are expressed as a JGE with op1 set to 0."""
118
118
119 def __init__(self, op1, op2):
119 def __init__(self, op1, op2):
120 if op1 != 0:
120 if op1 != 0:
121 raise LineLogError("malformed JUMP, op1 must be 0, got %d" % op1)
121 raise LineLogError("malformed JUMP, op1 must be 0, got %d" % op1)
122 self._target = op2
122 self._target = op2
123
123
124 def __str__(self):
124 def __str__(self):
125 return r'JUMP %d' % (self._target)
125 return r'JUMP %d' % (self._target)
126
126
127 def __eq__(self, other):
127 def __eq__(self, other):
128 return (type(self) == type(other)
128 return (type(self) == type(other)
129 and self._target == other._target)
129 and self._target == other._target)
130
130
131 def encode(self):
131 def encode(self):
132 return _llentry.pack(0, self._target)
132 return _llentry.pack(0, self._target)
133
133
134 def execute(self, rev, pc, emit):
134 def execute(self, rev, pc, emit):
135 return self._target
135 return self._target
136
136
137 class _eof(_llinstruction):
137 class _eof(_llinstruction):
138 """EOF is expressed as a JGE that always jumps to 0."""
138 """EOF is expressed as a JGE that always jumps to 0."""
139
139
140 def __init__(self, op1, op2):
140 def __init__(self, op1, op2):
141 if op1 != 0:
141 if op1 != 0:
142 raise LineLogError("malformed EOF, op1 must be 0, got %d" % op1)
142 raise LineLogError("malformed EOF, op1 must be 0, got %d" % op1)
143 if op2 != 0:
143 if op2 != 0:
144 raise LineLogError("malformed EOF, op2 must be 0, got %d" % op2)
144 raise LineLogError("malformed EOF, op2 must be 0, got %d" % op2)
145
145
146 def __str__(self):
146 def __str__(self):
147 return r'EOF'
147 return r'EOF'
148
148
149 def __eq__(self, other):
149 def __eq__(self, other):
150 return type(self) == type(other)
150 return type(self) == type(other)
151
151
152 def encode(self):
152 def encode(self):
153 return _llentry.pack(0, 0)
153 return _llentry.pack(0, 0)
154
154
155 def execute(self, rev, pc, emit):
155 def execute(self, rev, pc, emit):
156 return None
156 return None
157
157
158 class _jl(_llinstruction):
158 class _jl(_llinstruction):
159 """If the current rev is less than op1, jump to op2."""
159 """If the current rev is less than op1, jump to op2."""
160
160
161 def __init__(self, op1, op2):
161 def __init__(self, op1, op2):
162 self._cmprev = op1
162 self._cmprev = op1
163 self._target = op2
163 self._target = op2
164
164
165 def __str__(self):
165 def __str__(self):
166 return r'JL %d %d' % (self._cmprev, self._target)
166 return r'JL %d %d' % (self._cmprev, self._target)
167
167
168 def __eq__(self, other):
168 def __eq__(self, other):
169 return (type(self) == type(other)
169 return (type(self) == type(other)
170 and self._cmprev == other._cmprev
170 and self._cmprev == other._cmprev
171 and self._target == other._target)
171 and self._target == other._target)
172
172
173 def encode(self):
173 def encode(self):
174 return _llentry.pack(1 | (self._cmprev << 2), self._target)
174 return _llentry.pack(1 | (self._cmprev << 2), self._target)
175
175
176 def execute(self, rev, pc, emit):
176 def execute(self, rev, pc, emit):
177 if rev < self._cmprev:
177 if rev < self._cmprev:
178 return self._target
178 return self._target
179 return pc + 1
179 return pc + 1
180
180
181 class _line(_llinstruction):
181 class _line(_llinstruction):
182 """Emit a line."""
182 """Emit a line."""
183
183
184 def __init__(self, op1, op2):
184 def __init__(self, op1, op2):
185 # This line was introduced by this revision number.
185 # This line was introduced by this revision number.
186 self._rev = op1
186 self._rev = op1
187 # This line had the specified line number in the introducing revision.
187 # This line had the specified line number in the introducing revision.
188 self._origlineno = op2
188 self._origlineno = op2
189
189
190 def __str__(self):
190 def __str__(self):
191 return r'LINE %d %d' % (self._rev, self._origlineno)
191 return r'LINE %d %d' % (self._rev, self._origlineno)
192
192
193 def __eq__(self, other):
193 def __eq__(self, other):
194 return (type(self) == type(other)
194 return (type(self) == type(other)
195 and self._rev == other._rev
195 and self._rev == other._rev
196 and self._origlineno == other._origlineno)
196 and self._origlineno == other._origlineno)
197
197
198 def encode(self):
198 def encode(self):
199 return _llentry.pack(2 | (self._rev << 2), self._origlineno)
199 return _llentry.pack(2 | (self._rev << 2), self._origlineno)
200
200
201 def execute(self, rev, pc, emit):
201 def execute(self, rev, pc, emit):
202 emit(lineinfo(self._rev, self._origlineno, pc))
202 emit(lineinfo(self._rev, self._origlineno, pc))
203 return pc + 1
203 return pc + 1
204
204
205 def _decodeone(data, offset):
205 def _decodeone(data, offset):
206 """Decode a single linelog instruction from an offset in a buffer."""
206 """Decode a single linelog instruction from an offset in a buffer."""
207 try:
207 try:
208 op1, op2 = _llentry.unpack_from(data, offset)
208 op1, op2 = _llentry.unpack_from(data, offset)
209 except struct.error as e:
209 except struct.error as e:
210 raise LineLogError('reading an instruction failed: %r' % e)
210 raise LineLogError('reading an instruction failed: %r' % e)
211 opcode = op1 & 0b11
211 opcode = op1 & 0b11
212 op1 = op1 >> 2
212 op1 = op1 >> 2
213 if opcode == 0:
213 if opcode == 0:
214 if op1 == 0:
214 if op1 == 0:
215 if op2 == 0:
215 if op2 == 0:
216 return _eof(op1, op2)
216 return _eof(op1, op2)
217 return _jump(op1, op2)
217 return _jump(op1, op2)
218 return _jge(op1, op2)
218 return _jge(op1, op2)
219 elif opcode == 1:
219 elif opcode == 1:
220 return _jl(op1, op2)
220 return _jl(op1, op2)
221 elif opcode == 2:
221 elif opcode == 2:
222 return _line(op1, op2)
222 return _line(op1, op2)
223 raise NotImplementedError('Unimplemented opcode %r' % opcode)
223 raise NotImplementedError('Unimplemented opcode %r' % opcode)
224
224
225 class linelog(object):
225 class linelog(object):
226 """Efficient cache for per-line history information."""
226 """Efficient cache for per-line history information."""
227
227
228 def __init__(self, program=None, maxrev=0):
228 def __init__(self, program=None, maxrev=0):
229 if program is None:
229 if program is None:
230 # We pad the program with an extra leading EOF so that our
230 # We pad the program with an extra leading EOF so that our
231 # offsets will match the C code exactly. This means we can
231 # offsets will match the C code exactly. This means we can
232 # interoperate with the C code.
232 # interoperate with the C code.
233 program = [_eof(0, 0), _eof(0, 0)]
233 program = [_eof(0, 0), _eof(0, 0)]
234 self._program = program
234 self._program = program
235 self._lastannotate = None
235 self._lastannotate = None
236 self._maxrev = maxrev
236 self._maxrev = maxrev
237
237
238 def __eq__(self, other):
238 def __eq__(self, other):
239 return (type(self) == type(other)
239 return (type(self) == type(other)
240 and self._program == other._program
240 and self._program == other._program
241 and self._maxrev == other._maxrev)
241 and self._maxrev == other._maxrev)
242
242
243 def __repr__(self):
243 def __repr__(self):
244 return '<linelog at %s: maxrev=%d size=%d>' % (
244 return '<linelog at %s: maxrev=%d size=%d>' % (
245 hex(id(self)), self._maxrev, len(self._program))
245 hex(id(self)), self._maxrev, len(self._program))
246
246
247 def debugstr(self):
247 def debugstr(self):
248 fmt = r'%%%dd %%s' % len(str(len(self._program)))
248 fmt = r'%%%dd %%s' % len(str(len(self._program)))
249 return pycompat.sysstr('\n').join(
249 return pycompat.sysstr('\n').join(
250 fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1))
250 fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1))
251
251
252 @classmethod
252 @classmethod
253 def fromdata(cls, buf):
253 def fromdata(cls, buf):
254 if len(buf) % _llentry.size != 0:
254 if len(buf) % _llentry.size != 0:
255 raise LineLogError(
255 raise LineLogError(
256 "invalid linelog buffer size %d (must be a multiple of %d)" % (
256 "invalid linelog buffer size %d (must be a multiple of %d)" % (
257 len(buf), _llentry.size))
257 len(buf), _llentry.size))
258 expected = len(buf) / _llentry.size
258 expected = len(buf) / _llentry.size
259 fakejge = _decodeone(buf, 0)
259 fakejge = _decodeone(buf, 0)
260 if isinstance(fakejge, _jump):
260 if isinstance(fakejge, _jump):
261 maxrev = 0
261 maxrev = 0
262 else:
262 else:
263 maxrev = fakejge._cmprev
263 maxrev = fakejge._cmprev
264 numentries = fakejge._target
264 numentries = fakejge._target
265 if expected != numentries:
265 if expected != numentries:
266 raise LineLogError("corrupt linelog data: claimed"
266 raise LineLogError("corrupt linelog data: claimed"
267 " %d entries but given data for %d entries" % (
267 " %d entries but given data for %d entries" % (
268 expected, numentries))
268 expected, numentries))
269 instructions = [_eof(0, 0)]
269 instructions = [_eof(0, 0)]
270 for offset in pycompat.xrange(1, numentries):
270 for offset in pycompat.xrange(1, numentries):
271 instructions.append(_decodeone(buf, offset * _llentry.size))
271 instructions.append(_decodeone(buf, offset * _llentry.size))
272 return cls(instructions, maxrev=maxrev)
272 return cls(instructions, maxrev=maxrev)
273
273
274 def encode(self):
274 def encode(self):
275 hdr = _jge(self._maxrev, len(self._program)).encode()
275 hdr = _jge(self._maxrev, len(self._program)).encode()
276 return hdr + ''.join(i.encode() for i in self._program[1:])
276 return hdr + ''.join(i.encode() for i in self._program[1:])
277
277
278 def clear(self):
278 def clear(self):
279 self._program = []
279 self._program = []
280 self._maxrev = 0
280 self._maxrev = 0
281 self._lastannotate = None
281 self._lastannotate = None
282
282
283 def replacelines_vec(self, rev, a1, a2, blines):
283 def replacelines_vec(self, rev, a1, a2, blines):
284 return self.replacelines(rev, a1, a2, 0, len(blines),
284 return self.replacelines(rev, a1, a2, 0, len(blines),
285 _internal_blines=blines)
285 _internal_blines=blines)
286
286
287 def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
287 def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
288 """Replace lines [a1, a2) with lines [b1, b2)."""
288 """Replace lines [a1, a2) with lines [b1, b2)."""
289 if self._lastannotate:
289 if self._lastannotate:
290 # TODO(augie): make replacelines() accept a revision at
290 # TODO(augie): make replacelines() accept a revision at
291 # which we're editing as well as a revision to mark
291 # which we're editing as well as a revision to mark
292 # responsible for the edits. In hg-experimental it's
292 # responsible for the edits. In hg-experimental it's
293 # stateful like this, so we're doing the same thing to
293 # stateful like this, so we're doing the same thing to
294 # retain compatibility with absorb until that's imported.
294 # retain compatibility with absorb until that's imported.
295 ar = self._lastannotate
295 ar = self._lastannotate
296 else:
296 else:
297 ar = self.annotate(rev)
297 ar = self.annotate(rev)
298 # ar = self.annotate(self._maxrev)
298 # ar = self.annotate(self._maxrev)
299 if a1 > len(ar.lines):
299 if a1 > len(ar.lines):
300 raise LineLogError(
300 raise LineLogError(
301 '%d contains %d lines, tried to access line %d' % (
301 '%d contains %d lines, tried to access line %d' % (
302 rev, len(ar.lines), a1))
302 rev, len(ar.lines), a1))
303 elif a1 == len(ar.lines):
303 elif a1 == len(ar.lines):
304 # Simulated EOF instruction since we're at EOF, which
304 # Simulated EOF instruction since we're at EOF, which
305 # doesn't have a "real" line.
305 # doesn't have a "real" line.
306 a1inst = _eof(0, 0)
306 a1inst = _eof(0, 0)
307 a1info = lineinfo(0, 0, ar._eof)
307 a1info = lineinfo(0, 0, ar._eof)
308 else:
308 else:
309 a1info = ar.lines[a1]
309 a1info = ar.lines[a1]
310 a1inst = self._program[a1info._offset]
310 a1inst = self._program[a1info._offset]
311 oldproglen = len(self._program)
311 oldproglen = len(self._program)
312 appendinst = self._program.append
312 appendinst = self._program.append
313
313
314 # insert
314 # insert
315 if b1 < b2:
315 if b1 < b2:
316 # Determine the jump target for the JGE at the start of
316 # Determine the jump target for the JGE at the start of
317 # the new block.
317 # the new block.
318 tgt = oldproglen + (b2 - b1 + 1)
318 tgt = oldproglen + (b2 - b1 + 1)
319 # Jump to skip the insert if we're at an older revision.
319 # Jump to skip the insert if we're at an older revision.
320 appendinst(_jl(rev, tgt))
320 appendinst(_jl(rev, tgt))
321 for linenum in pycompat.xrange(b1, b2):
321 for linenum in pycompat.xrange(b1, b2):
322 if _internal_blines is None:
322 if _internal_blines is None:
323 appendinst(_line(rev, linenum))
323 appendinst(_line(rev, linenum))
324 else:
324 else:
325 appendinst(_line(*_internal_blines[linenum]))
325 appendinst(_line(*_internal_blines[linenum]))
326 # delete
326 # delete
327 if a1 < a2:
327 if a1 < a2:
328 if a2 > len(ar.lines):
328 if a2 > len(ar.lines):
329 raise LineLogError(
329 raise LineLogError(
330 '%d contains %d lines, tried to access line %d' % (
330 '%d contains %d lines, tried to access line %d' % (
331 rev, len(ar.lines), a2))
331 rev, len(ar.lines), a2))
332 elif a2 == len(ar.lines):
332 elif a2 == len(ar.lines):
333 endaddr = ar._eof
333 endaddr = ar._eof
334 else:
334 else:
335 endaddr = ar.lines[a2]._offset
335 endaddr = ar.lines[a2]._offset
336 if a2 > 0 and rev < self._maxrev:
336 if a2 > 0 and rev < self._maxrev:
337 # If we're here, we're deleting a chunk of an old
337 # If we're here, we're deleting a chunk of an old
338 # commit, so we need to be careful and not touch
338 # commit, so we need to be careful and not touch
339 # invisible lines between a2-1 and a2 (IOW, lines that
339 # invisible lines between a2-1 and a2 (IOW, lines that
340 # are added later).
340 # are added later).
341 endaddr = ar.lines[a2 - 1]._offset + 1
341 endaddr = ar.lines[a2 - 1]._offset + 1
342 appendinst(_jge(rev, endaddr))
342 appendinst(_jge(rev, endaddr))
343 # copy instruction from a1
343 # copy instruction from a1
344 appendinst(a1inst)
344 appendinst(a1inst)
345 # if a1inst isn't a jump or EOF, then we need to add an unconditional
345 # if a1inst isn't a jump or EOF, then we need to add an unconditional
346 # jump back into the program here.
346 # jump back into the program here.
347 if not isinstance(a1inst, (_jump, _eof)):
347 if not isinstance(a1inst, (_jump, _eof)):
348 appendinst(_jump(0, a1info._offset + 1))
348 appendinst(_jump(0, a1info._offset + 1))
349 # Patch instruction at a1, which makes our patch live.
349 # Patch instruction at a1, which makes our patch live.
350 self._program[a1info._offset] = _jump(0, oldproglen)
350 self._program[a1info._offset] = _jump(0, oldproglen)
351 # For compat with the C version, re-annotate rev so that
351 # For compat with the C version, re-annotate rev so that
352 # self.annotateresult is cromulent. We could fix up the
352 # self.annotateresult is cromulent. We could fix up the
353 # annotateresult in place (which is how the C version works),
353 # annotateresult in place (which is how the C version works),
354 # but for now we'll pass on that and see if it matters in
354 # but for now we'll pass on that and see if it matters in
355 # practice.
355 # practice.
356 self.annotate(max(self._lastannotate.rev, rev))
356 self.annotate(max(self._lastannotate.rev, rev))
357 if rev > self._maxrev:
357 if rev > self._maxrev:
358 self._maxrev = rev
358 self._maxrev = rev
359
359
360 def annotate(self, rev):
360 def annotate(self, rev):
361 pc = 1
361 pc = 1
362 lines = []
362 lines = []
363 # Sanity check: if len(lines) is longer than len(program), we
363 executed = 0
364 # Sanity check: if instructions executed exceeds len(program), we
364 # hit an infinite loop in the linelog program somehow and we
365 # hit an infinite loop in the linelog program somehow and we
365 # should stop.
366 # should stop.
366 while pc is not None and len(lines) < len(self._program):
367 while pc is not None and executed < len(self._program):
367 inst = self._program[pc]
368 inst = self._program[pc]
368 lastpc = pc
369 lastpc = pc
369 pc = inst.execute(rev, pc, lines.append)
370 pc = inst.execute(rev, pc, lines.append)
371 executed += 1
370 if pc is not None:
372 if pc is not None:
371 raise LineLogError(
373 raise LineLogError(
372 'Probably hit an infinite loop in linelog. Program:\n' +
374 'Probably hit an infinite loop in linelog. Program:\n' +
373 self.debugstr())
375 self.debugstr())
374 ar = annotateresult(rev, lines, lastpc)
376 ar = annotateresult(rev, lines, lastpc)
375 self._lastannotate = ar
377 self._lastannotate = ar
376 return ar
378 return ar
377
379
378 @property
380 @property
379 def maxrev(self):
381 def maxrev(self):
380 return self._maxrev
382 return self._maxrev
381
383
382 # Stateful methods which depend on the value of the last
384 # Stateful methods which depend on the value of the last
383 # annotation run. This API is for compatibility with the original
385 # annotation run. This API is for compatibility with the original
384 # linelog, and we should probably consider refactoring it.
386 # linelog, and we should probably consider refactoring it.
385 @property
387 @property
386 def annotateresult(self):
388 def annotateresult(self):
387 """Return the last annotation result. C linelog code exposed this."""
389 """Return the last annotation result. C linelog code exposed this."""
388 return [(l.rev, l.linenum) for l in self._lastannotate.lines]
390 return [(l.rev, l.linenum) for l in self._lastannotate.lines]
389
391
390 def getoffset(self, line):
392 def getoffset(self, line):
391 return self._lastannotate.lines[line]._offset
393 return self._lastannotate.lines[line]._offset
392
394
393 def getalllines(self, start=0, end=0):
395 def getalllines(self, start=0, end=0):
394 """Get all lines that ever occurred in [start, end).
396 """Get all lines that ever occurred in [start, end).
395
397
396 Passing start == end == 0 means "all lines ever".
398 Passing start == end == 0 means "all lines ever".
397
399
398 This works in terms of *internal* program offsets, not line numbers.
400 This works in terms of *internal* program offsets, not line numbers.
399 """
401 """
400 pc = start or 1
402 pc = start or 1
401 lines = []
403 lines = []
402 # only take as many steps as there are instructions in the
404 # only take as many steps as there are instructions in the
403 # program - if we don't find an EOF or our stop-line before
405 # program - if we don't find an EOF or our stop-line before
404 # then, something is badly broken.
406 # then, something is badly broken.
405 for step in pycompat.xrange(len(self._program)):
407 for step in pycompat.xrange(len(self._program)):
406 inst = self._program[pc]
408 inst = self._program[pc]
407 nextpc = pc + 1
409 nextpc = pc + 1
408 if isinstance(inst, _jump):
410 if isinstance(inst, _jump):
409 nextpc = inst._target
411 nextpc = inst._target
410 elif isinstance(inst, _eof):
412 elif isinstance(inst, _eof):
411 return lines
413 return lines
412 elif isinstance(inst, (_jl, _jge)):
414 elif isinstance(inst, (_jl, _jge)):
413 pass
415 pass
414 elif isinstance(inst, _line):
416 elif isinstance(inst, _line):
415 lines.append((inst._rev, inst._origlineno))
417 lines.append((inst._rev, inst._origlineno))
416 else:
418 else:
417 raise LineLogError("Illegal instruction %r" % inst)
419 raise LineLogError("Illegal instruction %r" % inst)
418 if nextpc == end:
420 if nextpc == end:
419 return lines
421 return lines
420 pc = nextpc
422 pc = nextpc
421 raise LineLogError("Failed to perform getalllines")
423 raise LineLogError("Failed to perform getalllines")
@@ -1,184 +1,193 b''
1 from __future__ import absolute_import, print_function
1 from __future__ import absolute_import, print_function
2
2
3 import difflib
3 import difflib
4 import random
4 import random
5 import unittest
5 import unittest
6
6
7 from mercurial import linelog
7 from mercurial import linelog
8
8
# Tuning knobs for the random edit stream produced by _genedits().

# number of replacelines / number of replacelines_vec
# (_genedits picks a "vec" edit when randint(0, vecratio) comes up 0)
vecratio = 3
# largest line number used in the (rev, linenum) pairs of a "vec" edit
maxlinenum = 0xffffff
# largest starting line number (b1) of a replacement range
maxb1 = 0xffffff
# most existing lines a single edit replaces (a2 - a1)
maxdeltaa = 10
# most new lines a single edit introduces (b2 - b1)
maxdeltab = 10
14
14
def _genedits(seed, endrev):
    """Yield a reproducible stream of random edits, one per revision.

    For every rev in [0, endrev) a random replacement of ``lines[a1:a2]``
    is chosen, applied to a running model of the file, and reported as the
    tuple ``(lines, rev, a1, a2, b1, b2, blines, usevec)``:

    - lines: the model after the edit, a list of (rev, linenum) pairs.
      The *same* list object is yielded every time and is mutated by the
      next step, so use or copy it before advancing the generator.
    - a1, a2: the slice of the pre-edit model that was replaced.
    - b1, b2: the line number range of the replacement.
    - blines: the (rev, linenum) pairs that were spliced in.
    - usevec: True when blines holds arbitrary random pairs, False when
      it is simply (rev, b1) .. (rev, b2 - 1).

    Two generators created with the same seed yield the same stream.
    """
    # Draw from a private generator instead of reseeding the shared
    # module-level one. This generator is lazy, so any other use of the
    # global `random` state between two yields would otherwise silently
    # change every edit that follows.
    rng = random.Random(seed)
    lines = []
    for rev in range(endrev):
        n = len(lines)
        a1 = rng.randint(0, n)
        a2 = rng.randint(a1, min(n, a1 + maxdeltaa))
        b1 = rng.randint(0, maxb1)
        b2 = rng.randint(b1, b1 + maxdeltab)
        # randint() is inclusive at both ends, so this is true for one
        # outcome out of vecratio + 1.
        usevec = not rng.randint(0, vecratio)
        if usevec:
            blines = [(rng.randint(0, rev), rng.randint(0, maxlinenum))
                      for _ in range(b1, b2)]
        else:
            blines = [(rev, bidx) for bidx in range(b1, b2)]
        lines[a1:a2] = blines
        yield lines, rev, a1, a2, b1, b2, blines, usevec
33
33
class linelogtests(unittest.TestCase):
    """Tests for linelog encoding, editing, annotation and bad programs."""

    # Append custom failure messages to the default assertion output
    # rather than replacing it, so the seed reported by testrandomedits
    # does not hide the list diff.
    longMessage = True

    @staticmethod
    def _revlines(annotated):
        # Reduce annotate() output to a list of (rev, linenum) pairs.
        return [(l.rev, l.linenum) for l in annotated]

    def testlinelogencodedecode(self):
        program = [linelog._eof(0, 0),
                   linelog._jge(41, 42),
                   linelog._jump(0, 43),
                   linelog._eof(0, 0),
                   linelog._jl(44, 45),
                   linelog._line(46, 47),
                   ]
        ll = linelog.linelog(program, maxrev=100)
        enc = ll.encode()
        # round-trips okay
        self.assertEqual(linelog.linelog.fromdata(enc)._program, ll._program)
        self.assertEqual(linelog.linelog.fromdata(enc), ll)
        # This encoding matches the encoding used by hg-experimental's
        # linelog file, or is supposed to if it doesn't.
        self.assertEqual(enc, (b'\x00\x00\x01\x90\x00\x00\x00\x06'
                               b'\x00\x00\x00\xa4\x00\x00\x00*'
                               b'\x00\x00\x00\x00\x00\x00\x00+'
                               b'\x00\x00\x00\x00\x00\x00\x00\x00'
                               b'\x00\x00\x00\xb1\x00\x00\x00-'
                               b'\x00\x00\x00\xba\x00\x00\x00/'))

    def testsimpleedits(self):
        ll = linelog.linelog()
        # Initial revision: add lines 0, 1, and 2
        ll.replacelines(1, 0, 0, 0, 3)
        self.assertEqual(self._revlines(ll.annotate(1)),
                         [(1, 0),
                          (1, 1),
                          (1, 2),
                          ])
        # Replace line 1 with a new line
        ll.replacelines(2, 1, 2, 1, 2)
        self.assertEqual(self._revlines(ll.annotate(2)),
                         [(1, 0),
                          (2, 1),
                          (1, 2),
                          ])
        # delete a line out of 2
        ll.replacelines(3, 1, 2, 0, 0)
        self.assertEqual(self._revlines(ll.annotate(3)),
                         [(1, 0),
                          (1, 2),
                          ])
        # annotation of 1 is unchanged
        self.assertEqual(self._revlines(ll.annotate(1)),
                         [(1, 0),
                          (1, 1),
                          (1, 2),
                          ])
        ll.annotate(3)  # set internal state to revision 3
        start = ll.getoffset(0)
        end = ll.getoffset(1)
        self.assertEqual(ll.getalllines(start, end), [
            (1, 0),
            (2, 1),
            (1, 1),
        ])
        self.assertEqual(ll.getalllines(), [
            (1, 0),
            (2, 1),
            (1, 1),
            (1, 2),
        ])

    def testparseclinelogfile(self):
        # This data is what the replacements in testsimpleedits
        # produce when fed to the original linelog.c implementation.
        data = (b'\x00\x00\x00\x0c\x00\x00\x00\x0f'
                b'\x00\x00\x00\x00\x00\x00\x00\x02'
                b'\x00\x00\x00\x05\x00\x00\x00\x06'
                b'\x00\x00\x00\x06\x00\x00\x00\x00'
                b'\x00\x00\x00\x00\x00\x00\x00\x07'
                b'\x00\x00\x00\x06\x00\x00\x00\x02'
                b'\x00\x00\x00\x00\x00\x00\x00\x00'
                b'\x00\x00\x00\t\x00\x00\x00\t'
                b'\x00\x00\x00\x00\x00\x00\x00\x0c'
                b'\x00\x00\x00\x08\x00\x00\x00\x05'
                b'\x00\x00\x00\x06\x00\x00\x00\x01'
                b'\x00\x00\x00\x00\x00\x00\x00\x05'
                b'\x00\x00\x00\x0c\x00\x00\x00\x05'
                b'\x00\x00\x00\n\x00\x00\x00\x01'
                b'\x00\x00\x00\x00\x00\x00\x00\t')
        llc = linelog.linelog.fromdata(data)
        self.assertEqual(self._revlines(llc.annotate(1)),
                         [(1, 0),
                          (1, 1),
                          (1, 2),
                          ])
        self.assertEqual(self._revlines(llc.annotate(2)),
                         [(1, 0),
                          (2, 1),
                          (1, 2),
                          ])
        self.assertEqual(self._revlines(llc.annotate(3)),
                         [(1, 0),
                          (1, 2),
                          ])
        # Check we emit the same bytecode.
        ll = linelog.linelog()
        # Initial revision: add lines 0, 1, and 2
        ll.replacelines(1, 0, 0, 0, 3)
        # Replace line 1 with a new line
        ll.replacelines(2, 1, 2, 1, 2)
        # delete a line out of 2
        ll.replacelines(3, 1, 2, 0, 0)
        diff = '\n ' + '\n '.join(difflib.unified_diff(
            ll.debugstr().splitlines(), llc.debugstr().splitlines(),
            'python', 'c', lineterm=''))
        self.assertEqual(ll._program, llc._program, 'Program mismatch: ' + diff)
        # Done as a secondary step so we get a better result if the
        # program is where the mismatch is.
        self.assertEqual(ll, llc)
        self.assertEqual(ll.encode(), data)

    def testanothersimplecase(self):
        ll = linelog.linelog()
        ll.replacelines(3, 0, 0, 0, 2)
        ll.replacelines(4, 0, 2, 0, 0)
        self.assertEqual(self._revlines(ll.annotate(4)),
                         [])
        self.assertEqual(self._revlines(ll.annotate(3)),
                         [(3, 0), (3, 1)])
        # rev 2 is empty because contents were only ever introduced in rev 3
        self.assertEqual(self._revlines(ll.annotate(2)),
                         [])

    def testrandomedits(self):
        # Inspired by original linelog tests.
        seed = random.random()
        numrevs = 2000
        ll = linelog.linelog()
        # Populate linelog
        for lines, rev, a1, a2, b1, b2, blines, usevec in _genedits(
                seed, numrevs):
            if usevec:
                ll.replacelines_vec(rev, a1, a2, blines)
            else:
                ll.replacelines(rev, a1, a2, b1, b2)
            # The return value is not needed here; the check below reads
            # ll.annotateresult instead.
            ll.annotate(rev)
            # The seed is random on every run, so report it: without it a
            # failure cannot be reproduced.
            self.assertEqual(ll.annotateresult, lines,
                             'seed=%r rev=%d' % (seed, rev))
        # Verify we can get back these states by annotating each rev
        for lines, rev, a1, a2, b1, b2, blines, usevec in _genedits(
                seed, numrevs):
            self.assertEqual(self._revlines(ll.annotate(rev)), lines,
                             'seed=%r rev=%d' % (seed, rev))

    def testinfinitebadprogram(self):
        ll = linelog.linelog.fromdata(
            b'\x00\x00\x00\x00\x00\x00\x00\x02'  # header
            b'\x00\x00\x00\x00\x00\x00\x00\x01'  # JUMP to self
        )
        with self.assertRaises(linelog.LineLogError):
            # should not be an infinite loop and raise
            ll.annotate(1)
190
# Script entry point: when this file is executed directly, hand this
# module's name to silenttestrunner.main().
if __name__ == '__main__':
    import silenttestrunner
    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now