##// END OF EJS Templates
linelog: suppress annoying pytype warning about an ignored metaclass...
Augie Fackler -
r43773:acc4047c default
parent child Browse files
Show More
@@ -1,461 +1,461
1 1 # linelog - efficient cache for annotate data
2 2 #
3 3 # Copyright 2018 Google LLC.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
8 8
9 9 SCCS Weaves are an implementation of
10 10 https://en.wikipedia.org/wiki/Interleaved_deltas. See
11 11 mercurial/help/internals/linelog.txt for an exploration of SCCS weaves
12 12 and how linelog works in detail.
13 13
14 14 Here's a hacker's summary: a linelog is a program which is executed in
15 15 the context of a revision. Executing the program emits information
16 16 about lines, including the revision that introduced them and the line
17 17 number in the file at the introducing revision. When an insertion or
18 18 deletion is performed on the file, a jump instruction is used to patch
19 19 in a new body of annotate information.
20 20 """
21 21 from __future__ import absolute_import, print_function
22 22
23 23 import abc
24 24 import struct
25 25
26 26 from .thirdparty import attr
27 27 from . import pycompat
28 28
29 29 _llentry = struct.Struct(b'>II')
30 30
31 31
32 32 class LineLogError(Exception):
33 33 """Error raised when something bad happens internally in linelog."""
34 34
35 35
36 36 @attr.s
37 37 class lineinfo(object):
38 38 # Introducing revision of this line.
39 39 rev = attr.ib()
40 40 # Line number for this line in its introducing revision.
41 41 linenum = attr.ib()
42 42 # Private. Offset in the linelog program of this line. Used internally.
43 43 _offset = attr.ib()
44 44
45 45
46 46 @attr.s
47 47 class annotateresult(object):
48 48 rev = attr.ib()
49 49 lines = attr.ib()
50 50 _eof = attr.ib()
51 51
52 52 def __iter__(self):
53 53 return iter(self.lines)
54 54
55 55
56 class _llinstruction(object):
56 class _llinstruction(object): # pytype: disable=ignored-metaclass
57 57
58 58 __metaclass__ = abc.ABCMeta
59 59
60 60 @abc.abstractmethod
61 61 def __init__(self, op1, op2):
62 62 pass
63 63
64 64 @abc.abstractmethod
65 65 def __str__(self):
66 66 pass
67 67
68 68 def __repr__(self):
69 69 return str(self)
70 70
71 71 @abc.abstractmethod
72 72 def __eq__(self, other):
73 73 pass
74 74
75 75 @abc.abstractmethod
76 76 def encode(self):
77 77 """Encode this instruction to the binary linelog format."""
78 78
79 79 @abc.abstractmethod
80 80 def execute(self, rev, pc, emit):
81 81 """Execute this instruction.
82 82
83 83 Args:
84 84 rev: The revision we're annotating.
85 85 pc: The current offset in the linelog program.
86 86 emit: A function that accepts a single lineinfo object.
87 87
88 88 Returns:
89 89 The new value of pc. Returns None if exeuction should stop
90 90 (that is, we've found the end of the file.)
91 91 """
92 92
93 93
94 94 class _jge(_llinstruction):
95 95 """If the current rev is greater than or equal to op1, jump to op2."""
96 96
97 97 def __init__(self, op1, op2):
98 98 self._cmprev = op1
99 99 self._target = op2
100 100
101 101 def __str__(self):
102 102 return r'JGE %d %d' % (self._cmprev, self._target)
103 103
104 104 def __eq__(self, other):
105 105 return (
106 106 type(self) == type(other)
107 107 and self._cmprev == other._cmprev
108 108 and self._target == other._target
109 109 )
110 110
111 111 def encode(self):
112 112 return _llentry.pack(self._cmprev << 2, self._target)
113 113
114 114 def execute(self, rev, pc, emit):
115 115 if rev >= self._cmprev:
116 116 return self._target
117 117 return pc + 1
118 118
119 119
120 120 class _jump(_llinstruction):
121 121 """Unconditional jumps are expressed as a JGE with op1 set to 0."""
122 122
123 123 def __init__(self, op1, op2):
124 124 if op1 != 0:
125 125 raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1)
126 126 self._target = op2
127 127
128 128 def __str__(self):
129 129 return r'JUMP %d' % (self._target)
130 130
131 131 def __eq__(self, other):
132 132 return type(self) == type(other) and self._target == other._target
133 133
134 134 def encode(self):
135 135 return _llentry.pack(0, self._target)
136 136
137 137 def execute(self, rev, pc, emit):
138 138 return self._target
139 139
140 140
141 141 class _eof(_llinstruction):
142 142 """EOF is expressed as a JGE that always jumps to 0."""
143 143
144 144 def __init__(self, op1, op2):
145 145 if op1 != 0:
146 146 raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1)
147 147 if op2 != 0:
148 148 raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2)
149 149
150 150 def __str__(self):
151 151 return r'EOF'
152 152
153 153 def __eq__(self, other):
154 154 return type(self) == type(other)
155 155
156 156 def encode(self):
157 157 return _llentry.pack(0, 0)
158 158
159 159 def execute(self, rev, pc, emit):
160 160 return None
161 161
162 162
163 163 class _jl(_llinstruction):
164 164 """If the current rev is less than op1, jump to op2."""
165 165
166 166 def __init__(self, op1, op2):
167 167 self._cmprev = op1
168 168 self._target = op2
169 169
170 170 def __str__(self):
171 171 return r'JL %d %d' % (self._cmprev, self._target)
172 172
173 173 def __eq__(self, other):
174 174 return (
175 175 type(self) == type(other)
176 176 and self._cmprev == other._cmprev
177 177 and self._target == other._target
178 178 )
179 179
180 180 def encode(self):
181 181 return _llentry.pack(1 | (self._cmprev << 2), self._target)
182 182
183 183 def execute(self, rev, pc, emit):
184 184 if rev < self._cmprev:
185 185 return self._target
186 186 return pc + 1
187 187
188 188
189 189 class _line(_llinstruction):
190 190 """Emit a line."""
191 191
192 192 def __init__(self, op1, op2):
193 193 # This line was introduced by this revision number.
194 194 self._rev = op1
195 195 # This line had the specified line number in the introducing revision.
196 196 self._origlineno = op2
197 197
198 198 def __str__(self):
199 199 return r'LINE %d %d' % (self._rev, self._origlineno)
200 200
201 201 def __eq__(self, other):
202 202 return (
203 203 type(self) == type(other)
204 204 and self._rev == other._rev
205 205 and self._origlineno == other._origlineno
206 206 )
207 207
208 208 def encode(self):
209 209 return _llentry.pack(2 | (self._rev << 2), self._origlineno)
210 210
211 211 def execute(self, rev, pc, emit):
212 212 emit(lineinfo(self._rev, self._origlineno, pc))
213 213 return pc + 1
214 214
215 215
216 216 def _decodeone(data, offset):
217 217 """Decode a single linelog instruction from an offset in a buffer."""
218 218 try:
219 219 op1, op2 = _llentry.unpack_from(data, offset)
220 220 except struct.error as e:
221 221 raise LineLogError(b'reading an instruction failed: %r' % e)
222 222 opcode = op1 & 0b11
223 223 op1 = op1 >> 2
224 224 if opcode == 0:
225 225 if op1 == 0:
226 226 if op2 == 0:
227 227 return _eof(op1, op2)
228 228 return _jump(op1, op2)
229 229 return _jge(op1, op2)
230 230 elif opcode == 1:
231 231 return _jl(op1, op2)
232 232 elif opcode == 2:
233 233 return _line(op1, op2)
234 234 raise NotImplementedError(b'Unimplemented opcode %r' % opcode)
235 235
236 236
237 237 class linelog(object):
238 238 """Efficient cache for per-line history information."""
239 239
240 240 def __init__(self, program=None, maxrev=0):
241 241 if program is None:
242 242 # We pad the program with an extra leading EOF so that our
243 243 # offsets will match the C code exactly. This means we can
244 244 # interoperate with the C code.
245 245 program = [_eof(0, 0), _eof(0, 0)]
246 246 self._program = program
247 247 self._lastannotate = None
248 248 self._maxrev = maxrev
249 249
250 250 def __eq__(self, other):
251 251 return (
252 252 type(self) == type(other)
253 253 and self._program == other._program
254 254 and self._maxrev == other._maxrev
255 255 )
256 256
257 257 def __repr__(self):
258 258 return b'<linelog at %s: maxrev=%d size=%d>' % (
259 259 hex(id(self)),
260 260 self._maxrev,
261 261 len(self._program),
262 262 )
263 263
264 264 def debugstr(self):
265 265 fmt = r'%%%dd %%s' % len(str(len(self._program)))
266 266 return pycompat.sysstr(b'\n').join(
267 267 fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1)
268 268 )
269 269
270 270 @classmethod
271 271 def fromdata(cls, buf):
272 272 if len(buf) % _llentry.size != 0:
273 273 raise LineLogError(
274 274 b"invalid linelog buffer size %d (must be a multiple of %d)"
275 275 % (len(buf), _llentry.size)
276 276 )
277 277 expected = len(buf) / _llentry.size
278 278 fakejge = _decodeone(buf, 0)
279 279 if isinstance(fakejge, _jump):
280 280 maxrev = 0
281 281 else:
282 282 maxrev = fakejge._cmprev
283 283 numentries = fakejge._target
284 284 if expected != numentries:
285 285 raise LineLogError(
286 286 b"corrupt linelog data: claimed"
287 287 b" %d entries but given data for %d entries"
288 288 % (expected, numentries)
289 289 )
290 290 instructions = [_eof(0, 0)]
291 291 for offset in pycompat.xrange(1, numentries):
292 292 instructions.append(_decodeone(buf, offset * _llentry.size))
293 293 return cls(instructions, maxrev=maxrev)
294 294
295 295 def encode(self):
296 296 hdr = _jge(self._maxrev, len(self._program)).encode()
297 297 return hdr + b''.join(i.encode() for i in self._program[1:])
298 298
299 299 def clear(self):
300 300 self._program = []
301 301 self._maxrev = 0
302 302 self._lastannotate = None
303 303
304 304 def replacelines_vec(self, rev, a1, a2, blines):
305 305 return self.replacelines(
306 306 rev, a1, a2, 0, len(blines), _internal_blines=blines
307 307 )
308 308
309 309 def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
310 310 """Replace lines [a1, a2) with lines [b1, b2)."""
311 311 if self._lastannotate:
312 312 # TODO(augie): make replacelines() accept a revision at
313 313 # which we're editing as well as a revision to mark
314 314 # responsible for the edits. In hg-experimental it's
315 315 # stateful like this, so we're doing the same thing to
316 316 # retain compatibility with absorb until that's imported.
317 317 ar = self._lastannotate
318 318 else:
319 319 ar = self.annotate(rev)
320 320 # ar = self.annotate(self._maxrev)
321 321 if a1 > len(ar.lines):
322 322 raise LineLogError(
323 323 b'%d contains %d lines, tried to access line %d'
324 324 % (rev, len(ar.lines), a1)
325 325 )
326 326 elif a1 == len(ar.lines):
327 327 # Simulated EOF instruction since we're at EOF, which
328 328 # doesn't have a "real" line.
329 329 a1inst = _eof(0, 0)
330 330 a1info = lineinfo(0, 0, ar._eof)
331 331 else:
332 332 a1info = ar.lines[a1]
333 333 a1inst = self._program[a1info._offset]
334 334 programlen = self._program.__len__
335 335 oldproglen = programlen()
336 336 appendinst = self._program.append
337 337
338 338 # insert
339 339 blineinfos = []
340 340 bappend = blineinfos.append
341 341 if b1 < b2:
342 342 # Determine the jump target for the JGE at the start of
343 343 # the new block.
344 344 tgt = oldproglen + (b2 - b1 + 1)
345 345 # Jump to skip the insert if we're at an older revision.
346 346 appendinst(_jl(rev, tgt))
347 347 for linenum in pycompat.xrange(b1, b2):
348 348 if _internal_blines is None:
349 349 bappend(lineinfo(rev, linenum, programlen()))
350 350 appendinst(_line(rev, linenum))
351 351 else:
352 352 newrev, newlinenum = _internal_blines[linenum]
353 353 bappend(lineinfo(newrev, newlinenum, programlen()))
354 354 appendinst(_line(newrev, newlinenum))
355 355 # delete
356 356 if a1 < a2:
357 357 if a2 > len(ar.lines):
358 358 raise LineLogError(
359 359 b'%d contains %d lines, tried to access line %d'
360 360 % (rev, len(ar.lines), a2)
361 361 )
362 362 elif a2 == len(ar.lines):
363 363 endaddr = ar._eof
364 364 else:
365 365 endaddr = ar.lines[a2]._offset
366 366 if a2 > 0 and rev < self._maxrev:
367 367 # If we're here, we're deleting a chunk of an old
368 368 # commit, so we need to be careful and not touch
369 369 # invisible lines between a2-1 and a2 (IOW, lines that
370 370 # are added later).
371 371 endaddr = ar.lines[a2 - 1]._offset + 1
372 372 appendinst(_jge(rev, endaddr))
373 373 # copy instruction from a1
374 374 a1instpc = programlen()
375 375 appendinst(a1inst)
376 376 # if a1inst isn't a jump or EOF, then we need to add an unconditional
377 377 # jump back into the program here.
378 378 if not isinstance(a1inst, (_jump, _eof)):
379 379 appendinst(_jump(0, a1info._offset + 1))
380 380 # Patch instruction at a1, which makes our patch live.
381 381 self._program[a1info._offset] = _jump(0, oldproglen)
382 382
383 383 # Update self._lastannotate in place. This serves as a cache to avoid
384 384 # expensive "self.annotate" in this function, when "replacelines" is
385 385 # used continuously.
386 386 if len(self._lastannotate.lines) > a1:
387 387 self._lastannotate.lines[a1]._offset = a1instpc
388 388 else:
389 389 assert isinstance(a1inst, _eof)
390 390 self._lastannotate._eof = a1instpc
391 391 self._lastannotate.lines[a1:a2] = blineinfos
392 392 self._lastannotate.rev = max(self._lastannotate.rev, rev)
393 393
394 394 if rev > self._maxrev:
395 395 self._maxrev = rev
396 396
397 397 def annotate(self, rev):
398 398 pc = 1
399 399 lines = []
400 400 executed = 0
401 401 # Sanity check: if instructions executed exceeds len(program), we
402 402 # hit an infinite loop in the linelog program somehow and we
403 403 # should stop.
404 404 while pc is not None and executed < len(self._program):
405 405 inst = self._program[pc]
406 406 lastpc = pc
407 407 pc = inst.execute(rev, pc, lines.append)
408 408 executed += 1
409 409 if pc is not None:
410 410 raise LineLogError(
411 411 r'Probably hit an infinite loop in linelog. Program:\n'
412 412 + self.debugstr()
413 413 )
414 414 ar = annotateresult(rev, lines, lastpc)
415 415 self._lastannotate = ar
416 416 return ar
417 417
418 418 @property
419 419 def maxrev(self):
420 420 return self._maxrev
421 421
422 422 # Stateful methods which depend on the value of the last
423 423 # annotation run. This API is for compatiblity with the original
424 424 # linelog, and we should probably consider refactoring it.
425 425 @property
426 426 def annotateresult(self):
427 427 """Return the last annotation result. C linelog code exposed this."""
428 428 return [(l.rev, l.linenum) for l in self._lastannotate.lines]
429 429
430 430 def getoffset(self, line):
431 431 return self._lastannotate.lines[line]._offset
432 432
433 433 def getalllines(self, start=0, end=0):
434 434 """Get all lines that ever occurred in [start, end).
435 435
436 436 Passing start == end == 0 means "all lines ever".
437 437
438 438 This works in terms of *internal* program offsets, not line numbers.
439 439 """
440 440 pc = start or 1
441 441 lines = []
442 442 # only take as many steps as there are instructions in the
443 443 # program - if we don't find an EOF or our stop-line before
444 444 # then, something is badly broken.
445 445 for step in pycompat.xrange(len(self._program)):
446 446 inst = self._program[pc]
447 447 nextpc = pc + 1
448 448 if isinstance(inst, _jump):
449 449 nextpc = inst._target
450 450 elif isinstance(inst, _eof):
451 451 return lines
452 452 elif isinstance(inst, (_jl, _jge)):
453 453 pass
454 454 elif isinstance(inst, _line):
455 455 lines.append((inst._rev, inst._origlineno))
456 456 else:
457 457 raise LineLogError(b"Illegal instruction %r" % inst)
458 458 if nextpc == end:
459 459 return lines
460 460 pc = nextpc
461 461 raise LineLogError(b"Failed to perform getalllines")
General Comments 0
You need to be logged in to leave comments. Login now