##// END OF EJS Templates
pytype: add type information for `annotateresult.lines`...
marmoute -
r52188:011eec5a default
parent child Browse files
Show More
@@ -1,466 +1,466 b''
1 1 # linelog - efficient cache for annotate data
2 2 #
3 3 # Copyright 2018 Google LLC.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
8 8
9 9 SCCS Weaves are an implementation of
10 10 https://en.wikipedia.org/wiki/Interleaved_deltas. See
11 11 mercurial/helptext/internals/linelog.txt for an exploration of SCCS weaves
12 12 and how linelog works in detail.
13 13
14 14 Here's a hacker's summary: a linelog is a program which is executed in
15 15 the context of a revision. Executing the program emits information
16 16 about lines, including the revision that introduced them and the line
17 17 number in the file at the introducing revision. When an insertion or
18 18 deletion is performed on the file, a jump instruction is used to patch
19 19 in a new body of annotate information.
20 20 """
21 21
22 22 import abc
23 23 import struct
24 24
25 25 from .thirdparty import attr
26 26 from . import pycompat
27 27
28 28 _llentry = struct.Struct(b'>II')
29 29
30 30
31 31 class LineLogError(Exception):
32 32 """Error raised when something bad happens internally in linelog."""
33 33
34 34
35 35 @attr.s
36 36 class lineinfo:
37 37 # Introducing revision of this line.
38 38 rev = attr.ib()
39 39 # Line number for this line in its introducing revision.
40 40 linenum = attr.ib()
41 41 # Private. Offset in the linelog program of this line. Used internally.
42 42 _offset = attr.ib()
43 43
44 44
45 45 @attr.s
46 46 class annotateresult:
47 47 rev = attr.ib()
48 lines = attr.ib()
48 lines = attr.ib(type=bytearray)
49 49 _eof = attr.ib()
50 50
51 51 def __iter__(self):
52 52 return iter(self.lines)
53 53
54 54
55 55 class _llinstruction: # pytype: disable=ignored-metaclass
56 56
57 57 __metaclass__ = abc.ABCMeta
58 58
59 59 @abc.abstractmethod
60 60 def __init__(self, op1, op2):
61 61 pass
62 62
63 63 @abc.abstractmethod
64 64 def __str__(self):
65 65 pass
66 66
67 67 def __repr__(self):
68 68 return str(self)
69 69
70 70 @abc.abstractmethod
71 71 def __eq__(self, other):
72 72 pass
73 73
74 74 @abc.abstractmethod
75 75 def encode(self):
76 76 """Encode this instruction to the binary linelog format."""
77 77
78 78 @abc.abstractmethod
79 79 def execute(self, rev, pc, emit):
80 80 """Execute this instruction.
81 81
82 82 Args:
83 83 rev: The revision we're annotating.
84 84 pc: The current offset in the linelog program.
85 85 emit: A function that accepts a single lineinfo object.
86 86
87 87 Returns:
88 88 The new value of pc. Returns None if exeuction should stop
89 89 (that is, we've found the end of the file.)
90 90 """
91 91
92 92
93 93 class _jge(_llinstruction):
94 94 """If the current rev is greater than or equal to op1, jump to op2."""
95 95
96 96 def __init__(self, op1, op2):
97 97 self._cmprev = op1
98 98 self._target = op2
99 99
100 100 def __str__(self):
101 101 return 'JGE %d %d' % (self._cmprev, self._target)
102 102
103 103 def __eq__(self, other):
104 104 return (
105 105 type(self) == type(other)
106 106 and self._cmprev == other._cmprev
107 107 and self._target == other._target
108 108 )
109 109
110 110 def encode(self):
111 111 return _llentry.pack(self._cmprev << 2, self._target)
112 112
113 113 def execute(self, rev, pc, emit):
114 114 if rev >= self._cmprev:
115 115 return self._target
116 116 return pc + 1
117 117
118 118
119 119 class _jump(_llinstruction):
120 120 """Unconditional jumps are expressed as a JGE with op1 set to 0."""
121 121
122 122 def __init__(self, op1, op2):
123 123 if op1 != 0:
124 124 raise LineLogError(b"malformed JUMP, op1 must be 0, got %d" % op1)
125 125 self._target = op2
126 126
127 127 def __str__(self):
128 128 return 'JUMP %d' % (self._target)
129 129
130 130 def __eq__(self, other):
131 131 return type(self) == type(other) and self._target == other._target
132 132
133 133 def encode(self):
134 134 return _llentry.pack(0, self._target)
135 135
136 136 def execute(self, rev, pc, emit):
137 137 return self._target
138 138
139 139
140 140 class _eof(_llinstruction):
141 141 """EOF is expressed as a JGE that always jumps to 0."""
142 142
143 143 def __init__(self, op1, op2):
144 144 if op1 != 0:
145 145 raise LineLogError(b"malformed EOF, op1 must be 0, got %d" % op1)
146 146 if op2 != 0:
147 147 raise LineLogError(b"malformed EOF, op2 must be 0, got %d" % op2)
148 148
149 149 def __str__(self):
150 150 return r'EOF'
151 151
152 152 def __eq__(self, other):
153 153 return type(self) == type(other)
154 154
155 155 def encode(self):
156 156 return _llentry.pack(0, 0)
157 157
158 158 def execute(self, rev, pc, emit):
159 159 return None
160 160
161 161
162 162 class _jl(_llinstruction):
163 163 """If the current rev is less than op1, jump to op2."""
164 164
165 165 def __init__(self, op1, op2):
166 166 self._cmprev = op1
167 167 self._target = op2
168 168
169 169 def __str__(self):
170 170 return 'JL %d %d' % (self._cmprev, self._target)
171 171
172 172 def __eq__(self, other):
173 173 return (
174 174 type(self) == type(other)
175 175 and self._cmprev == other._cmprev
176 176 and self._target == other._target
177 177 )
178 178
179 179 def encode(self):
180 180 return _llentry.pack(1 | (self._cmprev << 2), self._target)
181 181
182 182 def execute(self, rev, pc, emit):
183 183 if rev < self._cmprev:
184 184 return self._target
185 185 return pc + 1
186 186
187 187
188 188 class _line(_llinstruction):
189 189 """Emit a line."""
190 190
191 191 def __init__(self, op1, op2):
192 192 # This line was introduced by this revision number.
193 193 self._rev = op1
194 194 # This line had the specified line number in the introducing revision.
195 195 self._origlineno = op2
196 196
197 197 def __str__(self):
198 198 return 'LINE %d %d' % (self._rev, self._origlineno)
199 199
200 200 def __eq__(self, other):
201 201 return (
202 202 type(self) == type(other)
203 203 and self._rev == other._rev
204 204 and self._origlineno == other._origlineno
205 205 )
206 206
207 207 def encode(self):
208 208 return _llentry.pack(2 | (self._rev << 2), self._origlineno)
209 209
210 210 def execute(self, rev, pc, emit):
211 211 emit(lineinfo(self._rev, self._origlineno, pc))
212 212 return pc + 1
213 213
214 214
215 215 def _decodeone(data, offset):
216 216 """Decode a single linelog instruction from an offset in a buffer."""
217 217 try:
218 218 op1, op2 = _llentry.unpack_from(data, offset)
219 219 except struct.error as e:
220 220 raise LineLogError(b'reading an instruction failed: %r' % e)
221 221 opcode = op1 & 0b11
222 222 op1 = op1 >> 2
223 223 if opcode == 0:
224 224 if op1 == 0:
225 225 if op2 == 0:
226 226 return _eof(op1, op2)
227 227 return _jump(op1, op2)
228 228 return _jge(op1, op2)
229 229 elif opcode == 1:
230 230 return _jl(op1, op2)
231 231 elif opcode == 2:
232 232 return _line(op1, op2)
233 233 raise NotImplementedError(b'Unimplemented opcode %r' % opcode)
234 234
235 235
236 236 class linelog:
237 237 """Efficient cache for per-line history information."""
238 238
239 239 def __init__(self, program=None, maxrev=0):
240 240 if program is None:
241 241 # We pad the program with an extra leading EOF so that our
242 242 # offsets will match the C code exactly. This means we can
243 243 # interoperate with the C code.
244 244 program = [_eof(0, 0), _eof(0, 0)]
245 245 self._program = program
246 246 self._lastannotate = None
247 247 self._maxrev = maxrev
248 248
249 249 def __eq__(self, other):
250 250 return (
251 251 type(self) == type(other)
252 252 and self._program == other._program
253 253 and self._maxrev == other._maxrev
254 254 )
255 255
256 256 def __repr__(self):
257 257 return '<linelog at %s: maxrev=%d size=%d>' % (
258 258 hex(id(self)),
259 259 self._maxrev,
260 260 len(self._program),
261 261 )
262 262
263 263 def debugstr(self):
264 264 fmt = '%%%dd %%s' % len(str(len(self._program)))
265 265 return pycompat.sysstr(b'\n').join(
266 266 fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1)
267 267 )
268 268
269 269 @classmethod
270 270 def fromdata(cls, buf):
271 271 if len(buf) % _llentry.size != 0:
272 272 raise LineLogError(
273 273 b"invalid linelog buffer size %d (must be a multiple of %d)"
274 274 % (len(buf), _llentry.size)
275 275 )
276 276 expected = len(buf) / _llentry.size
277 277 fakejge = _decodeone(buf, 0)
278 278 if isinstance(fakejge, _jump):
279 279 maxrev = 0
280 280 elif isinstance(fakejge, (_jge, _jl)):
281 281 maxrev = fakejge._cmprev
282 282 else:
283 283 raise LineLogError(
284 284 'Expected one of _jump, _jge, or _jl. Got %s.'
285 285 % type(fakejge).__name__
286 286 )
287 287 assert isinstance(fakejge, (_jump, _jge, _jl)) # help pytype
288 288 numentries = fakejge._target
289 289 if expected != numentries:
290 290 raise LineLogError(
291 291 b"corrupt linelog data: claimed"
292 292 b" %d entries but given data for %d entries"
293 293 % (expected, numentries)
294 294 )
295 295 instructions = [_eof(0, 0)]
296 296 for offset in range(1, numentries):
297 297 instructions.append(_decodeone(buf, offset * _llentry.size))
298 298 return cls(instructions, maxrev=maxrev)
299 299
300 300 def encode(self):
301 301 hdr = _jge(self._maxrev, len(self._program)).encode()
302 302 return hdr + b''.join(i.encode() for i in self._program[1:])
303 303
304 304 def clear(self):
305 305 self._program = []
306 306 self._maxrev = 0
307 307 self._lastannotate = None
308 308
309 309 def replacelines_vec(self, rev, a1, a2, blines):
310 310 return self.replacelines(
311 311 rev, a1, a2, 0, len(blines), _internal_blines=blines
312 312 )
313 313
314 314 def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
315 315 """Replace lines [a1, a2) with lines [b1, b2)."""
316 316 if self._lastannotate:
317 317 # TODO(augie): make replacelines() accept a revision at
318 318 # which we're editing as well as a revision to mark
319 319 # responsible for the edits. In hg-experimental it's
320 320 # stateful like this, so we're doing the same thing to
321 321 # retain compatibility with absorb until that's imported.
322 322 ar = self._lastannotate
323 323 else:
324 324 ar = self.annotate(rev)
325 325 # ar = self.annotate(self._maxrev)
326 326 if a1 > len(ar.lines):
327 327 raise LineLogError(
328 328 b'%d contains %d lines, tried to access line %d'
329 329 % (rev, len(ar.lines), a1)
330 330 )
331 331 elif a1 == len(ar.lines):
332 332 # Simulated EOF instruction since we're at EOF, which
333 333 # doesn't have a "real" line.
334 334 a1inst = _eof(0, 0)
335 335 a1info = lineinfo(0, 0, ar._eof)
336 336 else:
337 337 a1info = ar.lines[a1]
338 338 a1inst = self._program[a1info._offset]
339 339 programlen = self._program.__len__
340 340 oldproglen = programlen()
341 341 appendinst = self._program.append
342 342
343 343 # insert
344 344 blineinfos = []
345 345 bappend = blineinfos.append
346 346 if b1 < b2:
347 347 # Determine the jump target for the JGE at the start of
348 348 # the new block.
349 349 tgt = oldproglen + (b2 - b1 + 1)
350 350 # Jump to skip the insert if we're at an older revision.
351 351 appendinst(_jl(rev, tgt))
352 352 for linenum in range(b1, b2):
353 353 if _internal_blines is None:
354 354 bappend(lineinfo(rev, linenum, programlen()))
355 355 appendinst(_line(rev, linenum))
356 356 else:
357 357 newrev, newlinenum = _internal_blines[linenum]
358 358 bappend(lineinfo(newrev, newlinenum, programlen()))
359 359 appendinst(_line(newrev, newlinenum))
360 360 # delete
361 361 if a1 < a2:
362 362 if a2 > len(ar.lines):
363 363 raise LineLogError(
364 364 b'%d contains %d lines, tried to access line %d'
365 365 % (rev, len(ar.lines), a2)
366 366 )
367 367 elif a2 == len(ar.lines):
368 368 endaddr = ar._eof
369 369 else:
370 370 endaddr = ar.lines[a2]._offset
371 371 if a2 > 0 and rev < self._maxrev:
372 372 # If we're here, we're deleting a chunk of an old
373 373 # commit, so we need to be careful and not touch
374 374 # invisible lines between a2-1 and a2 (IOW, lines that
375 375 # are added later).
376 376 endaddr = ar.lines[a2 - 1]._offset + 1
377 377 appendinst(_jge(rev, endaddr))
378 378 # copy instruction from a1
379 379 a1instpc = programlen()
380 380 appendinst(a1inst)
381 381 # if a1inst isn't a jump or EOF, then we need to add an unconditional
382 382 # jump back into the program here.
383 383 if not isinstance(a1inst, (_jump, _eof)):
384 384 appendinst(_jump(0, a1info._offset + 1))
385 385 # Patch instruction at a1, which makes our patch live.
386 386 self._program[a1info._offset] = _jump(0, oldproglen)
387 387
388 388 # Update self._lastannotate in place. This serves as a cache to avoid
389 389 # expensive "self.annotate" in this function, when "replacelines" is
390 390 # used continuously.
391 391 if len(self._lastannotate.lines) > a1:
392 392 self._lastannotate.lines[a1]._offset = a1instpc
393 393 else:
394 394 assert isinstance(a1inst, _eof)
395 395 self._lastannotate._eof = a1instpc
396 396 self._lastannotate.lines[a1:a2] = blineinfos
397 397 self._lastannotate.rev = max(self._lastannotate.rev, rev)
398 398
399 399 if rev > self._maxrev:
400 400 self._maxrev = rev
401 401
402 402 def annotate(self, rev):
403 403 pc = 1
404 404 lines = []
405 405 executed = 0
406 406 # Sanity check: if instructions executed exceeds len(program), we
407 407 # hit an infinite loop in the linelog program somehow and we
408 408 # should stop.
409 409 while pc is not None and executed < len(self._program):
410 410 inst = self._program[pc]
411 411 lastpc = pc
412 412 pc = inst.execute(rev, pc, lines.append)
413 413 executed += 1
414 414 if pc is not None:
415 415 raise LineLogError(
416 416 r'Probably hit an infinite loop in linelog. Program:\n'
417 417 + self.debugstr()
418 418 )
419 419 ar = annotateresult(rev, lines, lastpc)
420 420 self._lastannotate = ar
421 421 return ar
422 422
423 423 @property
424 424 def maxrev(self):
425 425 return self._maxrev
426 426
427 427 # Stateful methods which depend on the value of the last
428 428 # annotation run. This API is for compatiblity with the original
429 429 # linelog, and we should probably consider refactoring it.
430 430 @property
431 431 def annotateresult(self):
432 432 """Return the last annotation result. C linelog code exposed this."""
433 433 return [(l.rev, l.linenum) for l in self._lastannotate.lines]
434 434
435 435 def getoffset(self, line):
436 436 return self._lastannotate.lines[line]._offset
437 437
438 438 def getalllines(self, start=0, end=0):
439 439 """Get all lines that ever occurred in [start, end).
440 440
441 441 Passing start == end == 0 means "all lines ever".
442 442
443 443 This works in terms of *internal* program offsets, not line numbers.
444 444 """
445 445 pc = start or 1
446 446 lines = []
447 447 # only take as many steps as there are instructions in the
448 448 # program - if we don't find an EOF or our stop-line before
449 449 # then, something is badly broken.
450 450 for step in range(len(self._program)):
451 451 inst = self._program[pc]
452 452 nextpc = pc + 1
453 453 if isinstance(inst, _jump):
454 454 nextpc = inst._target
455 455 elif isinstance(inst, _eof):
456 456 return lines
457 457 elif isinstance(inst, (_jl, _jge)):
458 458 pass
459 459 elif isinstance(inst, _line):
460 460 lines.append((inst._rev, inst._origlineno))
461 461 else:
462 462 raise LineLogError(b"Illegal instruction %r" % inst)
463 463 if nextpc == end:
464 464 return lines
465 465 pc = nextpc
466 466 raise LineLogError(b"Failed to perform getalllines")
General Comments 0
You need to be logged in to leave comments. Login now