##// END OF EJS Templates
linelog: extract `len(self._program)` to a local function...
Jun Wu -
r39006:32b1967b default
parent child Browse files
Show More
@@ -1,423 +1,424 b''
1 1 # linelog - efficient cache for annotate data
2 2 #
3 3 # Copyright 2018 Google LLC.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """linelog is an efficient cache for annotate data inspired by SCCS Weaves.
8 8
9 9 SCCS Weaves are an implementation of
10 10 https://en.wikipedia.org/wiki/Interleaved_deltas. See
11 11 mercurial/help/internals/linelog.txt for an exploration of SCCS weaves
12 12 and how linelog works in detail.
13 13
14 14 Here's a hacker's summary: a linelog is a program which is executed in
15 15 the context of a revision. Executing the program emits information
16 16 about lines, including the revision that introduced them and the line
17 17 number in the file at the introducing revision. When an insertion or
18 18 deletion is performed on the file, a jump instruction is used to patch
19 19 in a new body of annotate information.
20 20 """
21 21 from __future__ import absolute_import, print_function
22 22
23 23 import abc
24 24 import struct
25 25
26 26 from .thirdparty import (
27 27 attr,
28 28 )
29 29 from . import (
30 30 pycompat,
31 31 )
32 32
33 33 _llentry = struct.Struct('>II')
34 34
class LineLogError(Exception):
    """Error raised when something bad happens internally in linelog."""
37 37
@attr.s
class lineinfo(object):
    """Record describing one line emitted while running a linelog program."""

    # Introducing revision of this line.
    rev = attr.ib()
    # Line number for this line in its introducing revision.
    linenum = attr.ib()
    # Private. Offset in the linelog program of this line. Used internally.
    _offset = attr.ib()
46 46
@attr.s
class annotateresult(object):
    """Result of annotating a linelog at one revision.

    Iterating over the result yields the entries of ``lines``.
    """

    # Revision the annotation was computed for.
    rev = attr.ib()
    # List of lineinfo objects, in the order they were emitted.
    lines = attr.ib()
    # Private. Program offset of the last instruction executed by the
    # annotate run (the instruction that ended the program).
    _eof = attr.ib()

    def __iter__(self):
        return iter(self.lines)
55 55
56 56 class _llinstruction(object):
57 57
58 58 __metaclass__ = abc.ABCMeta
59 59
60 60 @abc.abstractmethod
61 61 def __init__(self, op1, op2):
62 62 pass
63 63
64 64 @abc.abstractmethod
65 65 def __str__(self):
66 66 pass
67 67
68 68 def __repr__(self):
69 69 return str(self)
70 70
71 71 @abc.abstractmethod
72 72 def __eq__(self, other):
73 73 pass
74 74
75 75 @abc.abstractmethod
76 76 def encode(self):
77 77 """Encode this instruction to the binary linelog format."""
78 78
79 79 @abc.abstractmethod
80 80 def execute(self, rev, pc, emit):
81 81 """Execute this instruction.
82 82
83 83 Args:
84 84 rev: The revision we're annotating.
85 85 pc: The current offset in the linelog program.
86 86 emit: A function that accepts a single lineinfo object.
87 87
88 88 Returns:
89 89 The new value of pc. Returns None if exeuction should stop
90 90 (that is, we've found the end of the file.)
91 91 """
92 92
class _jge(_llinstruction):
    """If the current rev is greater than or equal to op1, jump to op2."""

    def __init__(self, op1, op2):
        # Revision the executing revision is compared against.
        self._cmprev = op1
        # Program offset to continue at when the comparison succeeds.
        self._target = op2

    def __str__(self):
        return r'JGE %d %d' % (self._cmprev, self._target)

    def __eq__(self, other):
        return (type(self) == type(other)
                and self._cmprev == other._cmprev
                and self._target == other._target)

    def encode(self):
        # Opcode 0 lives in the low two bits of the first word; the
        # revision occupies the remaining upper bits.
        return _llentry.pack(self._cmprev << 2, self._target)

    def execute(self, rev, pc, emit):
        """Return the jump target if rev >= op1, else the next offset."""
        if rev >= self._cmprev:
            return self._target
        return pc + 1
115 115
class _jump(_llinstruction):
    """Unconditional jumps are expressed as a JGE with op1 set to 0."""

    def __init__(self, op1, op2):
        """Store the jump target op2.

        Raises:
          LineLogError: if op1 is not 0.
        """
        if op1 != 0:
            raise LineLogError("malformed JUMP, op1 must be 0, got %d" % op1)
        self._target = op2

    def __str__(self):
        return r'JUMP %d' % (self._target)

    def __eq__(self, other):
        return (type(self) == type(other)
                and self._target == other._target)

    def encode(self):
        # Same wire form as a JGE whose compare revision is 0.
        return _llentry.pack(0, self._target)

    def execute(self, rev, pc, emit):
        """Always continue at the jump target, whatever rev is."""
        return self._target
136 136
class _eof(_llinstruction):
    """EOF is expressed as a JGE that always jumps to 0."""

    def __init__(self, op1, op2):
        """Validate the operands; EOF carries no state of its own.

        Raises:
          LineLogError: if either operand is not 0.
        """
        if op1 != 0:
            raise LineLogError("malformed EOF, op1 must be 0, got %d" % op1)
        if op2 != 0:
            raise LineLogError("malformed EOF, op2 must be 0, got %d" % op2)

    def __str__(self):
        return r'EOF'

    def __eq__(self, other):
        # There are no operands to compare, so any two EOFs are equal.
        return type(self) == type(other)

    def encode(self):
        return _llentry.pack(0, 0)

    def execute(self, rev, pc, emit):
        """Return None, which tells the caller to stop executing."""
        return None
157 157
class _jl(_llinstruction):
    """If the current rev is less than op1, jump to op2."""

    def __init__(self, op1, op2):
        # Revision the executing revision is compared against.
        self._cmprev = op1
        # Program offset to continue at when the comparison succeeds.
        self._target = op2

    def __str__(self):
        return r'JL %d %d' % (self._cmprev, self._target)

    def __eq__(self, other):
        return (type(self) == type(other)
                and self._cmprev == other._cmprev
                and self._target == other._target)

    def encode(self):
        # Opcode 1 in the low two bits, revision in the upper bits.
        return _llentry.pack(1 | (self._cmprev << 2), self._target)

    def execute(self, rev, pc, emit):
        """Return the jump target if rev < op1, else the next offset."""
        if rev < self._cmprev:
            return self._target
        return pc + 1
180 180
class _line(_llinstruction):
    """Emit a line."""

    def __init__(self, op1, op2):
        # This line was introduced by this revision number.
        self._rev = op1
        # This line had the specified line number in the introducing revision.
        self._origlineno = op2

    def __str__(self):
        return r'LINE %d %d' % (self._rev, self._origlineno)

    def __eq__(self, other):
        return (type(self) == type(other)
                and self._rev == other._rev
                and self._origlineno == other._origlineno)

    def encode(self):
        # Opcode 2 in the low two bits, revision in the upper bits.
        return _llentry.pack(2 | (self._rev << 2), self._origlineno)

    def execute(self, rev, pc, emit):
        """Emit a lineinfo for this line and fall through to pc + 1.

        The current offset pc is recorded in the emitted lineinfo.
        """
        emit(lineinfo(self._rev, self._origlineno, pc))
        return pc + 1
204 204
def _decodeone(data, offset):
    """Decode a single linelog instruction from an offset in a buffer.

    Args:
      data: Buffer holding encoded linelog entries.
      offset: Byte offset of the entry to decode.

    Returns:
      An _eof, _jump, _jge, _jl or _line instance.

    Raises:
      LineLogError: if an entry cannot be unpacked at the offset.
      NotImplementedError: if the entry carries opcode 3.
    """
    try:
        op1, op2 = _llentry.unpack_from(data, offset)
    except struct.error as e:
        raise LineLogError('reading an instruction failed: %r' % e)
    # The low two bits of the first word select the opcode; the rest of
    # the word is the first operand.
    opcode = op1 & 0b11
    op1 = op1 >> 2
    if opcode == 0:
        # Opcode 0 is JGE, with two special cases: a zero revision is an
        # unconditional JUMP, and a zero revision and target is EOF.
        if op1 == 0:
            if op2 == 0:
                return _eof(op1, op2)
            return _jump(op1, op2)
        return _jge(op1, op2)
    elif opcode == 1:
        return _jl(op1, op2)
    elif opcode == 2:
        return _line(op1, op2)
    raise NotImplementedError('Unimplemented opcode %r' % opcode)
224 224
class linelog(object):
    """Efficient cache for per-line history information.

    A linelog holds a list of instructions (the "program"). Running the
    program for a revision via annotate() emits the lines visible at
    that revision; replacelines() edits the program by appending a new
    block and patching a jump to it over an existing instruction.
    """

    def __init__(self, program=None, maxrev=0):
        """Create a linelog.

        Args:
          program: Optional list of instructions. Slot 0 is a
            placeholder; execution starts at offset 1.
          maxrev: Highest revision recorded in the program.
        """
        if program is None:
            # We pad the program with an extra leading EOF so that our
            # offsets will match the C code exactly. This means we can
            # interoperate with the C code.
            program = [_eof(0, 0), _eof(0, 0)]
        self._program = program
        self._lastannotate = None
        self._maxrev = maxrev

    def __eq__(self, other):
        return (type(self) == type(other)
                and self._program == other._program
                and self._maxrev == other._maxrev)

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self == other

    def __repr__(self):
        return '<linelog at %s: maxrev=%d size=%d>' % (
            hex(id(self)), self._maxrev, len(self._program))

    def debugstr(self):
        """Return the program as text, one 'offset instruction' per line.

        The placeholder at offset 0 is skipped.
        """
        # Pad offsets to the width of the largest one so columns line up.
        fmt = r'%%%dd %%s' % len(str(len(self._program)))
        return pycompat.sysstr('\n').join(
            fmt % (idx, i) for idx, i in enumerate(self._program[1:], 1))

    @classmethod
    def fromdata(cls, buf):
        """Build a linelog from its binary encoding.

        The first entry is a header: a JGE (or a JUMP when maxrev is 0)
        whose revision is maxrev and whose target is the entry count.

        Raises:
          LineLogError: if the buffer size is not a multiple of the
            entry size, the header is not a JGE/JUMP, the header's entry
            count disagrees with the buffer size, or an entry cannot be
            read.
        """
        if len(buf) % _llentry.size != 0:
            raise LineLogError(
                "invalid linelog buffer size %d (must be a multiple of %d)" % (
                    len(buf), _llentry.size))
        # Floor division keeps the count an integer on Python 3.
        expected = len(buf) // _llentry.size
        fakejge = _decodeone(buf, 0)
        if isinstance(fakejge, _jump):
            # A JGE with revision 0 decodes as a JUMP.
            maxrev = 0
        elif isinstance(fakejge, _jge):
            maxrev = fakejge._cmprev
        else:
            # encode() only ever writes a JGE header; anything else has
            # no usable revision/target pair.
            raise LineLogError(
                "corrupt linelog data: malformed header %r" % fakejge)
        numentries = fakejge._target
        if expected != numentries:
            raise LineLogError("corrupt linelog data: claimed"
                               " %d entries but given data for %d entries" % (
                                   numentries, expected))
        # Slot 0 held the header on disk; in memory it is a placeholder.
        instructions = [_eof(0, 0)]
        for offset in pycompat.xrange(1, numentries):
            instructions.append(_decodeone(buf, offset * _llentry.size))
        return cls(instructions, maxrev=maxrev)

    def encode(self):
        """Return the binary encoding of this linelog.

        The placeholder at offset 0 is replaced by a JGE header carrying
        maxrev and the program length.
        """
        hdr = _jge(self._maxrev, len(self._program)).encode()
        # Instruction encodings are bytes, so the separator must be too.
        return hdr + b''.join(i.encode() for i in self._program[1:])

    def clear(self):
        """Reset to the same empty state a fresh linelog() starts in."""
        # An empty list would leave no instruction at offset 1 and make
        # every later annotate() fail, so restore the two-EOF program.
        self._program = [_eof(0, 0), _eof(0, 0)]
        self._maxrev = 0
        self._lastannotate = None

    def replacelines_vec(self, rev, a1, a2, blines):
        """Replace lines [a1, a2) with the explicit entries in blines.

        Each element of blines is unpacked into the two operands of a
        LINE instruction.
        """
        return self.replacelines(rev, a1, a2, 0, len(blines),
                                 _internal_blines=blines)

    def replacelines(self, rev, a1, a2, b1, b2, _internal_blines=None):
        """Replace lines [a1, a2) with lines [b1, b2).

        Line numbers a1/a2 index into the last annotate result if there
        is one, otherwise into a fresh annotate of rev.

        Raises:
          LineLogError: if a1 or a2 is past the end of the annotated
            lines.
        """
        if self._lastannotate:
            # TODO(augie): make replacelines() accept a revision at
            # which we're editing as well as a revision to mark
            # responsible for the edits. In hg-experimental it's
            # stateful like this, so we're doing the same thing to
            # retain compatibility with absorb until that's imported.
            ar = self._lastannotate
        else:
            ar = self.annotate(rev)
            #        ar = self.annotate(self._maxrev)
        if a1 > len(ar.lines):
            raise LineLogError(
                '%d contains %d lines, tried to access line %d' % (
                    rev, len(ar.lines), a1))
        elif a1 == len(ar.lines):
            # Simulated EOF instruction since we're at EOF, which
            # doesn't have a "real" line.
            a1inst = _eof(0, 0)
            a1info = lineinfo(0, 0, ar._eof)
        else:
            a1info = ar.lines[a1]
            a1inst = self._program[a1info._offset]
        # The new block is appended here; the patched jump targets it.
        oldproglen = len(self._program)
        appendinst = self._program.append

        # insert
        if b1 < b2:
            # Determine the jump target for the JGE at the start of
            # the new block.
            tgt = oldproglen + (b2 - b1 + 1)
            # Jump to skip the insert if we're at an older revision.
            appendinst(_jl(rev, tgt))
            for linenum in pycompat.xrange(b1, b2):
                if _internal_blines is None:
                    appendinst(_line(rev, linenum))
                else:
                    appendinst(_line(*_internal_blines[linenum]))
        # delete
        if a1 < a2:
            if a2 > len(ar.lines):
                raise LineLogError(
                    '%d contains %d lines, tried to access line %d' % (
                        rev, len(ar.lines), a2))
            elif a2 == len(ar.lines):
                endaddr = ar._eof
            else:
                endaddr = ar.lines[a2]._offset
            if a2 > 0 and rev < self._maxrev:
                # If we're here, we're deleting a chunk of an old
                # commit, so we need to be careful and not touch
                # invisible lines between a2-1 and a2 (IOW, lines that
                # are added later).
                endaddr = ar.lines[a2 - 1]._offset + 1
            appendinst(_jge(rev, endaddr))
        # copy instruction from a1
        appendinst(a1inst)
        # if a1inst isn't a jump or EOF, then we need to add an unconditional
        # jump back into the program here.
        if not isinstance(a1inst, (_jump, _eof)):
            appendinst(_jump(0, a1info._offset + 1))
        # Patch instruction at a1, which makes our patch live.
        self._program[a1info._offset] = _jump(0, oldproglen)
        # For compat with the C version, re-annotate rev so that
        # self.annotateresult is cromulent.. We could fix up the
        # annotateresult in place (which is how the C version works),
        # but for now we'll pass on that and see if it matters in
        # practice.
        self.annotate(max(self._lastannotate.rev, rev))
        if rev > self._maxrev:
            self._maxrev = rev

    def annotate(self, rev):
        """Run the program for rev and return an annotateresult.

        The result is also remembered as the "last annotate" used by the
        stateful methods below.

        Raises:
          LineLogError: if the program has not terminated after as many
            steps as it has instructions.
        """
        pc = 1
        lines = []
        executed = 0
        # Sanity check: if instructions executed exceeds len(program), we
        # hit an infinite loop in the linelog program somehow and we
        # should stop.
        while pc is not None and executed < len(self._program):
            inst = self._program[pc]
            lastpc = pc
            pc = inst.execute(rev, pc, lines.append)
            executed += 1
        if pc is not None:
            raise LineLogError(
                r'Probably hit an infinite loop in linelog. Program:\n' +
                self.debugstr())
        # lastpc is the offset of the instruction that ended the run.
        ar = annotateresult(rev, lines, lastpc)
        self._lastannotate = ar
        return ar

    @property
    def maxrev(self):
        return self._maxrev

    # Stateful methods which depend on the value of the last
    # annotation run. This API is for compatibility with the original
    # linelog, and we should probably consider refactoring it.
    @property
    def annotateresult(self):
        """Return the last annotation result. C linelog code exposed this."""
        return [(l.rev, l.linenum) for l in self._lastannotate.lines]

    def getoffset(self, line):
        """Return the program offset of a line in the last annotate."""
        return self._lastannotate.lines[line]._offset

    def getalllines(self, start=0, end=0):
        """Get all lines that ever occurred in [start, end).

        Passing start == end == 0 means "all lines ever".

        This works in terms of *internal* program offsets, not line numbers.
        """
        pc = start or 1
        lines = []
        # only take as many steps as there are instructions in the
        # program - if we don't find an EOF or our stop-line before
        # then, something is badly broken.
        for step in pycompat.xrange(len(self._program)):
            inst = self._program[pc]
            nextpc = pc + 1
            if isinstance(inst, _jump):
                nextpc = inst._target
            elif isinstance(inst, _eof):
                return lines
            elif isinstance(inst, (_jl, _jge)):
                # Conditional jumps are never taken here, so lines from
                # every revision are visited.
                pass
            elif isinstance(inst, _line):
                lines.append((inst._rev, inst._origlineno))
            else:
                raise LineLogError("Illegal instruction %r" % inst)
            if nextpc == end:
                return lines
            pc = nextpc
        raise LineLogError("Failed to perform getalllines")
General Comments 0
You need to be logged in to leave comments. Login now