New lazy index code for revlogs....
mason@suse.com
r2079:ee96ca27 default
@@ -1,1101 +1,1206 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25
26 26 def flagstr(flag):
27 27 if flag == "inline":
28 28 return REVLOGNGINLINEDATA
29 29 raise RevlogError(_("unknown revlog flag %s") % flag)
30 30
31 31 def hash(text, p1, p2):
32 32 """generate a hash from the given text and its parent hashes
33 33
34 34 This hash combines both the current file contents and its history
35 35 in a manner that makes it easy to distinguish nodes with the same
36 36 content in the revision graph.
37 37 """
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
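The parent hashes are sorted before being mixed in, so a nodeid does not depend on parent order but does depend on history. A minimal sketch of that property, reusing the hash() above with hypothetical contents:

    # identical text with different history yields different nodeids
    a = hash("contents", nullid, nullid)
    b = hash("contents", a, nullid)
    assert a != b
    # swapping the parents does not change the nodeid
    assert hash("contents", a, nullid) == hash("contents", nullid, a)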
45 45 def compress(text):
46 46 """ generate a possibly-compressed representation of text """
47 47 if not text: return ("", text)
48 48 if len(text) < 44:
49 49 if text[0] == '\0': return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 return ("", bin)
56 56
57 57 def decompress(bin):
58 58 """ decompress the given input """
59 59 if not bin: return bin
60 60 t = bin[0]
61 61 if t == '\0': return bin
62 62 if t == 'x': return zlib.decompress(bin)
63 63 if t == 'u': return bin[1:]
64 64 raise RevlogError(_("unknown compression type %r") % t)
65 65
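A round trip through the two helpers above, as a sketch: short or poorly compressing texts are stored behind a 'u' marker, other texts as raw zlib output, whose leading 'x' byte decompress() recognizes:

    for text in ("short text", "x" * 1000):
        stored = "".join(compress(text))   # marker + payload
        assert decompress(stored) == text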
66 66 indexformatv0 = ">4l20s20s20s"
67 v0shaoffset = 56
67 68 # index ng:
68 69 # 6 bytes offset
69 70 # 2 bytes flags
70 71 # 4 bytes compressed length
71 72 # 4 bytes uncompressed length
72 73 # 4 bytes: base rev
73 74 # 4 bytes link rev
74 75 # 4 bytes parent 1 rev
75 76 # 4 bytes parent 2 rev
76 77 # 32 bytes: nodeid
77 78 indexformatng = ">Qiiiiii20s12x"
79 ngshaoffset = 32
78 80 versionformat = ">i"
79 81
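A sketch of the RevlogNG record layout above: the first field packs the 6-byte offset and 2-byte flags into one 64-bit value, which ngoffset() and ngtype() further down split apart again (hypothetical field values):

    import struct
    offset, flags = 4096, 0
    e = struct.pack(indexformatng, (long(offset) << 16) | flags,
                    120, 300, 0, 7, 5, -1, "\0" * 20)
    assert len(e) == 64                       # fixed 64-byte records
    packed = struct.unpack(indexformatng, e)[0]
    assert packed >> 16 == offset             # what ngoffset() returns
    assert packed & 0xFFFF == flags           # what ngtype() returns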
80 82 class lazyparser(object):
81 83 """
82 84 this class avoids the need to parse the entirety of large indices
83
84 By default we parse and load 1000 entries at a time.
85
86 If no position is specified, we load the whole index, and replace
87 the lazy objects in revlog with the underlying objects for
88 efficiency in cases where we look at most of the nodes.
89 85 """
90 def __init__(self, data, revlog, indexformat):
91 self.data = data
86 def __init__(self, dataf, size, indexformat, shaoffset):
87 self.dataf = dataf
88 self.format = indexformat
92 89 self.s = struct.calcsize(indexformat)
93 90 self.indexformat = indexformat
94 self.l = len(data)/self.s
91 self.datasize = size
92 self.l = size/self.s
95 93 self.index = [None] * self.l
96 94 self.map = {nullid: -1}
95 self.allmap = 0
97 96 self.all = 0
98 self.revlog = revlog
97 self.mapfind_count = 0
98 self.shaoffset = shaoffset
99 99
100 def load(self, pos=None):
100 def loadmap(self):
101 """
102 during a commit, we need to make sure the rev being added is
103 not a duplicate. This requires loading the entire index,
104 which is fairly slow. loadmap can load up just the node map,
105 which takes much less time.
106 """
107 if self.allmap: return
108 start = 0
109 end = self.datasize
110 self.allmap = 1
111 cur = 0
112 count = 0
113 blocksize = self.s * 256
114 self.dataf.seek(0)
115 while cur < end:
116 data = self.dataf.read(blocksize)
117 off = 0
118 for x in xrange(256):
119 n = data[off + self.shaoffset:off + self.shaoffset + 20]
120 self.map[n] = count
121 count += 1
122 if count >= self.l:
123 break
124 off += self.s
125 cur += blocksize
126
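loadmap() only needs the 20-byte nodeid at shaoffset within each record, so it can fill the node map from bulk 256-entry reads without unpacking whole entries. The core of that loop, condensed (f and size as passed to the constructor):

    s = struct.calcsize(indexformatng)        # record size, 64 bytes
    nodemap, count = {nullid: -1}, 0
    f.seek(0)
    while count * s < size:
        data = f.read(s * 256)
        for off in xrange(0, len(data), s):
            node = data[off + ngshaoffset:off + ngshaoffset + 20]
            nodemap[node] = count
            count += 1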
127 def loadblock(self, blockstart, blocksize, data=None):
101 128 if self.all: return
102 if pos is not None:
103 block = pos / 1000
104 i = block * 1000
105 end = min(self.l, i + 1000)
106 else:
129 if data is None:
130 self.dataf.seek(blockstart)
131 data = self.dataf.read(blocksize)
132 lend = len(data) / self.s
133 i = blockstart / self.s
134 off = 0
135 for x in xrange(lend):
136 if self.index[i + x] is None:
137 b = data[off : off + self.s]
138 e = struct.unpack(self.format, b)
139 self.index[i + x] = e
140 self.map[e[-1]] = i + x
141 off += self.s
142
143 def findnode(self, node):
144 """search backwards through the index file for a specific node"""
145 if self.allmap: return None
146
147 # hg log will cause many many searches for the manifest
148 # nodes. After we get called a few times, just load the whole
149 # thing.
150 if self.mapfind_count > 8:
151 self.loadmap()
152 if node in self.map:
153 return node
154 return None
155 self.mapfind_count += 1
156 last = self.l - 1
157 while self.index[last] is not None:
158 if last == 0:
107 159 self.all = 1
108 i = 0
109 end = self.l
110 self.revlog.index = self.index
111 self.revlog.nodemap = self.map
160 self.allmap = 1
161 return None
162 last -= 1
163 end = (last + 1) * self.s
164 blocksize = self.s * 256
165 while end >= 0:
166 start = max(end - blocksize, 0)
167 self.dataf.seek(start)
168 data = self.dataf.read(end - start)
169 findend = end - start
170 while True:
171 # we're searching backwards, so we have to make sure
172 # we don't find a changeset where this node is a parent
173 off = data.rfind(node, 0, findend)
174 findend = off
175 if off >= 0:
176 i = off / self.s
177 off = i * self.s
178 n = data[off + self.shaoffset:off + self.shaoffset + 20]
179 if n == node:
180 self.map[n] = i + start / self.s
181 return node
182 else:
183 break
184 end -= blocksize
185 return None
112 186
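The alignment check in findnode() matters because index records store parent hashes verbatim, so a raw rfind() can land on a parent field rather than a real entry. A toy illustration with two v0-format records (hypothetical values):

    import struct
    s = struct.calcsize(indexformatv0)        # 76-byte v0 records
    target = "N" * 20
    # record 0 has target as its first parent; record 1 has it as its node
    data = (struct.pack(indexformatv0, 0, 0, -1, 0, target, "\0" * 20, "X" * 20)
          + struct.pack(indexformatv0, 0, 0, -1, 0, "\0" * 20, "\0" * 20, target))
    off = data.rfind(target)                  # searching backwards
    i = off / s
    # only accepted because the hit sits at v0shaoffset inside record i
    assert data[i * s + v0shaoffset:i * s + v0shaoffset + 20] == target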
113 while i < end:
114 if not self.index[i]:
115 d = self.data[i * self.s: (i + 1) * self.s]
116 e = struct.unpack(self.indexformat, d)
117 self.index[i] = e
118 self.map[e[-1]] = i
119 i += 1
187 def loadindex(self, i=None, end=None):
188 if self.all: return
189 all = False
190 if i is None:
191 blockstart = 0
192 blocksize = (512 / self.s) * self.s
193 end = self.datasize
194 all = True
195 else:
196 if end:
197 blockstart = i * self.s
198 end = end * self.s
199 blocksize = end - blockstart
200 else:
201 blockstart = (i & ~(32)) * self.s
202 blocksize = self.s * 64
203 end = blockstart + blocksize
204 while blockstart < end:
205 self.loadblock(blockstart, blocksize)
206 blockstart += blocksize
207 if all: self.all = True
120 208
121 209 class lazyindex(object):
122 210 """a lazy version of the index array"""
123 211 def __init__(self, parser):
124 212 self.p = parser
125 213 def __len__(self):
126 214 return len(self.p.index)
127 215 def load(self, pos):
128 216 if pos < 0:
129 217 pos += len(self.p.index)
130 self.p.load(pos)
218 self.p.loadindex(pos)
131 219 return self.p.index[pos]
132 220 def __getitem__(self, pos):
133 221 return self.p.index[pos] or self.load(pos)
134 222 def __setitem__(self, pos, item):
135 223 self.p.index[pos] = item
136 224 def __delitem__(self, pos):
137 225 del self.p.index[pos]
138 226 def append(self, e):
139 227 self.p.index.append(e)
140 228
141 229 class lazymap(object):
142 230 """a lazy version of the node map"""
143 231 def __init__(self, parser):
144 232 self.p = parser
145 233 def load(self, key):
146 if self.p.all: return
147 n = self.p.data.find(key)
148 if n < 0:
234 n = self.p.findnode(key)
235 if n is None:
149 236 raise KeyError(key)
150 pos = n / self.p.s
151 self.p.load(pos)
152 237 def __contains__(self, key):
153 self.p.load()
238 if key in self.p.map:
239 return True
240 self.p.loadmap()
154 241 return key in self.p.map
155 242 def __iter__(self):
156 243 yield nullid
157 244 for i in xrange(self.p.l):
158 245 try:
159 246 yield self.p.index[i][-1]
160 247 except:
161 self.p.load(i)
248 self.p.loadindex(i)
162 249 yield self.p.index[i][-1]
163 250 def __getitem__(self, key):
164 251 try:
165 252 return self.p.map[key]
166 253 except KeyError:
167 254 try:
168 255 self.load(key)
169 256 return self.p.map[key]
170 257 except KeyError:
171 258 raise KeyError("node " + hex(key))
172 259 def __setitem__(self, key, val):
173 260 self.p.map[key] = val
174 261 def __delitem__(self, key):
175 262 del self.p.map[key]
176 263
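How the three lazy classes cooperate, sketched against a hypothetical index file (this mirrors what revlog.load() below does once a non-inline index grows past 10000 bytes):

    f = opener(indexfile)                     # opener/indexfile as in revlog.__init__
    size = os.fstat(f.fileno()).st_size
    p = lazyparser(f, size, indexformatng, ngshaoffset)
    index, nodemap = lazyindex(p), lazymap(p)
    e = index[5]          # faults in one small block via loadindex()
    rev = nodemap[e[-1]]  # partial-map hit, or a findnode() scan on miss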
177 264 class RevlogError(Exception): pass
178 265
179 266 class revlog(object):
180 267 """
181 268 the underlying revision storage object
182 269
183 270 A revlog consists of two parts, an index and the revision data.
184 271
185 272 The index is a file with a fixed record size containing
186 273 information on each revision, including its nodeid (hash), the
187 274 nodeids of its parents, the position and offset of its data within
188 275 the data file, and the revision it's based on. Finally, each entry
189 276 contains a linkrev entry that can serve as a pointer to external
190 277 data.
191 278
192 279 The revision data itself is a linear collection of data chunks.
193 280 Each chunk represents a revision and is usually represented as a
194 281 delta against the previous chunk. To bound lookup time, runs of
195 282 deltas are limited to about 2 times the length of the original
196 283 version data. This makes retrieval of a version proportional to
197 284 its size, or O(1) relative to the number of revisions.
198 285
199 286 Both pieces of the revlog are written to in an append-only
200 287 fashion, which means we never need to rewrite a file to insert or
201 288 remove data, and can use some simple techniques to avoid the need
202 289 for locking while reading.
203 290 """
204 291 def __init__(self, opener, indexfile, datafile, defversion=0):
205 292 """
206 293 create a revlog object
207 294
208 295 opener is a function that abstracts the file opening operation
209 296 and can be used to implement COW semantics or the like.
210 297 """
211 298 self.indexfile = indexfile
212 299 self.datafile = datafile
213 300 self.opener = opener
214 301
215 302 self.indexstat = None
216 303 self.cache = None
217 304 self.chunkcache = None
218 305 self.defversion = defversion
219 306 self.load()
220 307
221 308 def load(self):
222 309 v = self.defversion
223 310 try:
224 311 f = self.opener(self.indexfile)
225 i = f.read()
312 i = f.read(4)
313 f.seek(0)
226 314 except IOError, inst:
227 315 if inst.errno != errno.ENOENT:
228 316 raise
229 317 i = ""
230 318 else:
231 319 try:
232 320 st = os.fstat(f.fileno())
233 321 except AttributeError, inst:
234 322 st = None
235 323 else:
236 324 oldst = self.indexstat
237 325 if (oldst and st.st_dev == oldst.st_dev
238 326 and st.st_ino == oldst.st_ino
239 327 and st.st_mtime == oldst.st_mtime
240 328 and st.st_ctime == oldst.st_ctime):
241 329 return
242 330 self.indexstat = st
243 331 if len(i) > 0:
244 v = struct.unpack(versionformat, i[:4])[0]
332 v = struct.unpack(versionformat, i)[0]
245 333 flags = v & ~0xFFFF
246 334 fmt = v & 0xFFFF
247 335 if fmt == 0:
248 336 if flags:
249 337 raise RevlogError(_("index %s invalid flags %x for format v0") %
250 338 (self.indexfile, flags))
251 339 elif fmt == REVLOGNG:
252 340 if flags & ~REVLOGNGINLINEDATA:
253 341 raise RevlogError(_("index %s invalid flags %x for revlogng") %
254 342 (self.indexfile, flags))
255 343 else:
256 344 raise RevlogError(_("index %s invalid format %d") %
257 345 (self.indexfile, fmt))
258 346 self.version = v
259 347 if v == 0:
260 348 self.indexformat = indexformatv0
349 shaoffset = v0shaoffset
261 350 else:
262 351 self.indexformat = indexformatng
352 shaoffset = ngshaoffset
263 353
264 354 if i:
265 355 if not self.inlinedata() and st and st.st_size > 10000:
266 356 # big index, let's parse it on demand
267 parser = lazyparser(i, self, self.indexformat)
357 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
268 358 self.index = lazyindex(parser)
269 359 self.nodemap = lazymap(parser)
270 360 else:
361 i = f.read()
271 362 self.parseindex(i)
272 363 if self.inlinedata():
273 364 # we've already got the entire data file read in, save it
274 365 # in the chunk data
275 366 self.chunkcache = (0, i)
276 367 if self.version != 0:
277 368 e = list(self.index[0])
278 369 type = self.ngtype(e[0])
279 370 e[0] = self.offset_type(0, type)
280 371 self.index[0] = e
281 372 else:
282 373 self.nodemap = { nullid: -1}
283 374 self.index = []
284 375
285 376
286 377 def parseindex(self, data):
287 378 s = struct.calcsize(self.indexformat)
288 379 l = len(data)
289 380 self.index = []
290 381 self.nodemap = {nullid: -1}
291 382 inline = self.inlinedata()
292 383 off = 0
293 384 n = 0
294 385 while off < l:
295 386 e = struct.unpack(self.indexformat, data[off:off + s])
296 387 self.index.append(e)
297 388 self.nodemap[e[-1]] = n
298 389 n += 1
299 390 off += s
300 391 if inline:
301 392 off += e[1]
302 393
303 394 def ngoffset(self, q):
304 395 if q & 0xFFFF:
305 396 raise RevlogError(_('%s: incompatible revision flag %x') %
306 397 (self.indexfile, q))
307 398 return long(q >> 16)
308 399
309 400 def ngtype(self, q):
310 401 return int(q & 0xFFFF)
311 402
312 403 def offset_type(self, offset, type):
313 404 return long(long(offset) << 16 | type)
314 405
406 def loadindex(self, start, end):
407 """load a block of indexes all at once from the lazy parser"""
408 if isinstance(self.index, lazyindex):
409 self.index.p.loadindex(start, end)
410
315 411 def loadindexmap(self):
316 412 """loads both the map and the index from the lazy parser"""
317 413 if isinstance(self.index, lazyindex):
318 414 p = self.index.p
319 p.load()
415 p.loadindex()
416 self.nodemap = p.map
417
418 def loadmap(self):
419 """loads the map from the lazy parser"""
420 if isinstance(self.nodemap, lazymap):
421 self.nodemap.p.loadmap()
422 self.nodemap = self.nodemap.p.map
320 423
321 424 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
322 425 def tip(self): return self.node(len(self.index) - 1)
323 426 def count(self): return len(self.index)
324 427 def node(self, rev):
325 428 return (rev < 0) and nullid or self.index[rev][-1]
326 429 def rev(self, node):
327 430 try:
328 431 return self.nodemap[node]
329 432 except KeyError:
330 433 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
331 434 def linkrev(self, node): return self.index[self.rev(node)][-4]
332 435 def parents(self, node):
333 436 if node == nullid: return (nullid, nullid)
334 437 r = self.rev(node)
335 438 d = self.index[r][-3:-1]
336 439 if self.version == 0:
337 440 return d
338 441 return [ self.node(x) for x in d ]
339 442 def start(self, rev):
340 443 if rev < 0:
341 444 return -1
342 445 if self.version != 0:
343 446 return self.ngoffset(self.index[rev][0])
344 447 return self.index[rev][0]
345 448
346 449 def end(self, rev): return self.start(rev) + self.length(rev)
347 450
348 451 def size(self, rev):
349 452 """return the length of the uncompressed text for a given revision"""
350 453 l = -1
351 454 if self.version != 0:
352 455 l = self.index[rev][2]
353 456 if l >= 0:
354 457 return l
355 458
356 459 t = self.revision(self.node(rev))
357 460 return len(t)
358 461
358 461 # alternate implementation. The advantage of this code is that it
360 463 # will be faster for a single revision. But, the results are not
361 464 # cached, so finding the size of every revision will be slower.
362 465 """
363 466 if self.cache and self.cache[1] == rev:
364 467 return len(self.cache[2])
365 468
366 469 base = self.base(rev)
367 470 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
368 471 base = self.cache[1]
369 472 text = self.cache[2]
370 473 else:
371 474 text = self.revision(self.node(base))
372 475
373 476 l = len(text)
374 477 for x in xrange(base + 1, rev + 1):
375 478 l = mdiff.patchedsize(l, self.chunk(x))
376 479 return l
377 480 """
378 481
379 482 def length(self, rev):
380 483 if rev < 0:
381 484 return 0
382 485 else:
383 486 return self.index[rev][1]
384 487 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
385 488
386 489 def reachable(self, rev, stop=None):
387 490 reachable = {}
388 491 visit = [rev]
389 492 reachable[rev] = 1
390 493 if stop:
391 494 stopn = self.rev(stop)
392 495 else:
393 496 stopn = 0
394 497 while visit:
395 498 n = visit.pop(0)
396 499 if n == stop:
397 500 continue
398 501 if n == nullid:
399 502 continue
400 503 for p in self.parents(n):
401 504 if self.rev(p) < stopn:
402 505 continue
403 506 if p not in reachable:
404 507 reachable[p] = 1
405 508 visit.append(p)
406 509 return reachable
407 510
408 511 def nodesbetween(self, roots=None, heads=None):
409 512 """Return a tuple containing three elements. Elements 1 and 2 contain
410 513 a final list of bases and heads after all the unreachable ones have
411 514 been pruned. Element 0 contains a topologically sorted list of all
412 515 nodes that satisfy these constraints:
413 516
414 517 1. All nodes must be descended from a node in roots (the nodes on
415 518 roots are considered descended from themselves).
416 519 2. All nodes must also be ancestors of a node in heads (the nodes in
417 520 heads are considered to be their own ancestors).
418 521
419 522 If roots is unspecified, nullid is assumed as the only root.
420 523 If heads is unspecified, it is taken to be the output of the
421 524 heads method (i.e. a list of all nodes in the repository that
422 525 have no children)."""
423 526 nonodes = ([], [], [])
424 527 if roots is not None:
425 528 roots = list(roots)
426 529 if not roots:
427 530 return nonodes
428 531 lowestrev = min([self.rev(n) for n in roots])
429 532 else:
430 533 roots = [nullid] # Everybody's a descendent of nullid
431 534 lowestrev = -1
432 535 if (lowestrev == -1) and (heads is None):
433 536 # We want _all_ the nodes!
434 537 return ([self.node(r) for r in xrange(0, self.count())],
435 538 [nullid], list(self.heads()))
436 539 if heads is None:
437 540 # All nodes are ancestors, so the latest ancestor is the last
438 541 # node.
439 542 highestrev = self.count() - 1
440 543 # Set ancestors to None to signal that every node is an ancestor.
441 544 ancestors = None
442 545 # Set heads to an empty dictionary for later discovery of heads
443 546 heads = {}
444 547 else:
445 548 heads = list(heads)
446 549 if not heads:
447 550 return nonodes
448 551 ancestors = {}
449 552 # Start at the top and keep marking parents until we're done.
450 553 nodestotag = heads[:]
451 554 # Turn heads into a dictionary so we can remove 'fake' heads.
452 555 # Also, later we will be using it to filter out the heads we can't
453 556 # find from roots.
454 557 heads = dict.fromkeys(heads, 0)
455 558 # Remember where the top was so we can use it as a limit later.
456 559 highestrev = max([self.rev(n) for n in nodestotag])
457 560 while nodestotag:
458 561 # grab a node to tag
459 562 n = nodestotag.pop()
460 563 # Never tag nullid
461 564 if n == nullid:
462 565 continue
463 566 # A node's revision number represents its place in a
464 567 # topologically sorted list of nodes.
465 568 r = self.rev(n)
466 569 if r >= lowestrev:
467 570 if n not in ancestors:
468 571 # If we are possibly a descendent of one of the roots
469 572 # and we haven't already been marked as an ancestor
470 573 ancestors[n] = 1 # Mark as ancestor
471 574 # Add non-nullid parents to list of nodes to tag.
472 575 nodestotag.extend([p for p in self.parents(n) if
473 576 p != nullid])
474 577 elif n in heads: # We've seen it before, is it a fake head?
475 578 # So it is, real heads should not be the ancestors of
476 579 # any other heads.
477 580 heads.pop(n)
478 581 if not ancestors:
479 582 return nonodes
480 583 # Now that we have our set of ancestors, we want to remove any
481 584 # roots that are not ancestors.
482 585
483 586 # If one of the roots was nullid, everything is included anyway.
484 587 if lowestrev > -1:
485 588 # But, since we weren't, let's recompute the lowest rev to not
486 589 # include roots that aren't ancestors.
487 590
488 591 # Filter out roots that aren't ancestors of heads
489 592 roots = [n for n in roots if n in ancestors]
490 593 # Recompute the lowest revision
491 594 if roots:
492 595 lowestrev = min([self.rev(n) for n in roots])
493 596 else:
494 597 # No more roots? Return empty list
495 598 return nonodes
496 599 else:
497 600 # We are descending from nullid, and don't need to care about
498 601 # any other roots.
499 602 lowestrev = -1
500 603 roots = [nullid]
501 604 # Transform our roots list into a 'set' (i.e. a dictionary where the
502 605 # values don't matter).
503 606 descendents = dict.fromkeys(roots, 1)
504 607 # Also, keep the original roots so we can filter out roots that aren't
505 608 # 'real' roots (i.e. are descended from other roots).
506 609 roots = descendents.copy()
507 610 # Our topologically sorted list of output nodes.
508 611 orderedout = []
509 612 # Don't start at nullid since we don't want nullid in our output list,
510 613 # and if nullid shows up in descendents, empty parents will look like
511 614 # they're descendents.
512 615 for r in xrange(max(lowestrev, 0), highestrev + 1):
513 616 n = self.node(r)
514 617 isdescendent = False
515 618 if lowestrev == -1: # Everybody is a descendent of nullid
516 619 isdescendent = True
517 620 elif n in descendents:
518 621 # n is already a descendent
519 622 isdescendent = True
520 623 # This check only needs to be done here because all the roots
521 624 # will start being marked as descendents before the loop.
522 625 if n in roots:
523 626 # If n was a root, check if it's a 'real' root.
524 627 p = tuple(self.parents(n))
525 628 # If any of its parents are descendents, it's not a root.
526 629 if (p[0] in descendents) or (p[1] in descendents):
527 630 roots.pop(n)
528 631 else:
529 632 p = tuple(self.parents(n))
530 633 # A node is a descendent if either of its parents are
531 634 # descendents. (We seeded the descendents list with the roots
532 635 # up there, remember?)
533 636 if (p[0] in descendents) or (p[1] in descendents):
534 637 descendents[n] = 1
535 638 isdescendent = True
536 639 if isdescendent and ((ancestors is None) or (n in ancestors)):
537 640 # Only include nodes that are both descendents and ancestors.
538 641 orderedout.append(n)
539 642 if (ancestors is not None) and (n in heads):
540 643 # We're trying to figure out which heads are reachable
541 644 # from roots.
542 645 # Mark this head as having been reached
543 646 heads[n] = 1
544 647 elif ancestors is None:
545 648 # Otherwise, we're trying to discover the heads.
546 649 # Assume this is a head because if it isn't, the next step
547 650 # will eventually remove it.
548 651 heads[n] = 1
549 652 # But, obviously its parents aren't.
550 653 for p in self.parents(n):
551 654 heads.pop(p, None)
552 655 heads = [n for n in heads.iterkeys() if heads[n] != 0]
553 656 roots = roots.keys()
554 657 assert orderedout
555 658 assert roots
556 659 assert heads
557 660 return (orderedout, roots, heads)
558 661
559 662 def heads(self, start=None):
560 663 """return the list of all nodes that have no children
561 664
562 665 if start is specified, only heads that are descendants of
563 666 start will be returned
564 667
565 668 """
566 669 if start is None:
567 670 start = nullid
568 671 reachable = {start: 1}
569 672 heads = {start: 1}
570 673 startrev = self.rev(start)
571 674
572 675 for r in xrange(startrev + 1, self.count()):
573 676 n = self.node(r)
574 677 for pn in self.parents(n):
575 678 if pn in reachable:
576 679 reachable[n] = 1
577 680 heads[n] = 1
578 681 if pn in heads:
579 682 del heads[pn]
580 683 return heads.keys()
581 684
582 685 def children(self, node):
583 686 """find the children of a given node"""
584 687 c = []
585 688 p = self.rev(node)
586 689 for r in range(p + 1, self.count()):
587 690 n = self.node(r)
588 691 for pn in self.parents(n):
589 692 if pn == node:
590 693 c.append(n)
591 694 continue
592 695 elif pn == nullid:
593 696 continue
594 697 return c
595 698
596 699 def lookup(self, id):
597 700 """locate a node based on revision number or subset of hex nodeid"""
598 701 try:
599 702 rev = int(id)
600 703 if str(rev) != id: raise ValueError
601 704 if rev < 0: rev = self.count() + rev
602 705 if rev < 0 or rev >= self.count(): raise ValueError
603 706 return self.node(rev)
604 707 except (ValueError, OverflowError):
605 708 c = []
606 709 for n in self.nodemap:
607 710 if hex(n).startswith(id):
608 711 c.append(n)
609 712 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
610 713 if len(c) < 1: raise RevlogError(_("No match found"))
611 714 return c[0]
612 715
613 716 return None
614 717
615 718 def diff(self, a, b):
616 719 """return a delta between two revisions"""
617 720 return mdiff.textdiff(a, b)
618 721
619 722 def patches(self, t, pl):
620 723 """apply a list of patches to a string"""
621 724 return mdiff.patches(t, pl)
622 725
623 726 def chunk(self, rev, df=None, cachelen=4096):
624 727 start, length = self.start(rev), self.length(rev)
625 728 inline = self.inlinedata()
626 729 if inline:
627 730 start += (rev + 1) * struct.calcsize(self.indexformat)
628 731 end = start + length
629 732 def loadcache(df):
630 733 cache_length = max(cachelen, length) # 4k
631 734 if not df:
632 735 if inline:
633 736 df = self.opener(self.indexfile)
634 737 else:
635 738 df = self.opener(self.datafile)
636 739 df.seek(start)
637 740 self.chunkcache = (start, df.read(cache_length))
638 741
639 742 if not self.chunkcache:
640 743 loadcache(df)
641 744
642 745 cache_start = self.chunkcache[0]
643 746 cache_end = cache_start + len(self.chunkcache[1])
644 747 if start >= cache_start and end <= cache_end:
645 748 # it is cached
646 749 offset = start - cache_start
647 750 else:
648 751 loadcache(df)
649 752 offset = 0
650 753
651 754 #def checkchunk():
652 755 # df = self.opener(self.datafile)
653 756 # df.seek(start)
654 757 # return df.read(length)
655 758 #assert s == checkchunk()
656 759 return decompress(self.chunkcache[1][offset:offset + length])
657 760
658 761 def delta(self, node):
659 762 """return or calculate a delta between a node and its predecessor"""
660 763 r = self.rev(node)
661 764 return self.revdiff(r - 1, r)
662 765
663 766 def revdiff(self, rev1, rev2):
664 767 """return or calculate a delta between two revisions"""
665 768 b1 = self.base(rev1)
666 769 b2 = self.base(rev2)
667 770 if b1 == b2 and rev1 + 1 == rev2:
668 771 return self.chunk(rev2)
669 772 else:
670 773 return self.diff(self.revision(self.node(rev1)),
671 774 self.revision(self.node(rev2)))
672 775
673 776 def revision(self, node):
674 777 """return an uncompressed revision of a given"""
675 778 if node == nullid: return ""
676 779 if self.cache and self.cache[0] == node: return self.cache[2]
677 780
678 781 # look up what we need to read
679 782 text = None
680 783 rev = self.rev(node)
681 784 base = self.base(rev)
682 785
683 786 if self.inlinedata():
684 787 # we probably have the whole chunk cached
685 788 df = None
686 789 else:
687 790 df = self.opener(self.datafile)
688 791
689 792 # do we have useful data cached?
690 793 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
691 794 base = self.cache[1]
692 795 text = self.cache[2]
796 self.loadindex(base, rev + 1)
693 797 else:
798 self.loadindex(base, rev + 1)
694 799 text = self.chunk(base, df=df)
695 800
696 801 bins = []
697 802 for r in xrange(base + 1, rev + 1):
698 803 bins.append(self.chunk(r, df=df))
699 804
700 805 text = self.patches(text, bins)
701 806
702 807 p1, p2 = self.parents(node)
703 808 if node != hash(text, p1, p2):
704 809 raise RevlogError(_("integrity check failed on %s:%d")
705 810 % (self.datafile, rev))
706 811
707 812 self.cache = (node, rev, text)
708 813 return text
709 814
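revision() reconstructs text by walking the delta chain: the full version stored at base(rev), then each chunk up to rev applied as a patch. The reconstruction step, condensed from the code above:

    base = self.base(rev)
    text = self.chunk(base)                   # full text at the chain base
    bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]
    text = mdiff.patches(text, bins)          # apply the deltas in order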
710 815 def checkinlinesize(self, tr, fp=None):
711 816 if not self.inlinedata():
712 817 return
713 818 if not fp:
714 819 fp = self.opener(self.indexfile, 'r')
715 820 size = fp.tell()
716 821 if size < 131072:
717 822 return
718 823 tr.add(self.datafile, 0)
719 824 df = self.opener(self.datafile, 'w')
720 825 calc = struct.calcsize(self.indexformat)
721 826 for r in xrange(self.count()):
722 827 start = self.start(r) + (r + 1) * calc
723 828 length = self.length(r)
724 829 fp.seek(start)
725 830 d = fp.read(length)
726 831 df.write(d)
727 832 fp.close()
728 833 df.close()
729 834 fp = self.opener(self.indexfile, 'w', atomictemp=True)
730 835 self.version &= ~(REVLOGNGINLINEDATA)
731 836 if self.count():
732 837 x = self.index[0]
733 838 e = struct.pack(self.indexformat, *x)[4:]
734 839 l = struct.pack(versionformat, self.version)
735 840 fp.write(l)
736 841 fp.write(e)
737 842
738 843 for i in xrange(1, self.count()):
739 844 x = self.index[i]
740 845 e = struct.pack(self.indexformat, *x)
741 846 fp.write(e)
742 847
743 848 # if we don't call rename, the temp file will never replace the
744 849 # real index
745 850 fp.rename()
746 851 self.chunkcache = None
747 852
748 853 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
749 854 """add a revision to the log
750 855
751 856 text - the revision data to add
752 857 transaction - the transaction object used for rollback
753 858 link - the linkrev data to add
754 859 p1, p2 - the parent nodeids of the revision
755 860 d - an optional precomputed delta
756 861 """
757 862 if text is None: text = ""
758 863 if p1 is None: p1 = self.tip()
759 864 if p2 is None: p2 = nullid
760 865
761 866 node = hash(text, p1, p2)
762 867
763 868 if node in self.nodemap:
764 869 return node
765 870
766 871 n = self.count()
767 872 t = n - 1
768 873
769 874 if n:
770 875 base = self.base(t)
771 876 start = self.start(base)
772 877 end = self.end(t)
773 878 if not d:
774 879 prev = self.revision(self.tip())
775 880 d = self.diff(prev, str(text))
776 881 data = compress(d)
777 882 l = len(data[1]) + len(data[0])
778 883 dist = end - start + l
779 884
780 885 # full versions are inserted when the needed deltas
781 886 # become comparable to the uncompressed text
782 887 if not n or dist > len(text) * 2:
783 888 data = compress(text)
784 889 l = len(data[1]) + len(data[0])
785 890 base = n
786 891 else:
787 892 base = self.base(t)
788 893
789 894 offset = 0
790 895 if t >= 0:
791 896 offset = self.end(t)
792 897
793 898 if self.version == 0:
794 899 e = (offset, l, base, link, p1, p2, node)
795 900 else:
796 901 e = (self.offset_type(offset, 0), l, len(text),
797 902 base, link, self.rev(p1), self.rev(p2), node)
798 903
799 904 self.index.append(e)
800 905 self.nodemap[node] = n
801 906 entry = struct.pack(self.indexformat, *e)
802 907
803 908 if not self.inlinedata():
804 909 transaction.add(self.datafile, offset)
805 910 transaction.add(self.indexfile, n * len(entry))
806 911 f = self.opener(self.datafile, "a")
807 912 if data[0]:
808 913 f.write(data[0])
809 914 f.write(data[1])
810 915 f = self.opener(self.indexfile, "a")
811 916 else:
812 917 f = self.opener(self.indexfile, "a+")
813 918 f.seek(0, 2)
814 919 transaction.add(self.indexfile, f.tell())
815 920
816 921 if len(self.index) == 1 and self.version != 0:
817 922 l = struct.pack(versionformat, self.version)
818 923 f.write(l)
819 924 entry = entry[4:]
820 925
821 926 f.write(entry)
822 927
823 928 if self.inlinedata():
824 929 f.write(data[0])
825 930 f.write(data[1])
826 931 self.checkinlinesize(transaction, f)
827 932
828 933 self.cache = (node, n, text)
829 934 return node
830 935
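The snapshot rule in addrevision() with concrete (hypothetical) numbers: when rebuilding a revision would mean reading more than twice its uncompressed length, a full version is stored and the delta chain is cut:

    textlen, dist = 10000, 23000      # new text size; bytes start(base)..end
    store_full = dist > textlen * 2   # 23000 > 20000, so snapshot
    assert store_full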
831 936 def ancestor(self, a, b):
832 937 """calculate the least common ancestor of nodes a and b"""
833 938 # calculate the distance of every node from root
834 939 dist = {nullid: 0}
835 940 for i in xrange(self.count()):
836 941 n = self.node(i)
837 942 p1, p2 = self.parents(n)
838 943 dist[n] = max(dist[p1], dist[p2]) + 1
839 944
840 945 # traverse ancestors in order of decreasing distance from root
841 946 def ancestors(node):
842 947 # we store negative distances because heap returns smallest member
843 948 h = [(-dist[node], node)]
844 949 seen = {}
845 950 while h:
846 951 d, n = heapq.heappop(h)
847 952 if n not in seen:
848 953 seen[n] = 1
849 954 yield (-d, n)
850 955 for p in self.parents(n):
851 956 heapq.heappush(h, (-dist[p], p))
852 957
853 958 def generations(node):
854 959 sg, s = None, {}
855 960 for g,n in ancestors(node):
856 961 if g != sg:
857 962 if sg:
858 963 yield sg, s
859 964 sg, s = g, {n:1}
860 965 else:
861 966 s[n] = 1
862 967 yield sg, s
863 968
864 969 x = generations(a)
865 970 y = generations(b)
866 971 gx = x.next()
867 972 gy = y.next()
868 973
869 974 # increment each ancestor list until it is closer to root than
870 975 # the other, or they match
871 976 while 1:
872 977 #print "ancestor gen %s %s" % (gx[0], gy[0])
873 978 if gx[0] == gy[0]:
874 979 # find the intersection
875 980 i = [ n for n in gx[1] if n in gy[1] ]
876 981 if i:
877 982 return i[0]
878 983 else:
879 984 #print "next"
880 985 gy = y.next()
881 986 gx = x.next()
882 987 elif gx[0] < gy[0]:
883 988 #print "next y"
884 989 gy = y.next()
885 990 else:
886 991 #print "next x"
887 992 gx = x.next()
888 993
889 994 def group(self, nodelist, lookup, infocollect=None):
890 995 """calculate a delta group
891 996
892 997 Given a list of changeset revs, return a set of deltas and
893 998 metadata corresponding to nodes. The first delta is
894 999 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
895 1000 have this parent, as it has all history before these
896 1001 changesets. The parent is parent[0].
897 1002 """
898 1003 revs = [self.rev(n) for n in nodelist]
899 1004
900 1005 # if we don't have any revisions touched by these changesets, bail
901 1006 if not revs:
902 1007 yield changegroup.closechunk()
903 1008 return
904 1009
905 1010 # add the parent of the first rev
906 1011 p = self.parents(self.node(revs[0]))[0]
907 1012 revs.insert(0, self.rev(p))
908 1013
909 1014 # build deltas
910 1015 for d in xrange(0, len(revs) - 1):
911 1016 a, b = revs[d], revs[d + 1]
912 1017 nb = self.node(b)
913 1018
914 1019 if infocollect is not None:
915 1020 infocollect(nb)
916 1021
917 1022 d = self.revdiff(a, b)
918 1023 p = self.parents(nb)
919 1024 meta = nb + p[0] + p[1] + lookup(nb)
920 1025 yield changegroup.genchunk("%s%s" % (meta, d))
921 1026
922 1027 yield changegroup.closechunk()
923 1028
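Each chunk that group() yields is self-describing: an 80-byte header of four 20-byte hashes (node, p1, p2, link node) followed by the delta, which is exactly what addgroup() below unpacks:

    # given one chunk from the stream:
    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    delta = chunk[80:]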
924 1029 def addgroup(self, revs, linkmapper, transaction, unique=0):
925 1030 """
926 1031 add a delta group
927 1032
928 1033 given a set of deltas, add them to the revision log. the
929 1034 first delta is against its parent, which should be in our
930 1035 log, the rest are against the previous delta.
931 1036 """
932 1037
933 1038 #track the base of the current delta log
934 1039 r = self.count()
935 1040 t = r - 1
936 1041 node = None
937 1042
938 1043 base = prev = -1
939 1044 start = end = textlen = 0
940 1045 if r:
941 1046 end = self.end(t)
942 1047
943 1048 ifh = self.opener(self.indexfile, "a+")
944 1049 ifh.seek(0, 2)
945 1050 transaction.add(self.indexfile, ifh.tell())
946 1051 if self.inlinedata():
947 1052 dfh = None
948 1053 else:
949 1054 transaction.add(self.datafile, end)
950 1055 dfh = self.opener(self.datafile, "a")
951 1056
952 1057 # loop through our set of deltas
953 1058 chain = None
954 1059 for chunk in revs:
955 1060 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
956 1061 link = linkmapper(cs)
957 1062 if node in self.nodemap:
958 1063 # this can happen if two branches make the same change
959 1064 # if unique:
960 1065 # raise RevlogError(_("already have %s") % hex(node[:4]))
961 1066 chain = node
962 1067 continue
963 1068 delta = chunk[80:]
964 1069
965 1070 for p in (p1, p2):
966 1071 if not p in self.nodemap:
967 1072 raise RevlogError(_("unknown parent %s") % short(p))
968 1073
969 1074 if not chain:
970 1075 # retrieve the parent revision of the delta chain
971 1076 chain = p1
972 1077 if not chain in self.nodemap:
973 1078 raise RevlogError(_("unknown base %s") % short(chain[:4]))
974 1079
975 1080 # full versions are inserted when the needed deltas become
976 1081 # comparable to the uncompressed text or when the previous
977 1082 # version is not the one we have a delta against. We use
978 1083 # the size of the previous full rev as a proxy for the
979 1084 # current size.
980 1085
981 1086 if chain == prev:
982 1087 tempd = compress(delta)
983 1088 cdelta = tempd[0] + tempd[1]
984 1089 textlen = mdiff.patchedsize(textlen, delta)
985 1090
986 1091 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
987 1092 # flush our writes here so we can read it in revision
988 1093 if dfh:
989 1094 dfh.flush()
990 1095 ifh.flush()
991 1096 text = self.revision(chain)
992 1097 text = self.patches(text, [delta])
993 1098 chk = self.addrevision(text, transaction, link, p1, p2)
994 1099 if chk != node:
995 1100 raise RevlogError(_("consistency error adding group"))
996 1101 textlen = len(text)
997 1102 else:
998 1103 if self.version == 0:
999 1104 e = (end, len(cdelta), base, link, p1, p2, node)
1000 1105 else:
1001 1106 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1002 1107 link, self.rev(p1), self.rev(p2), node)
1003 1108 self.index.append(e)
1004 1109 self.nodemap[node] = r
1005 1110 if self.inlinedata():
1006 1111 ifh.write(struct.pack(self.indexformat, *e))
1007 1112 ifh.write(cdelta)
1008 1113 self.checkinlinesize(transaction, ifh)
1009 1114 if not self.inlinedata():
1010 1115 dfh = self.opener(self.datafile, "a")
1011 1116 ifh = self.opener(self.indexfile, "a")
1012 1117 else:
1013 1118 if not dfh:
1014 1119 # addrevision switched from inline to conventional
1015 1120 # reopen the index
1016 1121 dfh = self.opener(self.datafile, "a")
1017 1122 ifh = self.opener(self.indexfile, "a")
1018 1123 dfh.write(cdelta)
1019 1124 ifh.write(struct.pack(self.indexformat, *e))
1020 1125
1021 1126 t, r, chain, prev = r, r + 1, node, node
1022 1127 base = self.base(t)
1023 1128 start = self.start(base)
1024 1129 end = self.end(t)
1025 1130
1026 1131 if node is None:
1027 1132 raise RevlogError(_("group to be added is empty"))
1028 1133 return node
1029 1134
1030 1135 def strip(self, rev, minlink):
1031 1136 if self.count() == 0 or rev >= self.count():
1032 1137 return
1033 1138
1034 1139 if isinstance(self.index, lazyindex):
1035 1140 self.loadindexmap()
1036 1141
1037 1142 # When stripping away a revision, we need to make sure it
1038 1143 # does not actually belong to an older changeset.
1039 1144 # The minlink parameter defines the oldest revision
1040 1145 # we're allowed to strip away.
1041 1146 while minlink > self.index[rev][-4]:
1042 1147 rev += 1
1043 1148 if rev >= self.count():
1044 1149 return
1045 1150
1046 1151 # first truncate the files on disk
1047 1152 end = self.start(rev)
1048 1153 if not self.inlinedata():
1049 1154 df = self.opener(self.datafile, "a")
1050 1155 df.truncate(end)
1051 1156 end = rev * struct.calcsize(self.indexformat)
1052 1157 else:
1053 1158 end += rev * struct.calcsize(self.indexformat)
1054 1159
1055 1160 indexf = self.opener(self.indexfile, "a")
1056 1161 indexf.truncate(end)
1057 1162
1058 1163 # then reset internal state in memory to forget those revisions
1059 1164 self.cache = None
1060 1165 self.chunkcache = None
1061 1166 for x in xrange(rev, self.count()):
1062 1167 del self.nodemap[self.node(x)]
1063 1168
1064 1169 del self.index[rev:]
1065 1170
1066 1171 def checksize(self):
1067 1172 expected = 0
1068 1173 if self.count():
1069 1174 expected = self.end(self.count() - 1)
1070 1175
1071 1176 try:
1072 1177 f = self.opener(self.datafile)
1073 1178 f.seek(0, 2)
1074 1179 actual = f.tell()
1075 1180 dd = actual - expected
1076 1181 except IOError, inst:
1077 1182 if inst.errno != errno.ENOENT:
1078 1183 raise
1079 1184 dd = 0
1080 1185
1081 1186 try:
1082 1187 f = self.opener(self.indexfile)
1083 1188 f.seek(0, 2)
1084 1189 actual = f.tell()
1085 1190 s = struct.calcsize(self.indexformat)
1086 1191 i = actual / s
1087 1192 di = actual - (i * s)
1088 1193 if self.inlinedata():
1089 1194 databytes = 0
1090 1195 for r in xrange(self.count()):
1091 1196 databytes += self.length(r)
1092 1197 dd = 0
1093 1198 di = actual - self.count() * s - databytes
1094 1199 except IOError, inst:
1095 1200 if inst.errno != errno.ENOENT:
1096 1201 raise
1097 1202 di = 0
1098 1203
1099 1204 return (dd, di)
1100 1205
1101 1206
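A sketch of how a caller (hg verify does something similar) might interpret checksize(): a nonzero dd or di means the data or index file length disagrees with what the index entries imply:

    dd, di = rl.checksize()           # rl: an open revlog, hypothetical
    if dd:
        print _("data length off by %d bytes") % dd
    if di:
        print _("index contains %d extra bytes") % di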