Reduce index memory usage by storing the bare string instead of tuples...
mason@suse.com
r2080:1cbb14c0 default
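
The patch below keeps each index entry as the bare packed string read off
disk and only calls struct.unpack when a caller actually subscripts the
entry. A rough, illustrative sketch of why that saves memory (sizes vary by
interpreter and are not measured from this revision):

    import struct, sys

    indexformatng = ">Qiiiiii20s12x"    # the 64-byte RevlogNG record

    record = struct.pack(indexformatng, 0, 100, 120, 0, 0, -1, -1, b"x" * 20)
    entry = struct.unpack(indexformatng, record)

    # one flat 64-byte string versus a tuple plus eight boxed fields
    print(sys.getsizeof(record))
    print(sys.getsizeof(entry) + sum(sys.getsizeof(f) for f in entry))
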
@@ -1,1206 +1,1211 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25
26 26 def flagstr(flag):
27 27 if flag == "inline":
28 28 return REVLOGNGINLINEDATA
29 29 raise RevlogError(_("unknown revlog flag %s" % flag))
30 30
31 31 def hash(text, p1, p2):
32 32 """generate a hash from the given text and its parent hashes
33 33
34 34 This hash combines both the current file contents and its history
35 35 in a manner that makes it easy to distinguish nodes with the same
36 36 content in the revision graph.
37 37 """
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
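
Worth noting: the sort on the parents is what makes this hash symmetric in
p1 and p2. A quick check of that property, using hashlib as a stand-in for
the sha module imported above:

    import hashlib

    def nodehash(text, p1, p2):
        l = sorted([p1, p2])             # parent order must not matter
        s = hashlib.sha1(l[0])
        s.update(l[1])
        s.update(text)
        return s.digest()

    assert nodehash(b"d", b"a" * 20, b"b" * 20) == \
           nodehash(b"d", b"b" * 20, b"a" * 20)
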
45 45 def compress(text):
46 46 """ generate a possibly-compressed representation of text """
47 47 if not text: return ("", text)
48 48 if len(text) < 44:
49 49 if text[0] == '\0': return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 return ("", bin)
56 56
57 57 def decompress(bin):
58 58 """ decompress the given input """
59 59 if not bin: return bin
60 60 t = bin[0]
61 61 if t == '\0': return bin
62 62 if t == 'x': return zlib.decompress(bin)
63 63 if t == 'u': return bin[1:]
64 64 raise RevlogError(_("unknown compression type %r") % t)
65 65
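
The single-byte dispatch in decompress() works because the three cases
cannot collide: uncompressed chunks are tagged 'u', empty or null-leading
chunks pass through starting with '\0', and zlib streams produced with the
default settings begin with 'x'. A round-trip through the pair of functions
above:

    chunk = compress("hello " * 20)      # compressible, so returns ("", bin)
    assert decompress(chunk[0] + chunk[1]) == "hello " * 20
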
66 66 indexformatv0 = ">4l20s20s20s"
67 67 v0shaoffset = 56
68 68 # index ng:
69 69 # 6 bytes offset
70 70 # 2 bytes flags
71 71 # 4 bytes compressed length
72 72 # 4 bytes uncompressed length
73 73 # 4 bytes: base rev
74 74 # 4 bytes link rev
75 75 # 4 bytes parent 1 rev
76 76 # 4 bytes parent 2 rev
77 77 # 32 bytes: nodeid
78 78 indexformatng = ">Qiiiiii20s12x"
79 79 ngshaoffset = 32
80 80 versionformat = ">i"
81 81
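
A consequence of these fixed layouts that the lazy parser below depends on:
the 20-byte nodeid sits at a known offset (v0shaoffset or ngshaoffset), so
it can be sliced out of a raw record without unpacking the whole entry. A
small self-contained check:

    import struct

    indexformatng = ">Qiiiiii20s12x"
    ngshaoffset = 32

    node = b"\x01" * 20
    record = struct.pack(indexformatng, 0, 0, 0, 0, 0, -1, -1, node)
    assert record[ngshaoffset:ngshaoffset + 20] == node
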
82 82 class lazyparser(object):
83 83 """
84 84 this class avoids the need to parse the entirety of large indices
85 85 """
86 86 def __init__(self, dataf, size, indexformat, shaoffset):
87 87 self.dataf = dataf
88 88 self.format = indexformat
89 89 self.s = struct.calcsize(indexformat)
90 90 self.indexformat = indexformat
91 91 self.datasize = size
92 92 self.l = size/self.s
93 93 self.index = [None] * self.l
94 94 self.map = {nullid: -1}
95 95 self.allmap = 0
96 96 self.all = 0
97 97 self.mapfind_count = 0
98 98 self.shaoffset = shaoffset
99 99
100 100 def loadmap(self):
101 101 """
102 102 during a commit, we need to make sure the rev being added is
103 103 not a duplicate. This requires loading the entire index,
104 104 which is fairly slow. loadmap can load up just the node map,
105 105 which takes much less time.
106 106 """
107 107 if self.allmap: return
108 108 start = 0
109 109 end = self.datasize
110 110 self.allmap = 1
111 111 cur = 0
112 112 count = 0
113 113 blocksize = self.s * 256
114 114 self.dataf.seek(0)
115 115 while cur < end:
116 116 data = self.dataf.read(blocksize)
117 117 off = 0
118 118 for x in xrange(256):
119 119 n = data[off + self.shaoffset:off + self.shaoffset + 20]
120 120 self.map[n] = count
121 121 count += 1
122 122 if count >= self.l:
123 123 break
124 124 off += self.s
125 125 cur += blocksize
126 126
127 127 def loadblock(self, blockstart, blocksize, data=None):
128 128 if self.all: return
129 129 if data is None:
130 130 self.dataf.seek(blockstart)
131 131 data = self.dataf.read(blocksize)
132 132 lend = len(data) / self.s
133 133 i = blockstart / self.s
134 134 off = 0
135 135 for x in xrange(lend):
136 136 if self.index[i + x] == None:
137 137 b = data[off : off + self.s]
138 e = struct.unpack(self.format, b)
139 self.index[i + x] = e
140 self.map[e[-1]] = i + x
138 self.index[i + x] = b
139 n = b[self.shaoffset:self.shaoffset + 20]
140 self.map[n] = i + x
141 141 off += self.s
142 142
143 143 def findnode(self, node):
144 144 """search backwards through the index file for a specific node"""
145 145 if self.allmap: return None
146 146
147 147 # hg log will cause many many searches for the manifest
148 148 # nodes. After we get called a few times, just load the whole
149 149 # thing.
150 150 if self.mapfind_count > 8:
151 151 self.loadmap()
152 152 if node in self.map:
153 153 return node
154 154 return None
155 155 self.mapfind_count += 1
156 156 last = self.l - 1
157 157 while self.index[last] != None:
158 158 if last == 0:
159 159 self.all = 1
160 160 self.allmap = 1
161 161 return None
162 162 last -= 1
163 163 end = (last + 1) * self.s
164 164 blocksize = self.s * 256
165 165 while end >= 0:
166 166 start = max(end - blocksize, 0)
167 167 self.dataf.seek(start)
168 168 data = self.dataf.read(end - start)
169 169 findend = end - start
170 170 while True:
171 171 # we're searching backwards, so we have to make sure
172 172 # we don't find a changeset where this node is a parent
173 173 off = data.rfind(node, 0, findend)
174 174 findend = off
175 175 if off >= 0:
176 176 i = off / self.s
177 177 off = i * self.s
178 178 n = data[off + self.shaoffset:off + self.shaoffset + 20]
179 179 if n == node:
180 180 self.map[n] = i + start / self.s
181 181 return node
182 182 else:
183 183 break
184 184 end -= blocksize
185 185 return None
186 186
187 187 def loadindex(self, i=None, end=None):
188 188 if self.all: return
189 189 all = False
190 190 if i == None:
191 191 blockstart = 0
192 192 blocksize = (512 / self.s) * self.s
193 193 end = self.datasize
194 194 all = True
195 195 else:
196 196 if end:
197 197 blockstart = i * self.s
198 198 end = end * self.s
199 199 blocksize = end - blockstart
200 200 else:
201 201 blockstart = (i & ~(32)) * self.s
202 202 blocksize = self.s * 64
203 203 end = blockstart + blocksize
204 204 while blockstart < end:
205 205 self.loadblock(blockstart, blocksize)
206 206 blockstart += blocksize
207 207 if all: self.all = True
208 208
209 209 class lazyindex(object):
210 210 """a lazy version of the index array"""
211 211 def __init__(self, parser):
212 212 self.p = parser
213 213 def __len__(self):
214 214 return len(self.p.index)
215 215 def load(self, pos):
216 216 if pos < 0:
217 217 pos += len(self.p.index)
218 218 self.p.loadindex(pos)
219 219 return self.p.index[pos]
220 220 def __getitem__(self, pos):
221 return self.p.index[pos] or self.load(pos)
221 ret = self.p.index[pos] or self.load(pos)
222 if isinstance(ret, str):
223 ret = struct.unpack(self.p.indexformat, ret)
224 return ret
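
This __getitem__ is where the commit's technique lands: entries stay packed
until someone subscripts them. Reduced to a standalone sketch (the class
name and attributes are illustrative, not part of revlog):

    import struct

    class packedlist(object):
        """hold packed records, unpacking lazily on access"""
        def __init__(self, fmt, records):
            self.fmt = fmt
            self.records = records     # bare strings, straight from disk
        def __getitem__(self, pos):
            ret = self.records[pos]
            if isinstance(ret, str):   # still packed (py2 str, as here)?
                ret = struct.unpack(self.fmt, ret)
            return ret
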
222 225 def __setitem__(self, pos, item):
223 226 self.p.index[pos] = item
224 227 def __delitem__(self, pos):
225 228 del self.p.index[pos]
226 229 def append(self, e):
227 230 self.p.index.append(e)
228 231
229 232 class lazymap(object):
230 233 """a lazy version of the node map"""
231 234 def __init__(self, parser):
232 235 self.p = parser
233 236 def load(self, key):
234 237 n = self.p.findnode(key)
235 238 if n == None:
236 239 raise KeyError(key)
237 240 def __contains__(self, key):
238 241 if key in self.p.map:
239 242 return True
240 243 self.p.loadmap()
241 244 return key in self.p.map
242 245 def __iter__(self):
243 246 yield nullid
244 247 for i in xrange(self.p.l):
245 try:
246 yield self.p.index[i][-1]
247 except:
248 ret = self.p.index[i]
249 if not ret:
248 250 self.p.loadindex(i)
249 yield self.p.index[i][-1]
251 ret = self.p.index[i]
252 if isinstance(ret, str):
253 ret = struct.unpack(self.p.indexformat, ret)
254 yield ret[-1]
250 255 def __getitem__(self, key):
251 256 try:
252 257 return self.p.map[key]
253 258 except KeyError:
254 259 try:
255 260 self.load(key)
256 261 return self.p.map[key]
257 262 except KeyError:
258 263 raise KeyError("node " + hex(key))
259 264 def __setitem__(self, key, val):
260 265 self.p.map[key] = val
261 266 def __delitem__(self, key):
262 267 del self.p.map[key]
263 268
264 269 class RevlogError(Exception): pass
265 270
266 271 class revlog(object):
267 272 """
268 273 the underlying revision storage object
269 274
270 275 A revlog consists of two parts, an index and the revision data.
271 276
272 277 The index is a file with a fixed record size containing
273 278 information on each revision, including its nodeid (hash), the
274 279 nodeids of its parents, the position and offset of its data within
275 280 the data file, and the revision it's based on. Finally, each entry
276 281 contains a linkrev entry that can serve as a pointer to external
277 282 data.
278 283
279 284 The revision data itself is a linear collection of data chunks.
280 285 Each chunk represents a revision and is usually represented as a
281 286 delta against the previous chunk. To bound lookup time, runs of
282 287 deltas are limited to about 2 times the length of the original
283 288 version data. This makes retrieval of a version proportional to
284 289 its size, or O(1) relative to the number of revisions.
285 290
286 291 Both pieces of the revlog are written to in an append-only
287 292 fashion, which means we never need to rewrite a file to insert or
288 293 remove data, and can use some simple techniques to avoid the need
289 294 for locking while reading.
290 295 """
291 296 def __init__(self, opener, indexfile, datafile, defversion=0):
292 297 """
293 298 create a revlog object
294 299
295 300 opener is a function that abstracts the file opening operation
296 301 and can be used to implement COW semantics or the like.
297 302 """
298 303 self.indexfile = indexfile
299 304 self.datafile = datafile
300 305 self.opener = opener
301 306
302 307 self.indexstat = None
303 308 self.cache = None
304 309 self.chunkcache = None
305 310 self.defversion = defversion
306 311 self.load()
307 312
308 313 def load(self):
309 314 v = self.defversion
310 315 try:
311 316 f = self.opener(self.indexfile)
312 317 i = f.read(4)
313 318 f.seek(0)
314 319 except IOError, inst:
315 320 if inst.errno != errno.ENOENT:
316 321 raise
317 322 i = ""
318 323 else:
319 324 try:
320 325 st = os.fstat(f.fileno())
321 326 except AttributeError, inst:
322 327 st = None
323 328 else:
324 329 oldst = self.indexstat
325 330 if (oldst and st.st_dev == oldst.st_dev
326 331 and st.st_ino == oldst.st_ino
327 332 and st.st_mtime == oldst.st_mtime
328 333 and st.st_ctime == oldst.st_ctime):
329 334 return
330 335 self.indexstat = st
331 336 if len(i) > 0:
332 337 v = struct.unpack(versionformat, i)[0]
333 338 flags = v & ~0xFFFF
334 339 fmt = v & 0xFFFF
335 340 if fmt == 0:
336 341 if flags:
337 342 raise RevlogError(_("index %s invalid flags %x for format v0" %
338 343 (self.indexfile, flags)))
339 344 elif fmt == REVLOGNG:
340 345 if flags & ~REVLOGNGINLINEDATA:
341 346 raise RevlogError(_("index %s invalid flags %x for revlogng" %
342 347 (self.indexfile, flags)))
343 348 else:
344 349 raise RevlogError(_("index %s invalid format %d" %
345 350 (self.indexfile, fmt)))
346 351 self.version = v
347 352 if v == 0:
348 353 self.indexformat = indexformatv0
349 354 shaoffset = v0shaoffset
350 355 else:
351 356 self.indexformat = indexformatng
352 357 shaoffset = ngshaoffset
353 358
354 359 if i:
355 360 if not self.inlinedata() and st and st.st_size > 10000:
356 361 # big index, let's parse it on demand
357 362 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
358 363 self.index = lazyindex(parser)
359 364 self.nodemap = lazymap(parser)
360 365 else:
361 366 i = f.read()
362 367 self.parseindex(i)
363 368 if self.inlinedata():
364 369 # we've already got the entire data file read in, save it
365 370 # in the chunk data
366 371 self.chunkcache = (0, i)
367 372 if self.version != 0:
368 373 e = list(self.index[0])
369 374 type = self.ngtype(e[0])
370 375 e[0] = self.offset_type(0, type)
371 376 self.index[0] = e
372 377 else:
373 378 self.nodemap = { nullid: -1}
374 379 self.index = []
375 380
376 381
377 382 def parseindex(self, data):
378 383 s = struct.calcsize(self.indexformat)
379 384 l = len(data)
380 385 self.index = []
381 386 self.nodemap = {nullid: -1}
382 387 inline = self.inlinedata()
383 388 off = 0
384 389 n = 0
385 390 while off < l:
386 391 e = struct.unpack(self.indexformat, data[off:off + s])
387 392 self.index.append(e)
388 393 self.nodemap[e[-1]] = n
389 394 n += 1
390 395 off += s
391 396 if inline:
392 397 off += e[1]
393 398
394 399 def ngoffset(self, q):
395 400 if q & 0xFFFF:
396 401 raise RevlogError(_('%s: incompatible revision flag %x') %
396 401 (self.indexfile, q))
398 403 return long(q >> 16)
399 404
400 405 def ngtype(self, q):
401 406 return int(q & 0xFFFF)
402 407
403 408 def offset_type(self, offset, type):
404 409 return long(long(offset) << 16 | type)
405 410
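
These three helpers multiplex a byte offset and a 16-bit type field into
the single Q slot of the RevlogNG record. A quick round-trip with
illustrative values:

    offset, flag = 123456, 0
    q = (offset << 16) | flag       # offset_type
    assert q >> 16 == offset        # ngoffset (flag bits must be zero)
    assert q & 0xFFFF == flag       # ngtype
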
406 411 def loadindex(self, start, end):
407 412 """load a block of indexes all at once from the lazy parser"""
408 413 if isinstance(self.index, lazyindex):
409 414 self.index.p.loadindex(start, end)
410 415
411 416 def loadindexmap(self):
412 417 """loads both the map and the index from the lazy parser"""
413 418 if isinstance(self.index, lazyindex):
414 419 p = self.index.p
415 420 p.loadindex()
416 421 self.nodemap = p.map
417 422
418 423 def loadmap(self):
419 424 """loads the map from the lazy parser"""
420 425 if isinstance(self.nodemap, lazymap):
421 426 self.nodemap.p.loadmap()
422 427 self.nodemap = self.nodemap.p.map
423 428
424 429 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
425 430 def tip(self): return self.node(len(self.index) - 1)
426 431 def count(self): return len(self.index)
427 432 def node(self, rev):
428 433 return (rev < 0) and nullid or self.index[rev][-1]
429 434 def rev(self, node):
430 435 try:
431 436 return self.nodemap[node]
432 437 except KeyError:
433 438 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
434 439 def linkrev(self, node): return self.index[self.rev(node)][-4]
435 440 def parents(self, node):
436 441 if node == nullid: return (nullid, nullid)
437 442 r = self.rev(node)
438 443 d = self.index[r][-3:-1]
439 444 if self.version == 0:
440 445 return d
441 446 return [ self.node(x) for x in d ]
442 447 def start(self, rev):
443 448 if rev < 0:
444 449 return -1
445 450 if self.version != 0:
446 451 return self.ngoffset(self.index[rev][0])
447 452 return self.index[rev][0]
448 453
449 454 def end(self, rev): return self.start(rev) + self.length(rev)
450 455
451 456 def size(self, rev):
452 457 """return the length of the uncompressed text for a given revision"""
453 458 l = -1
454 459 if self.version != 0:
455 460 l = self.index[rev][2]
456 461 if l >= 0:
457 462 return l
458 463
459 464 t = self.revision(self.node(rev))
460 465 return len(t)
461 466
462 467 # alternate implementation, The advantage to this code is it
463 468 # will be faster for a single revision. But, the results are not
464 469 # cached, so finding the size of every revision will be slower.
465 470 """
466 471 if self.cache and self.cache[1] == rev:
467 472 return len(self.cache[2])
468 473
469 474 base = self.base(rev)
470 475 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
471 476 base = self.cache[1]
472 477 text = self.cache[2]
473 478 else:
474 479 text = self.revision(self.node(base))
475 480
476 481 l = len(text)
477 482 for x in xrange(base + 1, rev + 1):
478 483 l = mdiff.patchedsize(l, self.chunk(x))
479 484 return l
480 485 """
481 486
482 487 def length(self, rev):
483 488 if rev < 0:
484 489 return 0
485 490 else:
486 491 return self.index[rev][1]
487 492 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
488 493
489 494 def reachable(self, rev, stop=None):
490 495 reachable = {}
491 496 visit = [rev]
492 497 reachable[rev] = 1
493 498 if stop:
494 499 stopn = self.rev(stop)
495 500 else:
496 501 stopn = 0
497 502 while visit:
498 503 n = visit.pop(0)
499 504 if n == stop:
500 505 continue
501 506 if n == nullid:
502 507 continue
503 508 for p in self.parents(n):
504 509 if self.rev(p) < stopn:
505 510 continue
506 511 if p not in reachable:
507 512 reachable[p] = 1
508 513 visit.append(p)
509 514 return reachable
510 515
511 516 def nodesbetween(self, roots=None, heads=None):
512 517 """Return a tuple containing three elements. Elements 1 and 2 contain
513 518 a final list of bases and heads after all the unreachable ones have been
514 519 pruned. Element 0 contains a topologically sorted list of all
515 520 nodes that satisfy these constraints:
516 521
517 522 1. All nodes must be descended from a node in roots (the nodes on
518 523 roots are considered descended from themselves).
519 524 2. All nodes must also be ancestors of a node in heads (the nodes in
520 525 heads are considered to be their own ancestors).
521 526
522 527 If roots is unspecified, nullid is assumed as the only root.
523 528 If heads is unspecified, it is taken to be the output of the
524 529 heads method (i.e. a list of all nodes in the repository that
525 530 have no children)."""
526 531 nonodes = ([], [], [])
527 532 if roots is not None:
528 533 roots = list(roots)
529 534 if not roots:
530 535 return nonodes
531 536 lowestrev = min([self.rev(n) for n in roots])
532 537 else:
533 538 roots = [nullid] # Everybody's a descendent of nullid
534 539 lowestrev = -1
535 540 if (lowestrev == -1) and (heads is None):
536 541 # We want _all_ the nodes!
537 542 return ([self.node(r) for r in xrange(0, self.count())],
538 543 [nullid], list(self.heads()))
539 544 if heads is None:
540 545 # All nodes are ancestors, so the latest ancestor is the last
541 546 # node.
542 547 highestrev = self.count() - 1
543 548 # Set ancestors to None to signal that every node is an ancestor.
544 549 ancestors = None
545 550 # Set heads to an empty dictionary for later discovery of heads
546 551 heads = {}
547 552 else:
548 553 heads = list(heads)
549 554 if not heads:
550 555 return nonodes
551 556 ancestors = {}
552 557 # Start at the top and keep marking parents until we're done.
553 558 nodestotag = heads[:]
554 559 # Turn heads into a dictionary so we can remove 'fake' heads.
555 560 # Also, later we will be using it to filter out the heads we can't
556 561 # find from roots.
557 562 heads = dict.fromkeys(heads, 0)
558 563 # Remember where the top was so we can use it as a limit later.
559 564 highestrev = max([self.rev(n) for n in nodestotag])
560 565 while nodestotag:
561 566 # grab a node to tag
562 567 n = nodestotag.pop()
563 568 # Never tag nullid
564 569 if n == nullid:
565 570 continue
566 571 # A node's revision number represents its place in a
567 572 # topologically sorted list of nodes.
568 573 r = self.rev(n)
569 574 if r >= lowestrev:
570 575 if n not in ancestors:
571 576 # If we are possibly a descendent of one of the roots
572 577 # and we haven't already been marked as an ancestor
573 578 ancestors[n] = 1 # Mark as ancestor
574 579 # Add non-nullid parents to list of nodes to tag.
575 580 nodestotag.extend([p for p in self.parents(n) if
576 581 p != nullid])
577 582 elif n in heads: # We've seen it before, is it a fake head?
578 583 # So it is, real heads should not be the ancestors of
579 584 # any other heads.
580 585 heads.pop(n)
581 586 if not ancestors:
582 587 return nonodes
583 588 # Now that we have our set of ancestors, we want to remove any
584 589 # roots that are not ancestors.
585 590
586 591 # If one of the roots was nullid, everything is included anyway.
587 592 if lowestrev > -1:
588 593 # But, since we weren't, let's recompute the lowest rev to not
589 594 # include roots that aren't ancestors.
590 595
591 596 # Filter out roots that aren't ancestors of heads
592 597 roots = [n for n in roots if n in ancestors]
593 598 # Recompute the lowest revision
594 599 if roots:
595 600 lowestrev = min([self.rev(n) for n in roots])
596 601 else:
597 602 # No more roots? Return empty list
598 603 return nonodes
599 604 else:
600 605 # We are descending from nullid, and don't need to care about
601 606 # any other roots.
602 607 lowestrev = -1
603 608 roots = [nullid]
604 609 # Transform our roots list into a 'set' (i.e. a dictionary where the
605 610 # values don't matter).
606 611 descendents = dict.fromkeys(roots, 1)
607 612 # Also, keep the original roots so we can filter out roots that aren't
608 613 # 'real' roots (i.e. are descended from other roots).
609 614 roots = descendents.copy()
610 615 # Our topologically sorted list of output nodes.
611 616 orderedout = []
612 617 # Don't start at nullid since we don't want nullid in our output list,
613 618 # and if nullid shows up in descendents, empty parents will look like
614 619 # they're descendents.
615 620 for r in xrange(max(lowestrev, 0), highestrev + 1):
616 621 n = self.node(r)
617 622 isdescendent = False
618 623 if lowestrev == -1: # Everybody is a descendent of nullid
619 624 isdescendent = True
620 625 elif n in descendents:
621 626 # n is already a descendent
622 627 isdescendent = True
623 628 # This check only needs to be done here because all the roots
624 629 # will start being marked as descendents before the loop.
625 630 if n in roots:
626 631 # If n was a root, check if it's a 'real' root.
627 632 p = tuple(self.parents(n))
628 633 # If any of its parents are descendents, it's not a root.
629 634 if (p[0] in descendents) or (p[1] in descendents):
630 635 roots.pop(n)
631 636 else:
632 637 p = tuple(self.parents(n))
633 638 # A node is a descendent if either of its parents are
634 639 # descendents. (We seeded the descendents list with the roots
635 640 # up there, remember?)
636 641 if (p[0] in descendents) or (p[1] in descendents):
637 642 descendents[n] = 1
638 643 isdescendent = True
639 644 if isdescendent and ((ancestors is None) or (n in ancestors)):
640 645 # Only include nodes that are both descendents and ancestors.
641 646 orderedout.append(n)
642 647 if (ancestors is not None) and (n in heads):
643 648 # We're trying to figure out which heads are reachable
644 649 # from roots.
645 650 # Mark this head as having been reached
646 651 heads[n] = 1
647 652 elif ancestors is None:
648 653 # Otherwise, we're trying to discover the heads.
649 654 # Assume this is a head because if it isn't, the next step
650 655 # will eventually remove it.
651 656 heads[n] = 1
652 657 # But, obviously its parents aren't.
653 658 for p in self.parents(n):
654 659 heads.pop(p, None)
655 660 heads = [n for n in heads.iterkeys() if heads[n] != 0]
656 661 roots = roots.keys()
657 662 assert orderedout
658 663 assert roots
659 664 assert heads
660 665 return (orderedout, roots, heads)
661 666
662 667 def heads(self, start=None):
663 668 """return the list of all nodes that have no children
664 669
665 670 if start is specified, only heads that are descendants of
666 671 start will be returned
667 672
668 673 """
669 674 if start is None:
670 675 start = nullid
671 676 reachable = {start: 1}
672 677 heads = {start: 1}
673 678 startrev = self.rev(start)
674 679
675 680 for r in xrange(startrev + 1, self.count()):
676 681 n = self.node(r)
677 682 for pn in self.parents(n):
678 683 if pn in reachable:
679 684 reachable[n] = 1
680 685 heads[n] = 1
681 686 if pn in heads:
682 687 del heads[pn]
683 688 return heads.keys()
684 689
685 690 def children(self, node):
686 691 """find the children of a given node"""
687 692 c = []
688 693 p = self.rev(node)
689 694 for r in range(p + 1, self.count()):
690 695 n = self.node(r)
691 696 for pn in self.parents(n):
692 697 if pn == node:
693 698 c.append(n)
694 699 continue
695 700 elif pn == nullid:
696 701 continue
697 702 return c
698 703
699 704 def lookup(self, id):
700 705 """locate a node based on revision number or subset of hex nodeid"""
701 706 try:
702 707 rev = int(id)
703 708 if str(rev) != id: raise ValueError
704 709 if rev < 0: rev = self.count() + rev
705 710 if rev < 0 or rev >= self.count(): raise ValueError
706 711 return self.node(rev)
707 712 except (ValueError, OverflowError):
708 713 c = []
709 714 for n in self.nodemap:
710 715 if hex(n).startswith(id):
711 716 c.append(n)
712 717 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
713 718 if len(c) < 1: raise RevlogError(_("No match found"))
714 719 return c[0]
715 720
716 721 return None
717 722
718 723 def diff(self, a, b):
719 724 """return a delta between two revisions"""
720 725 return mdiff.textdiff(a, b)
721 726
722 727 def patches(self, t, pl):
723 728 """apply a list of patches to a string"""
724 729 return mdiff.patches(t, pl)
725 730
726 731 def chunk(self, rev, df=None, cachelen=4096):
727 732 start, length = self.start(rev), self.length(rev)
728 733 inline = self.inlinedata()
729 734 if inline:
730 735 start += (rev + 1) * struct.calcsize(self.indexformat)
731 736 end = start + length
732 737 def loadcache(df):
733 738 cache_length = max(cachelen, length) # 4k
734 739 if not df:
735 740 if inline:
736 741 df = self.opener(self.indexfile)
737 742 else:
738 743 df = self.opener(self.datafile)
739 744 df.seek(start)
740 745 self.chunkcache = (start, df.read(cache_length))
741 746
742 747 if not self.chunkcache:
743 748 loadcache(df)
744 749
745 750 cache_start = self.chunkcache[0]
746 751 cache_end = cache_start + len(self.chunkcache[1])
747 752 if start >= cache_start and end <= cache_end:
748 753 # it is cached
749 754 offset = start - cache_start
750 755 else:
751 756 loadcache(df)
752 757 offset = 0
753 758
754 759 #def checkchunk():
755 760 # df = self.opener(self.datafile)
756 761 # df.seek(start)
757 762 # return df.read(length)
758 763 #assert s == checkchunk()
759 764 return decompress(self.chunkcache[1][offset:offset + length])
760 765
761 766 def delta(self, node):
762 767 """return or calculate a delta between a node and its predecessor"""
763 768 r = self.rev(node)
764 769 return self.revdiff(r - 1, r)
765 770
766 771 def revdiff(self, rev1, rev2):
767 772 """return or calculate a delta between two revisions"""
768 773 b1 = self.base(rev1)
769 774 b2 = self.base(rev2)
770 775 if b1 == b2 and rev1 + 1 == rev2:
771 776 return self.chunk(rev2)
772 777 else:
773 778 return self.diff(self.revision(self.node(rev1)),
774 779 self.revision(self.node(rev2)))
775 780
776 781 def revision(self, node):
777 782 """return an uncompressed revision of a given"""
778 783 if node == nullid: return ""
779 784 if self.cache and self.cache[0] == node: return self.cache[2]
780 785
781 786 # look up what we need to read
782 787 text = None
783 788 rev = self.rev(node)
784 789 base = self.base(rev)
785 790
786 791 if self.inlinedata():
787 792 # we probably have the whole chunk cached
788 793 df = None
789 794 else:
790 795 df = self.opener(self.datafile)
791 796
792 797 # do we have useful data cached?
793 798 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
794 799 base = self.cache[1]
795 800 text = self.cache[2]
796 801 self.loadindex(base, rev + 1)
797 802 else:
798 803 self.loadindex(base, rev + 1)
799 804 text = self.chunk(base, df=df)
800 805
801 806 bins = []
802 807 for r in xrange(base + 1, rev + 1):
803 808 bins.append(self.chunk(r, df=df))
804 809
805 810 text = self.patches(text, bins)
806 811
807 812 p1, p2 = self.parents(node)
808 813 if node != hash(text, p1, p2):
809 814 raise RevlogError(_("integrity check failed on %s:%d")
810 815 % (self.datafile, rev))
811 816
812 817 self.cache = (node, rev, text)
813 818 return text
814 819
815 820 def checkinlinesize(self, tr, fp=None):
816 821 if not self.inlinedata():
817 822 return
818 823 if not fp:
819 824 fp = self.opener(self.indexfile, 'r')
820 825 size = fp.tell()
821 826 if size < 131072:
822 827 return
823 828 tr.add(self.datafile, 0)
824 829 df = self.opener(self.datafile, 'w')
825 830 calc = struct.calcsize(self.indexformat)
826 831 for r in xrange(self.count()):
827 832 start = self.start(r) + (r + 1) * calc
828 833 length = self.length(r)
829 834 fp.seek(start)
830 835 d = fp.read(length)
831 836 df.write(d)
832 837 fp.close()
833 838 df.close()
834 839 fp = self.opener(self.indexfile, 'w', atomictemp=True)
835 840 self.version &= ~(REVLOGNGINLINEDATA)
836 841 if self.count():
837 842 x = self.index[0]
838 843 e = struct.pack(self.indexformat, *x)[4:]
839 844 l = struct.pack(versionformat, self.version)
840 845 fp.write(l)
841 846 fp.write(e)
842 847
843 848 for i in xrange(1, self.count()):
844 849 x = self.index[i]
845 850 e = struct.pack(self.indexformat, *x)
846 851 fp.write(e)
847 852
848 853 # if we don't call rename, the temp file will never replace the
849 854 # real index
850 855 fp.rename()
851 856 self.chunkcache = None
852 857
853 858 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
854 859 """add a revision to the log
855 860
856 861 text - the revision data to add
857 862 transaction - the transaction object used for rollback
858 863 link - the linkrev data to add
859 864 p1, p2 - the parent nodeids of the revision
860 865 d - an optional precomputed delta
861 866 """
862 867 if text is None: text = ""
863 868 if p1 is None: p1 = self.tip()
864 869 if p2 is None: p2 = nullid
865 870
866 871 node = hash(text, p1, p2)
867 872
868 873 if node in self.nodemap:
869 874 return node
870 875
871 876 n = self.count()
872 877 t = n - 1
873 878
874 879 if n:
875 880 base = self.base(t)
876 881 start = self.start(base)
877 882 end = self.end(t)
878 883 if not d:
879 884 prev = self.revision(self.tip())
880 885 d = self.diff(prev, str(text))
881 886 data = compress(d)
882 887 l = len(data[1]) + len(data[0])
883 888 dist = end - start + l
884 889
885 890 # full versions are inserted when the needed deltas
886 891 # become comparable to the uncompressed text
887 892 if not n or dist > len(text) * 2:
888 893 data = compress(text)
889 894 l = len(data[1]) + len(data[0])
890 895 base = n
891 896 else:
892 897 base = self.base(t)
893 898
894 899 offset = 0
895 900 if t >= 0:
896 901 offset = self.end(t)
897 902
898 903 if self.version == 0:
899 904 e = (offset, l, base, link, p1, p2, node)
900 905 else:
901 906 e = (self.offset_type(offset, 0), l, len(text),
902 907 base, link, self.rev(p1), self.rev(p2), node)
903 908
904 909 self.index.append(e)
905 910 self.nodemap[node] = n
906 911 entry = struct.pack(self.indexformat, *e)
907 912
908 913 if not self.inlinedata():
909 914 transaction.add(self.datafile, offset)
910 915 transaction.add(self.indexfile, n * len(entry))
911 916 f = self.opener(self.datafile, "a")
912 917 if data[0]:
913 918 f.write(data[0])
914 919 f.write(data[1])
915 920 f = self.opener(self.indexfile, "a")
916 921 else:
917 922 f = self.opener(self.indexfile, "a+")
918 923 f.seek(0, 2)
919 924 transaction.add(self.indexfile, f.tell())
920 925
921 926 if len(self.index) == 1 and self.version != 0:
922 927 l = struct.pack(versionformat, self.version)
923 928 f.write(l)
924 929 entry = entry[4:]
925 930
926 931 f.write(entry)
927 932
928 933 if self.inlinedata():
929 934 f.write(data[0])
930 935 f.write(data[1])
931 936 self.checkinlinesize(transaction, f)
932 937
933 938 self.cache = (node, n, text)
934 939 return node
935 940
936 941 def ancestor(self, a, b):
937 942 """calculate the least common ancestor of nodes a and b"""
938 943 # calculate the distance of every node from root
939 944 dist = {nullid: 0}
940 945 for i in xrange(self.count()):
941 946 n = self.node(i)
942 947 p1, p2 = self.parents(n)
943 948 dist[n] = max(dist[p1], dist[p2]) + 1
944 949
945 950 # traverse ancestors in order of decreasing distance from root
946 951 def ancestors(node):
947 952 # we store negative distances because heap returns smallest member
948 953 h = [(-dist[node], node)]
949 954 seen = {}
950 955 while h:
951 956 d, n = heapq.heappop(h)
952 957 if n not in seen:
953 958 seen[n] = 1
954 959 yield (-d, n)
955 960 for p in self.parents(n):
956 961 heapq.heappush(h, (-dist[p], p))
957 962
958 963 def generations(node):
959 964 sg, s = None, {}
960 965 for g,n in ancestors(node):
961 966 if g != sg:
962 967 if sg:
963 968 yield sg, s
964 969 sg, s = g, {n:1}
965 970 else:
966 971 s[n] = 1
967 972 yield sg, s
968 973
969 974 x = generations(a)
970 975 y = generations(b)
971 976 gx = x.next()
972 977 gy = y.next()
973 978
974 979 # increment each ancestor list until it is closer to root than
975 980 # the other, or they match
976 981 while 1:
977 982 #print "ancestor gen %s %s" % (gx[0], gy[0])
978 983 if gx[0] == gy[0]:
979 984 # find the intersection
980 985 i = [ n for n in gx[1] if n in gy[1] ]
981 986 if i:
982 987 return i[0]
983 988 else:
984 989 #print "next"
985 990 gy = y.next()
986 991 gx = x.next()
987 992 elif gx[0] < gy[0]:
988 993 #print "next y"
989 994 gy = y.next()
990 995 else:
991 996 #print "next x"
992 997 gx = x.next()
993 998
994 999 def group(self, nodelist, lookup, infocollect=None):
995 1000 """calculate a delta group
996 1001
997 1002 Given a list of changeset revs, return a set of deltas and
998 1003 metadata corresponding to nodes. the first delta is
999 1004 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1000 1005 have this parent as it has all history before these
1001 1006 changesets. parent is parent[0]
1002 1007 """
1003 1008 revs = [self.rev(n) for n in nodelist]
1004 1009
1005 1010 # if we don't have any revisions touched by these changesets, bail
1006 1011 if not revs:
1007 1012 yield changegroup.closechunk()
1008 1013 return
1009 1014
1010 1015 # add the parent of the first rev
1011 1016 p = self.parents(self.node(revs[0]))[0]
1012 1017 revs.insert(0, self.rev(p))
1013 1018
1014 1019 # build deltas
1015 1020 for d in xrange(0, len(revs) - 1):
1016 1021 a, b = revs[d], revs[d + 1]
1017 1022 nb = self.node(b)
1018 1023
1019 1024 if infocollect is not None:
1020 1025 infocollect(nb)
1021 1026
1022 1027 d = self.revdiff(a, b)
1023 1028 p = self.parents(nb)
1024 1029 meta = nb + p[0] + p[1] + lookup(nb)
1025 1030 yield changegroup.genchunk("%s%s" % (meta, d))
1026 1031
1027 1032 yield changegroup.closechunk()
1028 1033
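
Each chunk that group() yields is an 80-byte header (four 20-byte nodes)
followed by the delta payload; addgroup() below unpacks it the same way.
As a sketch:

    import struct

    def parse_chunk(chunk):
        # the revision's node, its two parents, and the changeset node
        # whose linkrev it maps to; the rest of the chunk is the delta
        node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
        return node, p1, p2, cs, chunk[80:]
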
1029 1034 def addgroup(self, revs, linkmapper, transaction, unique=0):
1030 1035 """
1031 1036 add a delta group
1032 1037
1033 1038 given a set of deltas, add them to the revision log. the
1034 1039 first delta is against its parent, which should be in our
1035 1040 log, the rest are against the previous delta.
1036 1041 """
1037 1042
1038 1043 #track the base of the current delta log
1039 1044 r = self.count()
1040 1045 t = r - 1
1041 1046 node = None
1042 1047
1043 1048 base = prev = -1
1044 1049 start = end = textlen = 0
1045 1050 if r:
1046 1051 end = self.end(t)
1047 1052
1048 1053 ifh = self.opener(self.indexfile, "a+")
1049 1054 ifh.seek(0, 2)
1050 1055 transaction.add(self.indexfile, ifh.tell())
1051 1056 if self.inlinedata():
1052 1057 dfh = None
1053 1058 else:
1054 1059 transaction.add(self.datafile, end)
1055 1060 dfh = self.opener(self.datafile, "a")
1056 1061
1057 1062 # loop through our set of deltas
1058 1063 chain = None
1059 1064 for chunk in revs:
1060 1065 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1061 1066 link = linkmapper(cs)
1062 1067 if node in self.nodemap:
1063 1068 # this can happen if two branches make the same change
1064 1069 # if unique:
1065 1070 # raise RevlogError(_("already have %s") % hex(node[:4]))
1066 1071 chain = node
1067 1072 continue
1068 1073 delta = chunk[80:]
1069 1074
1070 1075 for p in (p1, p2):
1071 1076 if not p in self.nodemap:
1072 1077 raise RevlogError(_("unknown parent %s") % short(p1))
1073 1078
1074 1079 if not chain:
1075 1080 # retrieve the parent revision of the delta chain
1076 1081 chain = p1
1077 1082 if not chain in self.nodemap:
1078 1083 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1079 1084
1080 1085 # full versions are inserted when the needed deltas become
1081 1086 # comparable to the uncompressed text or when the previous
1082 1087 # version is not the one we have a delta against. We use
1083 1088 # the size of the previous full rev as a proxy for the
1084 1089 # current size.
1085 1090
1086 1091 if chain == prev:
1087 1092 tempd = compress(delta)
1088 1093 cdelta = tempd[0] + tempd[1]
1089 1094 textlen = mdiff.patchedsize(textlen, delta)
1090 1095
1091 1096 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1092 1097 # flush our writes here so we can read it in revision
1093 1098 if dfh:
1094 1099 dfh.flush()
1095 1100 ifh.flush()
1096 1101 text = self.revision(chain)
1097 1102 text = self.patches(text, [delta])
1098 1103 chk = self.addrevision(text, transaction, link, p1, p2)
1099 1104 if chk != node:
1100 1105 raise RevlogError(_("consistency error adding group"))
1101 1106 textlen = len(text)
1102 1107 else:
1103 1108 if self.version == 0:
1104 1109 e = (end, len(cdelta), base, link, p1, p2, node)
1105 1110 else:
1106 1111 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1107 1112 link, self.rev(p1), self.rev(p2), node)
1108 1113 self.index.append(e)
1109 1114 self.nodemap[node] = r
1110 1115 if self.inlinedata():
1111 1116 ifh.write(struct.pack(self.indexformat, *e))
1112 1117 ifh.write(cdelta)
1113 1118 self.checkinlinesize(transaction, ifh)
1114 1119 if not self.inlinedata():
1115 1120 dfh = self.opener(self.datafile, "a")
1116 1121 ifh = self.opener(self.indexfile, "a")
1117 1122 else:
1118 1123 if not dfh:
1119 1124 # addrevision switched from inline to conventional
1120 1125 # reopen the index
1121 1126 dfh = self.opener(self.datafile, "a")
1122 1127 ifh = self.opener(self.indexfile, "a")
1123 1128 dfh.write(cdelta)
1124 1129 ifh.write(struct.pack(self.indexformat, *e))
1125 1130
1126 1131 t, r, chain, prev = r, r + 1, node, node
1127 1132 base = self.base(t)
1128 1133 start = self.start(base)
1129 1134 end = self.end(t)
1130 1135
1131 1136 if node is None:
1132 1137 raise RevlogError(_("group to be added is empty"))
1133 1138 return node
1134 1139
1135 1140 def strip(self, rev, minlink):
1136 1141 if self.count() == 0 or rev >= self.count():
1137 1142 return
1138 1143
1139 1144 if isinstance(self.index, lazyindex):
1140 1145 self.loadindexmap()
1141 1146
1142 1147 # When stripping away a revision, we need to make sure it
1143 1148 # does not actually belong to an older changeset.
1144 1149 # The minlink parameter defines the oldest revision
1145 1150 # we're allowed to strip away.
1146 1151 while minlink > self.index[rev][-4]:
1147 1152 rev += 1
1148 1153 if rev >= self.count():
1149 1154 return
1150 1155
1151 1156 # first truncate the files on disk
1152 1157 end = self.start(rev)
1153 1158 if not self.inlinedata():
1154 1159 df = self.opener(self.datafile, "a")
1155 1160 df.truncate(end)
1156 1161 end = rev * struct.calcsize(self.indexformat)
1157 1162 else:
1158 1163 end += rev * struct.calcsize(self.indexformat)
1159 1164
1160 1165 indexf = self.opener(self.indexfile, "a")
1161 1166 indexf.truncate(end)
1162 1167
1163 1168 # then reset internal state in memory to forget those revisions
1164 1169 self.cache = None
1165 1170 self.chunkcache = None
1166 1171 for x in xrange(rev, self.count()):
1167 1172 del self.nodemap[self.node(x)]
1168 1173
1169 1174 del self.index[rev:]
1170 1175
1171 1176 def checksize(self):
1172 1177 expected = 0
1173 1178 if self.count():
1174 1179 expected = self.end(self.count() - 1)
1175 1180
1176 1181 try:
1177 1182 f = self.opener(self.datafile)
1178 1183 f.seek(0, 2)
1179 1184 actual = f.tell()
1180 1185 dd = actual - expected
1181 1186 except IOError, inst:
1182 1187 if inst.errno != errno.ENOENT:
1183 1188 raise
1184 1189 dd = 0
1185 1190
1186 1191 try:
1187 1192 f = self.opener(self.indexfile)
1188 1193 f.seek(0, 2)
1189 1194 actual = f.tell()
1190 1195 s = struct.calcsize(self.indexformat)
1191 1196 i = actual / s
1192 1197 di = actual - (i * s)
1193 1198 if self.inlinedata():
1194 1199 databytes = 0
1195 1200 for r in xrange(self.count()):
1196 1201 databytes += self.length(r)
1197 1202 dd = 0
1198 1203 di = actual - self.count() * s - databytes
1199 1204 except IOError, inst:
1200 1205 if inst.errno != errno.ENOENT:
1201 1206 raise
1202 1207 di = 0
1203 1208
1204 1209 return (dd, di)
1205 1210
1206 1211