revlog.strip should clear the chunkcache...
mason@suse.com
r1711:8959700c default
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
17 17
18 18 def hash(text, p1, p2):
19 19 """generate a hash from the given text and its parent hashes
20 20
21 21 This hash combines both the current file contents and its history
22 22 in a manner that makes it easy to distinguish nodes with the same
23 23 content in the revision graph.
24 24 """
25 25 l = [p1, p2]
26 26 l.sort()
27 27 s = sha.new(l[0])
28 28 s.update(l[1])
29 29 s.update(text)
30 30 return s.digest()
31 31
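# Added commentary (not in the original source): because the parents are
# sorted before hashing, the digest is sha1(min(p1, p2) + max(p1, p2) + text),
# so it is independent of parent order while still binding the content to
# its place in the revision graph; swapping p1 and p2 yields the same nodeid.
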
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text: return ("", text)
    if len(text) < 44:
        if text[0] == '\0': return ("", text)
        return ('u', text)
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0': return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin
    if t == 'x': return zlib.decompress(bin)
    if t == 'u': return bin[1:]
    raise RevlogError(_("unknown compression type %s") % t)

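# Added commentary: the first byte of a stored chunk serves as a type
# marker.  zlib streams happen to begin with 'x' (0x78); 'u' flags text
# stored verbatim because compression did not pay off; and a leading NUL
# means empty or NUL-prefixed text stored as-is.  compress() returns a
# (header, data) pair so callers can write both parts without building a
# new string.
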
indexformat = ">4l20s20s20s"

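# Added commentary: each ">4l20s20s20s" record is 76 bytes, big-endian:
# four longs (offset of the chunk in the data file, chunk length, base
# revision of the delta chain, linkrev) followed by three 20-byte binary
# sha1 nodes (p1, p2, and this revision's nodeid).  Index field e[6] used
# throughout this file is therefore the nodeid.
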
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices

    By default we parse and load 1000 entries at a time.

    If no position is specified, we load the whole index, and replace
    the lazy objects in revlog with the underlying objects for
    efficiency in cases where we look at most of the nodes.
    """
    def __init__(self, data, revlog):
        self.data = data
        self.s = struct.calcsize(indexformat)
        self.l = len(data)/self.s
        self.index = [None] * self.l
        self.map = {nullid: -1}
        self.all = 0
        self.revlog = revlog

    def trunc(self, pos):
        self.l = pos/self.s

    def load(self, pos=None):
        if self.all: return
        if pos is not None:
            block = pos / 1000
            i = block * 1000
            end = min(self.l, i + 1000)
        else:
            self.all = 1
            i = 0
            end = self.l
            self.revlog.index = self.index
            self.revlog.nodemap = self.map

        while i < end:
            d = self.data[i * self.s: (i + 1) * self.s]
            e = struct.unpack(indexformat, d)
            self.index[i] = e
            self.map[e[6]] = i
            i += 1

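# Added commentary: load() decodes the index in blocks of 1000 entries, so
# random access to a big index only parses the blocks it touches, while
# load(None) parses everything and swaps the plain list and dict onto the
# revlog so later lookups bypass the lazy wrappers entirely.
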
class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.load(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return self.p.index[pos] or self.load(pos)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def append(self, e):
        self.p.index.append(e)
    def trunc(self, pos):
        self.p.trunc(pos)

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        if self.p.all: return
        n = self.p.data.find(key)
        if n < 0:
            raise KeyError(key)
        pos = n / self.p.s
        self.p.load(pos)
    def __contains__(self, key):
        self.p.load()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            try:
                yield self.p.index[i][6]
            except:
                self.p.load(i)
                yield self.p.index[i][6]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

class RevlogError(Exception): pass

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, datafile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener
        self.cache = None
        self.chunkcache = None

        try:
            i = self.opener(self.indexfile).read()
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            i = ""

        if i and i[:4] != "\0\0\0\0":
            raise RevlogError(_("incompatible revlog signature on %s") %
                              self.indexfile)

        if len(i) > 10000:
            # big index, let's parse it on demand
            parser = lazyparser(i, self)
            self.index = lazyindex(parser)
            self.nodemap = lazymap(parser)
        else:
            s = struct.calcsize(indexformat)
            l = len(i) / s
            self.index = [None] * l
            m = [None] * l

            n = 0
            for f in xrange(0, l * s, s):
                # offset, size, base, linkrev, p1, p2, nodeid
                e = struct.unpack(indexformat, i[f:f + s])
                m[n] = (e[6], n)
                self.index[n] = e
                n += 1

            self.nodemap = dict(m)
            self.nodemap[nullid] = -1

    def tip(self): return self.node(len(self.index) - 1)
    def count(self): return len(self.index)
    def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def linkrev(self, node): return self.index[self.rev(node)][3]
    def parents(self, node):
        if node == nullid: return (nullid, nullid)
        return self.index[self.rev(node)][4:6]

    def start(self, rev): return self.index[rev][0]
    def length(self, rev): return self.index[rev][1]
    def end(self, rev): return self.start(rev) + self.length(rev)
    def base(self, rev): return self.index[rev][2]

    def reachable(self, rev, stop=None):
        reachable = {}
        visit = [rev]
        reachable[rev] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list of bases and heads after all the unreachable ones have
        been pruned. Element 0 contains a topologically sorted list of all
        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = -1
        if (lowestrev == -1) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads[:]
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > -1:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = -1
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == -1: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned

        """
        if start is None:
            start = nullid
        reachable = {start: 1}
        heads = {start: 1}
        startrev = self.rev(start)

        for r in xrange(startrev + 1, self.count()):
            n = self.node(r)
            for pn in self.parents(n):
                if pn in reachable:
                    reachable[n] = 1
                    heads[n] = 1
                if pn in heads:
                    del heads[pn]
        return heads.keys()

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            n = self.node(r)
            for pn in self.parents(n):
                if pn == node:
                    c.append(n)
                    continue
                elif pn == nullid:
                    continue
        return c

    def lookup(self, id):
        """locate a node based on revision number or subset of hex nodeid"""
        try:
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev = self.count() + rev
            if rev < 0 or rev >= self.count(): raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            c = []
            for n in self.nodemap:
                if hex(n).startswith(id):
                    c.append(n)
            if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
            if len(c) < 1: raise RevlogError(_("No match found"))
            return c[0]

        return None

    def diff(self, a, b):
        """return a delta between two revisions"""
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        """apply a list of patches to a string"""
        return mdiff.patches(t, pl)

    def chunk(self, rev):
        start, length = self.start(rev), self.length(rev)
        end = start + length

        def loadcache():
            cache_length = max(4096 * 1024, length) # 4 MB
            df = self.opener(self.datafile)
            df.seek(start)
            self.chunkcache = (start, df.read(cache_length))

        if not self.chunkcache:
            loadcache()

        cache_start = self.chunkcache[0]
        cache_end = cache_start + len(self.chunkcache[1])
        if start >= cache_start and end <= cache_end:
            # it is cached
            offset = start - cache_start
        else:
            loadcache()
            offset = 0

        #def checkchunk():
        #    df = self.opener(self.datafile)
        #    df.seek(start)
        #    return df.read(length)
        #assert s == checkchunk()
        return decompress(self.chunkcache[1][offset:offset + length])

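    # Added commentary: chunkcache pairs a data-file offset with a window
    # of at least 4 MB of raw bytes, so walking a delta chain usually
    # costs a single read.  A sketch of the hit test above with made-up
    # numbers: if the cache covers offsets [0, 4194304), a chunk at
    # start=4096, length=100 is served at offset 4096, while one ending
    # past 4194304 misses and makes loadcache() re-read starting at that
    # chunk's own offset.
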
    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        b = self.base(r)
        if r == b:
            return self.diff(self.revision(self.node(r - 1)),
                             self.revision(node))
        else:
            return self.chunk(r)

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # do we have useful data cached?
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.chunk(base)

        bins = []
        for r in xrange(base + 1, rev + 1):
            bins.append(self.chunk(r))

        text = mdiff.patches(text, bins)

        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

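    # Added illustration (hypothetical revision numbers): if base(7) == 4,
    # then revision(node(7)) is mdiff.patches(chunk(4), [chunk(5),
    # chunk(6), chunk(7)]).  Because self.cache then holds (node, 7, text),
    # a following revision(node(8)) with the same base only needs to apply
    # chunk(8) on top of the cached text.
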
    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        if node in self.nodemap:
            return node

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            if not d:
                prev = self.revision(self.tip())
                d = self.diff(prev, str(text))
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = end - start + l

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        e = (offset, l, base, link, p1, p2, node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(indexformat, *e)

        transaction.add(self.datafile, e[0])
        f = self.opener(self.datafile, "a")
        if data[0]:
            f.write(data[0])
        f.write(data[1])
        transaction.add(self.indexfile, n * len(entry))
        self.opener(self.indexfile, "a").write(entry)

        self.cache = (node, n, text)
        return node

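    # Added arithmetic sketch of the cutoff above, with made-up sizes: if
    # the delta chain since the last full version spans dist = 250 KB on
    # disk while the new text is len(text) = 100 KB, then dist > 2 *
    # len(text) holds, so a full compressed version is stored and base is
    # reset to this revision; otherwise only the delta is appended.
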
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""
        # calculate the distance of every node from root
        dist = {nullid: 0}
        for i in xrange(self.count()):
            n = self.node(i)
            p1, p2 = self.parents(n)
            dist[n] = max(dist[p1], dist[p2]) + 1

        # traverse ancestors in order of decreasing distance from root
        def ancestors(node):
            # we store negative distances because heap returns smallest member
            h = [(-dist[node], node)]
            seen = {}
            earliest = self.count()
            while h:
                d, n = heapq.heappop(h)
                if n not in seen:
                    seen[n] = 1
                    r = self.rev(n)
                    yield (-d, n)
                    for p in self.parents(n):
                        heapq.heappush(h, (-dist[p], p))

        def generations(node):
            sg, s = None, {}
            for g, n in ancestors(node):
                if g != sg:
                    if sg:
                        yield sg, s
                    sg, s = g, {n: 1}
                else:
                    s[n] = 1
            yield sg, s

        x = generations(a)
        y = generations(b)
        gx = x.next()
        gy = y.next()

        # increment each ancestor list until it is closer to root than
        # the other, or they match
        while 1:
            #print "ancestor gen %s %s" % (gx[0], gy[0])
            if gx[0] == gy[0]:
                # find the intersection
                i = [n for n in gx[1] if n in gy[1]]
                if i:
                    return i[0]
                else:
                    #print "next"
                    gy = y.next()
                    gx = x.next()
            elif gx[0] < gy[0]:
                #print "next y"
                gy = y.next()
            else:
                #print "next x"
                gx = x.next()

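    # Added commentary: both generators emit ancestors grouped by distance
    # from root, farthest first, so the loop advances whichever frontier
    # is farther out until both sit at the same distance and share a node.
    # Termination is guaranteed because nullid, at distance 0, is an
    # ancestor of every node, so the generations must eventually intersect.
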
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. The parent is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield struct.pack(">l", 0)
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # helper to reconstruct intermediate versions
        def construct(text, base, rev):
            bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]
            return mdiff.patches(text, bins)

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            na = self.node(a)
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            # do we need to construct a new delta?
            if a + 1 != b or self.base(b) == b:
                ta = self.revision(na)
                tb = self.revision(nb)
                d = self.diff(ta, tb)
            else:
                d = self.chunk(b)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            l = struct.pack(">l", len(meta) + len(d) + 4)
            yield l
            yield meta
            yield d

        yield struct.pack(">l", 0)

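    # Added commentary on the stream produced above: each entry is a
    # length-prefixed chunk consisting of a ">l" total length (the 4-byte
    # prefix counts itself), an 80-byte header of node + p1 + p2 + the
    # linked changeset node from lookup(), and then the delta.  A zero
    # length terminates the group; addgroup() below receives the chunk
    # payloads and unpacks the header with "20s20s20s20s".
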
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = nullid

        base = prev = -1
        start = end = measure = 0
        if r:
            start = self.start(self.base(t))
            end = self.end(t)
            measure = self.length(self.base(t))
            base = self.base(t)
            prev = self.tip()

        transaction.add(self.datafile, end)
        transaction.add(self.indexfile, r * struct.calcsize(indexformat))
        dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise RevlogError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise RevlogError(_("unknown base %s") % short(chain))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]

            if chain != prev or (end - start + len(cdelta)) > measure * 2:
                # flush our writes here so we can read it in revision
                dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                measure = len(text)
            else:
                e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                self.index.append(e)
                self.nodemap[node] = r
                dfh.write(cdelta)
                ifh.write(struct.pack(indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            start = self.start(self.base(t))
            end = self.end(t)

        dfh.close()
        ifh.close()
        return node

    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][3]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        self.opener(self.datafile, "a").truncate(end)
        end = rev * struct.calcsize(indexformat)
        self.opener(self.indexfile, "a").truncate(end)

        # then reset internal state in memory to forget those revisions
        self.cache = None
        self.chunkcache = None
        for p in self.index[rev:]:
            del self.nodemap[p[6]]
        del self.index[rev:]

        # truncating the lazyindex also truncates the lazymap.
        if isinstance(self.index, lazyindex):
            self.index.trunc(end)

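    # Added commentary: dropping chunkcache above is the point of this
    # changeset.  The cache maps a data-file offset to bytes already read;
    # once the file is truncated, a later append can reuse those offsets
    # for different data, so a stale cache could return chunks from the
    # stripped revisions.  Clearing it (along with self.cache) forces a
    # fresh read from disk.
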
    def checksize(self):
        expected = 0
        if self.count():
            expected = self.end(self.count() - 1)

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = struct.calcsize(indexformat)
            i = actual / s
            di = actual - (i * s)
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)