Add revlog.strip to truncate away revisions....
mason@suse.com
r1535:7ae0ce7a default
@@ -1,851 +1,890 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
17 17
18 18 def hash(text, p1, p2):
19 19 """generate a hash from the given text and its parent hashes
20 20
21 21 This hash combines both the current file contents and its history
22 22 in a manner that makes it easy to distinguish nodes with the same
23 23 content in the revision graph.
24 24 """
25 25 l = [p1, p2]
26 26 l.sort()
27 27 s = sha.new(l[0])
28 28 s.update(l[1])
29 29 s.update(text)
30 30 return s.digest()
31 31
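
A quick sketch of what the docstring above promises (illustration only, not part of this changeset): the same file contents stored with different parent hashes yields different node IDs. The nullid value is assumed to be twenty zero bytes, matching mercurial.node.

    import sha
    nullid = "\0" * 20                  # assumption: mercurial.node's nullid
    def node_hash(text, p1, p2):        # mirrors hash() above
        l = [p1, p2]
        l.sort()
        s = sha.new(l[0])
        s.update(l[1])
        s.update(text)
        return s.digest()
    root = node_hash("hello\n", nullid, nullid)
    child = node_hash("hello\n", root, nullid)
    assert root != child                # same text, different history
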
32 32 def compress(text):
33 33 """ generate a possibly-compressed representation of text """
34 34 if not text: return ("", text)
35 35 if len(text) < 44:
36 36 if text[0] == '\0': return ("", text)
37 37 return ('u', text)
38 38 bin = zlib.compress(text)
39 39 if len(bin) > len(text):
40 40 if text[0] == '\0': return ("", text)
41 41 return ('u', text)
42 42 return ("", bin)
43 43
44 44 def decompress(bin):
45 45 """ decompress the given input """
46 46 if not bin: return bin
47 47 t = bin[0]
48 48 if t == '\0': return bin
49 49 if t == 'x': return zlib.decompress(bin)
50 50 if t == 'u': return bin[1:]
51 51 raise RevlogError(_("unknown compression type %s") % t)
52 52
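
Round-trip sketch for the markers above (illustration only): 'u' flags text stored literally, a leading '\0' passes through untouched, and zlib output already begins with 'x'. Assumes the compress() and decompress() defined in this file are in scope.

    for text in ("", "short text", "x" * 200):
        header, body = compress(text)
        assert decompress(header + body) == text
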
53 53 indexformat = ">4l20s20s20s"
54 54
55 55 class lazyparser:
56 56 """
57 57 this class avoids the need to parse the entirety of large indices
58 58
59 59 By default we parse and load 1000 entries at a time.
60 60
61 61 If no position is specified, we load the whole index, and replace
62 62 the lazy objects in revlog with the underlying objects for
63 63 efficiency in cases where we look at most of the nodes.
64 64 """
65 65 def __init__(self, data, revlog):
66 66 self.data = data
67 67 self.s = struct.calcsize(indexformat)
68 68 self.l = len(data)/self.s
69 69 self.index = [None] * self.l
70 70 self.map = {nullid: -1}
71 71 self.all = 0
72 72 self.revlog = revlog
73 73
74 def trunc(self, pos):
75 self.l = pos/self.s
76
74 77 def load(self, pos=None):
75 78 if self.all: return
76 79 if pos is not None:
77 80 block = pos / 1000
78 81 i = block * 1000
79 82 end = min(self.l, i + 1000)
80 83 else:
81 84 self.all = 1
82 85 i = 0
83 86 end = self.l
84 87 self.revlog.index = self.index
85 88 self.revlog.nodemap = self.map
86 89
87 90 while i < end:
88 91 d = self.data[i * self.s: (i + 1) * self.s]
89 92 e = struct.unpack(indexformat, d)
90 93 self.index[i] = e
91 94 self.map[e[6]] = i
92 95 i += 1
93 96
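
The block arithmetic behind load() above, stand-alone (illustrative values only):

    import struct
    s = struct.calcsize(">4l20s20s20s")    # 76 bytes per index entry
    pos = 2345
    block = pos / 1000                     # integer division: block 2
    start, end = block * 1000, block * 1000 + 1000
    assert (start, end) == (2000, 3000)    # the entries parsed for this request
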
94 97 class lazyindex:
95 98 """a lazy version of the index array"""
96 99 def __init__(self, parser):
97 100 self.p = parser
98 101 def __len__(self):
99 102 return len(self.p.index)
100 103 def load(self, pos):
101 104 if pos < 0:
102 105 pos += len(self.p.index)
103 106 self.p.load(pos)
104 107 return self.p.index[pos]
105 108 def __getitem__(self, pos):
106 109 return self.p.index[pos] or self.load(pos)
110 def __delitem__(self, pos):
111 del self.p.index[pos]
107 112 def append(self, e):
108 113 self.p.index.append(e)
114 def trunc(self, pos):
115 self.p.trunc(pos)
109 116
110 117 class lazymap:
111 118 """a lazy version of the node map"""
112 119 def __init__(self, parser):
113 120 self.p = parser
114 121 def load(self, key):
115 122 if self.p.all: return
116 123 n = self.p.data.find(key)
117 124 if n < 0:
118 125 raise KeyError(key)
119 126 pos = n / self.p.s
120 127 self.p.load(pos)
121 128 def __contains__(self, key):
122 129 self.p.load()
123 130 return key in self.p.map
124 131 def __iter__(self):
125 132 yield nullid
126 133 for i in xrange(self.p.l):
127 134 try:
128 135 yield self.p.index[i][6]
129 136 except:
130 137 self.p.load(i)
131 138 yield self.p.index[i][6]
132 139 def __getitem__(self, key):
133 140 try:
134 141 return self.p.map[key]
135 142 except KeyError:
136 143 try:
137 144 self.load(key)
138 145 return self.p.map[key]
139 146 except KeyError:
140 147 raise KeyError("node " + hex(key))
141 148 def __setitem__(self, key, val):
142 149 self.p.map[key] = val
150 def __delitem__(self, key):
151 del self.p.map[key]
143 152
144 153 class RevlogError(Exception): pass
145 154
146 155 class revlog:
147 156 """
148 157 the underlying revision storage object
149 158
150 159 A revlog consists of two parts, an index and the revision data.
151 160
152 161 The index is a file with a fixed record size containing
153 162 information on each revision, including its nodeid (hash), the
154 163 nodeids of its parents, the position and offset of its data within
155 164 the data file, and the revision it's based on. Finally, each entry
156 165 contains a linkrev entry that can serve as a pointer to external
157 166 data.
158 167
159 168 The revision data itself is a linear collection of data chunks.
160 169 Each chunk represents a revision and is usually represented as a
161 170 delta against the previous chunk. To bound lookup time, runs of
162 171 deltas are limited to about 2 times the length of the original
163 172 version data. This makes retrieval of a version proportional to
164 173 its size, or O(1) relative to the number of revisions.
165 174
166 175 Both pieces of the revlog are written to in an append-only
167 176 fashion, which means we never need to rewrite a file to insert or
168 177 remove data, and can use some simple techniques to avoid the need
169 178 for locking while reading.
170 179 """
171 180 def __init__(self, opener, indexfile, datafile):
172 181 """
173 182 create a revlog object
174 183
175 184 opener is a function that abstracts the file opening operation
176 185 and can be used to implement COW semantics or the like.
177 186 """
178 187 self.indexfile = indexfile
179 188 self.datafile = datafile
180 189 self.opener = opener
181 190 self.cache = None
182 191
183 192 try:
184 193 i = self.opener(self.indexfile).read()
185 194 except IOError, inst:
186 195 if inst.errno != errno.ENOENT:
187 196 raise
188 197 i = ""
189 198
190 199 if len(i) > 10000:
191 200 # big index, let's parse it on demand
192 201 parser = lazyparser(i, self)
193 202 self.index = lazyindex(parser)
194 203 self.nodemap = lazymap(parser)
195 204 else:
196 205 s = struct.calcsize(indexformat)
197 206 l = len(i) / s
198 207 self.index = [None] * l
199 208 m = [None] * l
200 209
201 210 n = 0
202 211 for f in xrange(0, len(i), s):
203 212 # offset, size, base, linkrev, p1, p2, nodeid
204 213 e = struct.unpack(indexformat, i[f:f + s])
205 214 m[n] = (e[6], n)
206 215 self.index[n] = e
207 216 n += 1
208 217
209 218 self.nodemap = dict(m)
210 219 self.nodemap[nullid] = -1
211 220
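
The fixed-size record parsed in __init__ above, packed and unpacked stand-alone (fabricated values, not real revlog data):

    import struct
    indexformat = ">4l20s20s20s"
    e = (0, 11, 0, 0, "\0" * 20, "\0" * 20, "\x12" * 20)
    packed = struct.pack(indexformat, *e)
    assert len(packed) == struct.calcsize(indexformat) == 76
    # offset, size, base, linkrev, p1, p2, nodeid
    offset, size, base, linkrev, p1, p2, node = struct.unpack(indexformat, packed)
    assert (offset, size, base, linkrev) == (0, 11, 0, 0)
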
212 221 def tip(self): return self.node(len(self.index) - 1)
213 222 def count(self): return len(self.index)
214 223 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
215 224 def rev(self, node):
216 225 try:
217 226 return self.nodemap[node]
218 227 except KeyError:
219 228 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
220 229 def linkrev(self, node): return self.index[self.rev(node)][3]
221 230 def parents(self, node):
222 231 if node == nullid: return (nullid, nullid)
223 232 return self.index[self.rev(node)][4:6]
224 233
225 234 def start(self, rev): return self.index[rev][0]
226 235 def length(self, rev): return self.index[rev][1]
227 236 def end(self, rev): return self.start(rev) + self.length(rev)
228 237 def base(self, rev): return self.index[rev][2]
229 238
230 239 def reachable(self, rev, stop=None):
231 240 reachable = {}
232 241 visit = [rev]
233 242 reachable[rev] = 1
234 243 if stop:
235 244 stopn = self.rev(stop)
236 245 else:
237 246 stopn = 0
238 247 while visit:
239 248 n = visit.pop(0)
240 249 if n == stop:
241 250 continue
242 251 if n == nullid:
243 252 continue
244 253 for p in self.parents(n):
245 254 if self.rev(p) < stopn:
246 255 continue
247 256 if p not in reachable:
248 257 reachable[p] = 1
249 258 visit.append(p)
250 259 return reachable
251 260
252 261 def nodesbetween(self, roots=None, heads=None):
253 262 """Return a tuple containing three elements. Elements 1 and 2 contain
254 263 a final list bases and heads after all the unreachable ones have been
255 264 pruned. Element 0 contains a topologically sorted list of all
256 265
257 266 nodes that satisfy these constraints:
258 267 1. All nodes must be descended from a node in roots (the nodes on
259 268 roots are considered descended from themselves).
260 269 2. All nodes must also be ancestors of a node in heads (the nodes in
261 270 heads are considered to be their own ancestors).
262 271
263 272 If roots is unspecified, nullid is assumed as the only root.
264 273 If heads is unspecified, it is taken to be the output of the
265 274 heads method (i.e. a list of all nodes in the repository that
266 275 have no children)."""
267 276 nonodes = ([], [], [])
268 277 if roots is not None:
269 278 roots = list(roots)
270 279 if not roots:
271 280 return nonodes
272 281 lowestrev = min([self.rev(n) for n in roots])
273 282 else:
274 283 roots = [nullid] # Everybody's a descendent of nullid
275 284 lowestrev = -1
276 285 if (lowestrev == -1) and (heads is None):
277 286 # We want _all_ the nodes!
278 287 return ([self.node(r) for r in xrange(0, self.count())],
279 288 [nullid], list(self.heads()))
280 289 if heads is None:
281 290 # All nodes are ancestors, so the latest ancestor is the last
282 291 # node.
283 292 highestrev = self.count() - 1
284 293 # Set ancestors to None to signal that every node is an ancestor.
285 294 ancestors = None
286 295 # Set heads to an empty dictionary for later discovery of heads
287 296 heads = {}
288 297 else:
289 298 heads = list(heads)
290 299 if not heads:
291 300 return nonodes
292 301 ancestors = {}
293 302 # Start at the top and keep marking parents until we're done.
294 303 nodestotag = heads[:]
295 304 # Turn heads into a dictionary so we can remove 'fake' heads.
296 305 # Also, later we will be using it to filter out the heads we can't
297 306 # find from roots.
298 307 heads = dict.fromkeys(heads, 0)
299 308 # Remember where the top was so we can use it as a limit later.
300 309 highestrev = max([self.rev(n) for n in nodestotag])
301 310 while nodestotag:
302 311 # grab a node to tag
303 312 n = nodestotag.pop()
304 313 # Never tag nullid
305 314 if n == nullid:
306 315 continue
307 316 # A node's revision number represents its place in a
308 317 # topologically sorted list of nodes.
309 318 r = self.rev(n)
310 319 if r >= lowestrev:
311 320 if n not in ancestors:
312 321 # If we are possibly a descendent of one of the roots
313 322 # and we haven't already been marked as an ancestor
314 323 ancestors[n] = 1 # Mark as ancestor
315 324 # Add non-nullid parents to list of nodes to tag.
316 325 nodestotag.extend([p for p in self.parents(n) if
317 326 p != nullid])
318 327 elif n in heads: # We've seen it before, is it a fake head?
319 328 # So it is, real heads should not be the ancestors of
320 329 # any other heads.
321 330 heads.pop(n)
322 331 if not ancestors:
323 332 return nonodes
324 333 # Now that we have our set of ancestors, we want to remove any
325 334 # roots that are not ancestors.
326 335
327 336 # If one of the roots was nullid, everything is included anyway.
328 337 if lowestrev > -1:
329 338 # But, since we weren't, let's recompute the lowest rev to not
330 339 # include roots that aren't ancestors.
331 340
332 341 # Filter out roots that aren't ancestors of heads
333 342 roots = [n for n in roots if n in ancestors]
334 343 # Recompute the lowest revision
335 344 if roots:
336 345 lowestrev = min([self.rev(n) for n in roots])
337 346 else:
338 347 # No more roots? Return empty list
339 348 return nonodes
340 349 else:
341 350 # We are descending from nullid, and don't need to care about
342 351 # any other roots.
343 352 lowestrev = -1
344 353 roots = [nullid]
345 354 # Transform our roots list into a 'set' (i.e. a dictionary where the
346 355 # values don't matter).
347 356 descendents = dict.fromkeys(roots, 1)
348 357 # Also, keep the original roots so we can filter out roots that aren't
349 358 # 'real' roots (i.e. are descended from other roots).
350 359 roots = descendents.copy()
351 360 # Our topologically sorted list of output nodes.
352 361 orderedout = []
353 362 # Don't start at nullid since we don't want nullid in our output list,
354 363 # and if nullid shows up in descendents, empty parents will look like
355 364 # they're descendents.
356 365 for r in xrange(max(lowestrev, 0), highestrev + 1):
357 366 n = self.node(r)
358 367 isdescendent = False
359 368 if lowestrev == -1: # Everybody is a descendent of nullid
360 369 isdescendent = True
361 370 elif n in descendents:
362 371 # n is already a descendent
363 372 isdescendent = True
364 373 # This check only needs to be done here because all the roots
365 374 # will start being marked as descendents before the loop.
366 375 if n in roots:
367 376 # If n was a root, check if it's a 'real' root.
368 377 p = tuple(self.parents(n))
369 378 # If any of its parents are descendents, it's not a root.
370 379 if (p[0] in descendents) or (p[1] in descendents):
371 380 roots.pop(n)
372 381 else:
373 382 p = tuple(self.parents(n))
374 383 # A node is a descendent if either of its parents is a
375 384 # descendent. (We seeded the descendents list with the roots
376 385 # up there, remember?)
377 386 if (p[0] in descendents) or (p[1] in descendents):
378 387 descendents[n] = 1
379 388 isdescendent = True
380 389 if isdescendent and ((ancestors is None) or (n in ancestors)):
381 390 # Only include nodes that are both descendents and ancestors.
382 391 orderedout.append(n)
383 392 if (ancestors is not None) and (n in heads):
384 393 # We're trying to figure out which heads are reachable
385 394 # from roots.
386 395 # Mark this head as having been reached
387 396 heads[n] = 1
388 397 elif ancestors is None:
389 398 # Otherwise, we're trying to discover the heads.
390 399 # Assume this is a head because if it isn't, the next step
391 400 # will eventually remove it.
392 401 heads[n] = 1
393 402 # But, obviously its parents aren't.
394 403 for p in self.parents(n):
395 404 heads.pop(p, None)
396 405 heads = [n for n in heads.iterkeys() if heads[n] != 0]
397 406 roots = roots.keys()
398 407 assert orderedout
399 408 assert roots
400 409 assert heads
401 410 return (orderedout, roots, heads)
402 411
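
A stand-alone sketch of the constraint nodesbetween() enforces (toy DAG, not Mercurial code): keep the nodes that are both descendents of a root and ancestors of a head, in topological order.

    # revs 0 <- 1 <- 2 and 1 <- 3; roots=[0], heads=[2]
    parents = {0: (), 1: (0,), 2: (1,), 3: (1,)}
    roots, heads = [0], [2]
    anc, stack = set(), list(heads)        # ancestors of the heads
    while stack:
        n = stack.pop()
        if n not in anc:
            anc.add(n)
            stack.extend(parents[n])
    desc = set(roots)                      # descendents of the roots
    for r in sorted(parents):
        if any(p in desc for p in parents[r]):
            desc.add(r)
    orderedout = [r for r in sorted(parents) if r in desc and r in anc]
    assert orderedout == [0, 1, 2]         # 3 descends from 0 but is no ancestor of 2
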
403 412 def heads(self, stop=None):
404 413 """return the list of all nodes that have no children"""
405 414 p = {}
406 415 h = []
407 416 stoprev = 0
408 417 if stop and stop in self.nodemap:
409 418 stoprev = self.rev(stop)
410 419
411 420 for r in range(self.count() - 1, -1, -1):
412 421 n = self.node(r)
413 422 if n not in p:
414 423 h.append(n)
415 424 if n == stop:
416 425 break
417 426 if r < stoprev:
418 427 break
419 428 for pn in self.parents(n):
420 429 p[pn] = 1
421 430 return h
422 431
423 432 def children(self, node):
424 433 """find the children of a given node"""
425 434 c = []
426 435 p = self.rev(node)
427 436 for r in range(p + 1, self.count()):
428 437 n = self.node(r)
429 438 for pn in self.parents(n):
430 439 if pn == node:
431 440 c.append(n)
432 441 continue
433 442 elif pn == nullid:
434 443 continue
435 444 return c
436 445
437 446 def lookup(self, id):
438 447 """locate a node based on revision number or subset of hex nodeid"""
439 448 try:
440 449 rev = int(id)
441 450 if str(rev) != id: raise ValueError
442 451 if rev < 0: rev = self.count() + rev
443 452 if rev < 0 or rev >= self.count(): raise ValueError
444 453 return self.node(rev)
445 454 except (ValueError, OverflowError):
446 455 c = []
447 456 for n in self.nodemap:
448 457 if hex(n).startswith(id):
449 458 c.append(n)
450 459 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
451 460 if len(c) < 1: raise RevlogError(_("No match found"))
452 461 return c[0]
453 462
454 463 return None
455 464
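
A miniature stand-in for lookup()'s two resolution paths (hypothetical helper, not this module's API): integers are revision numbers, anything else is a unique hex prefix.

    def mini_lookup(id, nodes):            # nodes: hex ids in rev order
        try:
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev += len(nodes)
            if rev < 0 or rev >= len(nodes): raise ValueError
            return nodes[rev]
        except (ValueError, OverflowError):
            c = [n for n in nodes if n.startswith(id)]
            if len(c) != 1: raise KeyError(id)   # ambiguous or missing
            return c[0]
    nodes = ["7ae0ce7a" + "0" * 32, "deadbeef" + "0" * 32]
    assert mini_lookup("-1", nodes) == nodes[1]  # negative revs count from tip
    assert mini_lookup("7ae0", nodes) == nodes[0]
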
456 465 def diff(self, a, b):
457 466 """return a delta between two revisions"""
458 467 return mdiff.textdiff(a, b)
459 468
460 469 def patches(self, t, pl):
461 470 """apply a list of patches to a string"""
462 471 return mdiff.patches(t, pl)
463 472
464 473 def delta(self, node):
465 474 """return or calculate a delta between a node and its predecessor"""
466 475 r = self.rev(node)
467 476 b = self.base(r)
468 477 if r == b:
469 478 return self.diff(self.revision(self.node(r - 1)),
470 479 self.revision(node))
471 480 else:
472 481 f = self.opener(self.datafile)
473 482 f.seek(self.start(r))
474 483 data = f.read(self.length(r))
475 484 return decompress(data)
476 485
477 486 def revision(self, node):
478 487 """return an uncompressed revision of a given"""
479 488 if node == nullid: return ""
480 489 if self.cache and self.cache[0] == node: return self.cache[2]
481 490
482 491 # look up what we need to read
483 492 text = None
484 493 rev = self.rev(node)
485 494 start, length, base, link, p1, p2, node = self.index[rev]
486 495 end = start + length
487 496 if base != rev: start = self.start(base)
488 497
489 498 # do we have useful data cached?
490 499 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
491 500 base = self.cache[1]
492 501 start = self.start(base + 1)
493 502 text = self.cache[2]
494 503 last = 0
495 504
496 505 f = self.opener(self.datafile)
497 506 f.seek(start)
498 507 data = f.read(end - start)
499 508
500 509 if text is None:
501 510 last = self.length(base)
502 511 text = decompress(data[:last])
503 512
504 513 bins = []
505 514 for r in xrange(base + 1, rev + 1):
506 515 s = self.length(r)
507 516 bins.append(decompress(data[last:last + s]))
508 517 last = last + s
509 518
510 519 text = mdiff.patches(text, bins)
511 520
512 521 if node != hash(text, p1, p2):
513 522 raise RevlogError(_("integrity check failed on %s:%d")
514 523 % (self.datafile, rev))
515 524
516 525 self.cache = (node, rev, text)
517 526 return text
518 527
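
The windowed read that revision() performs, stand-alone (fake index and data, illustration only): one contiguous read from start(base) to end(rev), then split by each revision's length into the base text plus the deltas to patch in.

    # (offset, size, base) per rev; a 20-byte string stands in for the data file
    index = [(0, 10, 0), (10, 4, 0), (14, 6, 0)]
    data_file = "0123456789ABCDEFGHIJ"
    rev = 2
    base = index[rev][2]
    start = index[base][0]
    end = index[rev][0] + index[rev][1]
    data = data_file[start:end]
    last = index[base][1]
    text = data[:last]                     # the full base revision
    bins = []
    for r in xrange(base + 1, rev + 1):
        s = index[r][1]
        bins.append(data[last:last + s])   # deltas, in chain order
        last += s
    assert (text, bins) == ("0123456789", ["ABCD", "EFGHIJ"])
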
519 528 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
520 529 """add a revision to the log
521 530
522 531 text - the revision data to add
523 532 transaction - the transaction object used for rollback
524 533 link - the linkrev data to add
525 534 p1, p2 - the parent nodeids of the revision
526 535 d - an optional precomputed delta
527 536 """
528 537 if text is None: text = ""
529 538 if p1 is None: p1 = self.tip()
530 539 if p2 is None: p2 = nullid
531 540
532 541 node = hash(text, p1, p2)
533 542
534 543 if node in self.nodemap:
535 544 return node
536 545
537 546 n = self.count()
538 547 t = n - 1
539 548
540 549 if n:
541 550 base = self.base(t)
542 551 start = self.start(base)
543 552 end = self.end(t)
544 553 if not d:
545 554 prev = self.revision(self.tip())
546 555 d = self.diff(prev, str(text))
547 556 data = compress(d)
548 557 l = len(data[1]) + len(data[0])
549 558 dist = end - start + l
550 559
551 560 # full versions are inserted when the needed deltas
552 561 # become comparable to the uncompressed text
553 562 if not n or dist > len(text) * 2:
554 563 data = compress(text)
555 564 l = len(data[1]) + len(data[0])
556 565 base = n
557 566 else:
558 567 base = self.base(t)
559 568
560 569 offset = 0
561 570 if t >= 0:
562 571 offset = self.end(t)
563 572
564 573 e = (offset, l, base, link, p1, p2, node)
565 574
566 575 self.index.append(e)
567 576 self.nodemap[node] = n
568 577 entry = struct.pack(indexformat, *e)
569 578
570 579 transaction.add(self.datafile, e[0])
571 580 f = self.opener(self.datafile, "a")
572 581 if data[0]:
573 582 f.write(data[0])
574 583 f.write(data[1])
575 584 transaction.add(self.indexfile, n * len(entry))
576 585 self.opener(self.indexfile, "a").write(entry)
577 586
578 587 self.cache = (node, n, text)
579 588 return node
580 589
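
The snapshot decision inside addrevision(), reduced to its arithmetic (toy numbers): once the bytes from the chain base through the new delta exceed twice the new text, a full version is stored and the chain restarts.

    def choose_storage(chain_bytes, delta_len, text_len):
        dist = chain_bytes + delta_len     # end - start + l in the code above
        if dist > text_len * 2:
            return "full"                  # base becomes this revision
        return "delta"                     # keep extending the current chain
    assert choose_storage(90, 15, 50) == "full"
    assert choose_storage(40, 15, 50) == "delta"
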
581 590 def ancestor(self, a, b):
582 591 """calculate the least common ancestor of nodes a and b"""
583 592 # calculate the distance of every node from root
584 593 dist = {nullid: 0}
585 594 for i in xrange(self.count()):
586 595 n = self.node(i)
587 596 p1, p2 = self.parents(n)
588 597 dist[n] = max(dist[p1], dist[p2]) + 1
589 598
590 599 # traverse ancestors in order of decreasing distance from root
591 600 def ancestors(node):
592 601 # we store negative distances because heap returns smallest member
593 602 h = [(-dist[node], node)]
594 603 seen = {}
595 604 earliest = self.count()
596 605 while h:
597 606 d, n = heapq.heappop(h)
598 607 if n not in seen:
599 608 seen[n] = 1
600 609 r = self.rev(n)
601 610 yield (-d, n)
602 611 for p in self.parents(n):
603 612 heapq.heappush(h, (-dist[p], p))
604 613
605 614 def generations(node):
606 615 sg, s = None, {}
607 616 for g,n in ancestors(node):
608 617 if g != sg:
609 618 if sg:
610 619 yield sg, s
611 620 sg, s = g, {n:1}
612 621 else:
613 622 s[n] = 1
614 623 yield sg, s
615 624
616 625 x = generations(a)
617 626 y = generations(b)
618 627 gx = x.next()
619 628 gy = y.next()
620 629
621 630 # increment each ancestor list until it is closer to root than
622 631 # the other, or they match
623 632 while 1:
624 633 #print "ancestor gen %s %s" % (gx[0], gy[0])
625 634 if gx[0] == gy[0]:
626 635 # find the intersection
627 636 i = [ n for n in gx[1] if n in gy[1] ]
628 637 if i:
629 638 return i[0]
630 639 else:
631 640 #print "next"
632 641 gy = y.next()
633 642 gx = x.next()
634 643 elif gx[0] < gy[0]:
635 644 #print "next y"
636 645 gy = y.next()
637 646 else:
638 647 #print "next x"
639 648 gx = x.next()
640 649
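
The distance table ancestor() builds, stand-alone (toy graph): each node's height above nullid, which then orders the two heap walks.

    nullid = "\0" * 20                     # assumption: twenty zero bytes
    parents = {"a": (nullid, nullid), "b": ("a", nullid),
               "c": ("a", nullid), "d": ("b", "c")}
    dist = {nullid: 0}
    for n in ("a", "b", "c", "d"):         # topological order
        p1, p2 = parents[n]
        dist[n] = max(dist[p1], dist[p2]) + 1
    assert dist["d"] == 3
    # walking ancestors of "b" and "c" by decreasing dist first meets at "a"
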
641 650 def group(self, nodelist, lookup, infocollect = None):
642 651 """calculate a delta group
643 652
644 653 Given a list of changeset revs, return a set of deltas and
645 654 metadata corresponding to nodes. The first delta is
646 655 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
647 656 have this parent as it has all history before these
648 657 changesets. The parent used is the first parent, parents(nodes[0])[0].
649 658 """
650 659 revs = [self.rev(n) for n in nodelist]
651 660 needed = dict.fromkeys(revs, 1)
652 661
653 662 # if we don't have any revisions touched by these changesets, bail
654 663 if not revs:
655 664 yield struct.pack(">l", 0)
656 665 return
657 666
658 667 # add the parent of the first rev
659 668 p = self.parents(self.node(revs[0]))[0]
660 669 revs.insert(0, self.rev(p))
661 670
662 671 # for each delta that isn't contiguous in the log, we need to
663 672 # reconstruct the base, reconstruct the result, and then
664 673 # calculate the delta. We also need to do this where we've
665 674 # stored a full version and not a delta
666 675 for i in xrange(0, len(revs) - 1):
667 676 a, b = revs[i], revs[i + 1]
668 677 if a + 1 != b or self.base(b) == b:
669 678 for j in xrange(self.base(a), a + 1):
670 679 needed[j] = 1
671 680 for j in xrange(self.base(b), b + 1):
672 681 needed[j] = 1
673 682
674 683 # calculate spans to retrieve from datafile
675 684 needed = needed.keys()
676 685 needed.sort()
677 686 spans = []
678 687 oo = -1
679 688 ol = 0
680 689 for n in needed:
681 690 if n < 0: continue
682 691 o = self.start(n)
683 692 l = self.length(n)
684 693 if oo + ol == o: # can we merge with the previous?
685 694 nl = spans[-1][2]
686 695 nl.append((n, l))
687 696 ol += l
688 697 spans[-1] = (oo, ol, nl)
689 698 else:
690 699 oo = o
691 700 ol = l
692 701 spans.append((oo, ol, [(n, l)]))
693 702
694 703 # read spans in, divide up chunks
695 704 chunks = {}
696 705 for span in spans:
697 706 # we reopen the file for each span to make http happy for now
698 707 f = self.opener(self.datafile)
699 708 f.seek(span[0])
700 709 data = f.read(span[1])
701 710
702 711 # divide up the span
703 712 pos = 0
704 713 for r, l in span[2]:
705 714 chunks[r] = decompress(data[pos: pos + l])
706 715 pos += l
707 716
708 717 # helper to reconstruct intermediate versions
709 718 def construct(text, base, rev):
710 719 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
711 720 return mdiff.patches(text, bins)
712 721
713 722 # build deltas
714 723 deltas = []
715 724 for d in xrange(0, len(revs) - 1):
716 725 a, b = revs[d], revs[d + 1]
717 726 n = self.node(b)
718 727
719 728 if infocollect is not None:
720 729 infocollect(n)
721 730
722 731 # do we need to construct a new delta?
723 732 if a + 1 != b or self.base(b) == b:
724 733 if a >= 0:
725 734 base = self.base(a)
726 735 ta = chunks[self.base(a)]
727 736 ta = construct(ta, base, a)
728 737 else:
729 738 ta = ""
730 739
731 740 base = self.base(b)
732 741 if a > base:
733 742 base = a
734 743 tb = ta
735 744 else:
736 745 tb = chunks[self.base(b)]
737 746 tb = construct(tb, base, b)
738 747 d = self.diff(ta, tb)
739 748 else:
740 749 d = chunks[b]
741 750
742 751 p = self.parents(n)
743 752 meta = n + p[0] + p[1] + lookup(n)
744 753 l = struct.pack(">l", len(meta) + len(d) + 4)
745 754 yield l
746 755 yield meta
747 756 yield d
748 757
749 758 yield struct.pack(">l", 0)
750 759
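
The stream shape group() yields, built by hand (fake 20-byte ids, illustration only): each chunk is a big-endian length that counts itself, 80 bytes of metadata (node, p1, p2, changeset linknode), then the delta; a zero length terminates the stream.

    import struct
    node, p1, p2, cs = "N" * 20, "P" * 20, "\0" * 20, "C" * 20
    delta = "some delta bytes"
    meta = node + p1 + p2 + cs
    chunk = struct.pack(">l", len(meta) + len(delta) + 4) + meta + delta
    stream = chunk + struct.pack(">l", 0)
    l = struct.unpack(">l", stream[:4])[0]
    assert stream[4:84] == meta and stream[84:l] == delta
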
751 760 def addgroup(self, revs, linkmapper, transaction, unique=0):
752 761 """
753 762 add a delta group
754 763
755 764 given a set of deltas, add them to the revision log. the
756 765 first delta is against its parent, which should be in our
757 766 log, the rest are against the previous delta.
758 767 """
759 768
760 769 #track the base of the current delta log
761 770 r = self.count()
762 771 t = r - 1
763 772 node = nullid
764 773
765 774 base = prev = -1
766 775 start = end = measure = 0
767 776 if r:
768 777 start = self.start(self.base(t))
769 778 end = self.end(t)
770 779 measure = self.length(self.base(t))
771 780 base = self.base(t)
772 781 prev = self.tip()
773 782
774 783 transaction.add(self.datafile, end)
775 784 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
776 785 dfh = self.opener(self.datafile, "a")
777 786 ifh = self.opener(self.indexfile, "a")
778 787
779 788 # loop through our set of deltas
780 789 chain = None
781 790 for chunk in revs:
782 791 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
783 792 link = linkmapper(cs)
784 793 if node in self.nodemap:
785 794 # this can happen if two branches make the same change
786 795 # if unique:
787 796 # raise RevlogError(_("already have %s") % hex(node[:4]))
788 797 chain = node
789 798 continue
790 799 delta = chunk[80:]
791 800
792 801 for p in (p1, p2):
793 802 if not p in self.nodemap:
794 803 raise RevlogError(_("unknown parent %s") % short(p1))
795 804
796 805 if not chain:
797 806 # retrieve the parent revision of the delta chain
798 807 chain = p1
799 808 if not chain in self.nodemap:
800 809 raise RevlogError(_("unknown base %s") % short(chain[:4]))
801 810
802 811 # full versions are inserted when the needed deltas become
803 812 # comparable to the uncompressed text or when the previous
804 813 # version is not the one we have a delta against. We use
805 814 # the size of the previous full rev as a proxy for the
806 815 # current size.
807 816
808 817 if chain == prev:
809 818 tempd = compress(delta)
810 819 cdelta = tempd[0] + tempd[1]
811 820
812 821 if chain != prev or (end - start + len(cdelta)) > measure * 2:
813 822 # flush our writes here so we can read it in revision
814 823 dfh.flush()
815 824 ifh.flush()
816 825 text = self.revision(chain)
817 826 text = self.patches(text, [delta])
818 827 chk = self.addrevision(text, transaction, link, p1, p2)
819 828 if chk != node:
820 829 raise RevlogError(_("consistency error adding group"))
821 830 measure = len(text)
822 831 else:
823 832 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
824 833 self.index.append(e)
825 834 self.nodemap[node] = r
826 835 dfh.write(cdelta)
827 836 ifh.write(struct.pack(indexformat, *e))
828 837
829 838 t, r, chain, prev = r, r + 1, node, node
830 839 start = self.start(self.base(t))
831 840 end = self.end(t)
832 841
833 842 dfh.close()
834 843 ifh.close()
835 844 return node
836 845
846 def strip(self, rev, minlink):
847 if self.count() == 0 or rev >= self.count():
848 return
849
850 # When stripping away a revision, we need to make sure it
851 # does not actually belong to an older changeset.
852 # The minlink parameter defines the oldest revision
853 # we're allowed to strip away.
854 while minlink > self.index[rev][3]:
855 rev += 1
856 if rev >= self.count():
857 return
858
859 # first truncate the files on disk
860 end = self.start(rev)
861 self.opener(self.datafile, "a").truncate(end)
862 end = rev * struct.calcsize(indexformat)
863 self.opener(self.indexfile, "a").truncate(end)
864
865 # then reset internal state in memory to forget those revisions
866 self.cache = None
867 for p in self.index[rev:]:
868 del self.nodemap[p[6]]
869 del self.index[rev:]
870
871 # truncating the lazyindex also truncates the lazymap.
872 if isinstance(self.index, lazyindex):
873 self.index.trunc(end)
874
875
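
The truncation arithmetic of the new strip(), stand-alone (fabricated index, illustration only): walk rev forward past entries whose linkrev is older than minlink, then cut the data file at start(rev) and the index file at rev * entrysize.

    import struct
    s = struct.calcsize(">4l20s20s20s")      # 76 bytes per index entry
    # (offset, size, base, linkrev) per rev
    index = [(0, 10, 0, 0), (10, 4, 0, 1), (14, 6, 0, 2)]
    rev, minlink = 1, 2
    while minlink > index[rev][3]:           # never strip older changesets' revs
        rev += 1
    data_end = index[rev][0]                 # truncate the .d file here
    index_end = rev * s                      # truncate the .i file here
    del index[rev:]
    assert (rev, data_end, index_end, len(index)) == (2, 14, 152, 2)
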
837 876 def checksize(self):
838 877 expected = 0
839 878 if self.count():
840 879 expected = self.end(self.count() - 1)
841 880 try:
842 881 f = self.opener(self.datafile)
843 882 f.seek(0, 2)
844 883 actual = f.tell()
845 884 return expected - actual
846 885 except IOError, inst:
847 886 if inst.errno == errno.ENOENT:
848 887 return 0
849 888 raise
850 889
851 890