Handle empty logs in repo.checksize
Matt Mackall
r1494:249ca10d default
@@ -1,834 +1,841 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
17 17
18 18 def hash(text, p1, p2):
19 19 """generate a hash from the given text and its parent hashes
20 20
21 21 This hash combines both the current file contents and its history
22 22 in a manner that makes it easy to distinguish nodes with the same
23 23 content in the revision graph.
24 24 """
25 25 l = [p1, p2]
26 26 l.sort()
27 27 s = sha.new(l[0])
28 28 s.update(l[1])
29 29 s.update(text)
30 30 return s.digest()
31 31
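# Illustrative sketch (not part of the original file): the node hash mixes
# both parent hashes into the digest, so identical text reached through
# different histories yields distinct nodeids, while parent order does not
# matter because the parents are sorted first.
def _hash_demo():
    root = hash("data", nullid, nullid)
    child = hash("data", root, nullid)           # same text, different parent
    assert root != child
    assert hash("data", nullid, root) == child   # parent order is canonical
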
32 32 def compress(text):
33 33 """ generate a possibly-compressed representation of text """
34 34 if not text: return text
35 35 if len(text) < 44:
36 36 if text[0] == '\0': return text
37 37 return 'u' + text
38 38 bin = zlib.compress(text)
39 39 if len(bin) > len(text):
40 40 if text[0] == '\0': return text
41 41 return 'u' + text
42 42 return bin
43 43
44 44 def decompress(bin):
45 45 """ decompress the given input """
46 46 if not bin: return bin
47 47 t = bin[0]
48 48 if t == '\0': return bin
49 49 if t == 'x': return zlib.decompress(bin)
50 50 if t == 'u': return bin[1:]
51 51 raise RevlogError(_("unknown compression type %s") % t)
52 52
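# Illustrative sketch (not part of the original file): round-tripping the
# three encodings produced by compress() -- 'u' + literal text for short or
# incompressible data, a zlib stream (which begins with 'x') otherwise, and
# the empty string passed through untouched.
def _compress_demo():
    short = "hello"                  # under 44 bytes: stored as 'u' + text
    big = "x" * 1000                 # highly repetitive: zlib wins
    assert compress(short)[0] == 'u'
    assert compress(big)[0] == 'x'   # zlib streams start with 'x'
    for t in ("", short, big):
        assert decompress(compress(t)) == t
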
53 53 indexformat = ">4l20s20s20s"
54 54
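# Illustrative sketch (not part of the original file): each index entry is a
# fixed 76-byte record under the format above -- four big-endian longs
# followed by three 20-byte strings: offset, size, base, linkrev, p1, p2,
# and the entry's own nodeid.
def _indexformat_demo():
    import struct
    assert struct.calcsize(indexformat) == 4 * 4 + 3 * 20   # 76 bytes
    e = (0, 11, 0, 0, nullid, nullid, "\x01" * 20)
    record = struct.pack(indexformat, *e)
    assert struct.unpack(indexformat, record) == e
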
55 55 class lazyparser:
56 56 """
57 57 this class avoids the need to parse the entirety of large indices
58 58
59 59 By default we parse and load 1000 entries at a time.
60 60
61 61 If no position is specified, we load the whole index, and replace
62 62 the lazy objects in revlog with the underlying objects for
63 63 efficiency in cases where we look at most of the nodes.
64 64 """
65 65 def __init__(self, data, revlog):
66 66 self.data = data
67 67 self.s = struct.calcsize(indexformat)
68 68 self.l = len(data)/self.s
69 69 self.index = [None] * self.l
70 70 self.map = {nullid: -1}
71 71 self.all = 0
72 72 self.revlog = revlog
73 73
74 74 def load(self, pos=None):
75 75 if self.all: return
76 76 if pos is not None:
77 77 block = pos / 1000
78 78 i = block * 1000
79 79 end = min(self.l, i + 1000)
80 80 else:
81 81 self.all = 1
82 82 i = 0
83 83 end = self.l
84 84 self.revlog.index = self.index
85 85 self.revlog.nodemap = self.map
86 86
87 87 while i < end:
88 88 d = self.data[i * self.s: (i + 1) * self.s]
89 89 e = struct.unpack(indexformat, d)
90 90 self.index[i] = e
91 91 self.map[e[6]] = i
92 92 i += 1
93 93
94 94 class lazyindex:
95 95 """a lazy version of the index array"""
96 96 def __init__(self, parser):
97 97 self.p = parser
98 98 def __len__(self):
99 99 return len(self.p.index)
100 100 def load(self, pos):
101 101 if pos < 0:
102 102 pos += len(self.p.index)
103 103 self.p.load(pos)
104 104 return self.p.index[pos]
105 105 def __getitem__(self, pos):
106 106 return self.p.index[pos] or self.load(pos)
107 107 def append(self, e):
108 108 self.p.index.append(e)
109 109
110 110 class lazymap:
111 111 """a lazy version of the node map"""
112 112 def __init__(self, parser):
113 113 self.p = parser
114 114 def load(self, key):
115 115 if self.p.all: return
116 116 n = self.p.data.find(key)
117 117 if n < 0:
118 118 raise KeyError(key)
119 119 pos = n / self.p.s
120 120 self.p.load(pos)
121 121 def __contains__(self, key):
122 122 self.p.load()
123 123 return key in self.p.map
124 124 def __iter__(self):
125 125 yield nullid
126 126 for i in xrange(self.p.l):
127 127 try:
128 128 yield self.p.index[i][6]
129 129 except:
130 130 self.p.load(i)
131 131 yield self.p.index[i][6]
132 132 def __getitem__(self, key):
133 133 try:
134 134 return self.p.map[key]
135 135 except KeyError:
136 136 try:
137 137 self.load(key)
138 138 return self.p.map[key]
139 139 except KeyError:
140 140 raise KeyError("node " + hex(key))
141 141 def __setitem__(self, key, val):
142 142 self.p.map[key] = val
143 143
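# Illustrative sketch (not part of the original file): lazymap resolves an
# unknown nodeid by searching the raw index data for the 20-byte key and
# then parsing only the 1000-entry block around the hit, rather than the
# whole index.
def _lazymap_demo(data, rl):
    # `data` is assumed to be raw index bytes and `rl` a revlog instance
    import struct
    p = lazyparser(data, rl)
    m = lazymap(p)
    s = struct.calcsize(indexformat)
    node = data[s - 20:s]        # nodeid is the last 20 bytes of entry 0
    return m[node]               # loads one block and returns revision 0
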
144 144 class RevlogError(Exception): pass
145 145
146 146 class revlog:
147 147 """
148 148 the underlying revision storage object
149 149
150 150 A revlog consists of two parts, an index and the revision data.
151 151
152 152 The index is a file with a fixed record size containing
153 153 information on each revision, including its nodeid (hash), the
154 154 nodeids of its parents, the position and offset of its data within
155 155 the data file, and the revision it's based on. Finally, each entry
156 156 contains a linkrev entry that can serve as a pointer to external
157 157 data.
158 158
159 159 The revision data itself is a linear collection of data chunks.
160 160 Each chunk represents a revision and is usually represented as a
161 161 delta against the previous chunk. To bound lookup time, runs of
162 162 deltas are limited to about 2 times the length of the original
163 163 version data. This makes retrieval of a version proportional to
164 164 its size, or O(1) relative to the number of revisions.
165 165
166 166 Both pieces of the revlog are written to in an append-only
167 167 fashion, which means we never need to rewrite a file to insert or
168 168 remove data, and can use some simple techniques to avoid the need
169 169 for locking while reading.
170 170 """
171 171 def __init__(self, opener, indexfile, datafile):
172 172 """
173 173 create a revlog object
174 174
175 175 opener is a function that abstracts the file opening operation
176 176 and can be used to implement COW semantics or the like.
177 177 """
178 178 self.indexfile = indexfile
179 179 self.datafile = datafile
180 180 self.opener = opener
181 181 self.cache = None
182 182
183 183 try:
184 184 i = self.opener(self.indexfile).read()
185 185 except IOError, inst:
186 186 if inst.errno != errno.ENOENT:
187 187 raise
188 188 i = ""
189 189
190 190 if len(i) > 10000:
191 191 # big index, let's parse it on demand
192 192 parser = lazyparser(i, self)
193 193 self.index = lazyindex(parser)
194 194 self.nodemap = lazymap(parser)
195 195 else:
196 196 s = struct.calcsize(indexformat)
197 197 l = len(i) / s
198 198 self.index = [None] * l
199 199 m = [None] * l
200 200
201 201 n = 0
202 202 for f in xrange(0, len(i), s):
203 203 # offset, size, base, linkrev, p1, p2, nodeid
204 204 e = struct.unpack(indexformat, i[f:f + s])
205 205 m[n] = (e[6], n)
206 206 self.index[n] = e
207 207 n += 1
208 208
209 209 self.nodemap = dict(m)
210 210 self.nodemap[nullid] = -1
211 211
212 212 def tip(self): return self.node(len(self.index) - 1)
213 213 def count(self): return len(self.index)
214 214 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
215 215 def rev(self, node):
216 216 try:
217 217 return self.nodemap[node]
218 218 except KeyError:
219 219 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
220 220 def linkrev(self, node): return self.index[self.rev(node)][3]
221 221 def parents(self, node):
222 222 if node == nullid: return (nullid, nullid)
223 223 return self.index[self.rev(node)][4:6]
224 224
225 225 def start(self, rev): return self.index[rev][0]
226 226 def length(self, rev): return self.index[rev][1]
227 227 def end(self, rev): return self.start(rev) + self.length(rev)
228 228 def base(self, rev): return self.index[rev][2]
229 229
230 230 def reachable(self, rev, stop=None):
231 231 reachable = {}
232 232 visit = [rev]
233 233 reachable[rev] = 1
234 234 if stop:
235 235 stopn = self.rev(stop)
236 236 else:
237 237 stopn = 0
238 238 while visit:
239 239 n = visit.pop(0)
240 240 if n == stop:
241 241 continue
242 242 if n == nullid:
243 243 continue
244 244 for p in self.parents(n):
245 245 if self.rev(p) < stopn:
246 246 continue
247 247 if p not in reachable:
248 248 reachable[p] = 1
249 249 visit.append(p)
250 250 return reachable
251 251
252 252 def nodesbetween(self, roots=None, heads=None):
253 253 """Return a tuple containing three elements. Elements 1 and 2 contain
254 254 the final lists of bases and heads after all the unreachable ones have
255 255 been pruned. Element 0 contains a topologically sorted list of all
256 256 nodes that satisfy these constraints:
258 258 1. All nodes must be descended from a node in roots (the nodes on
259 259 roots are considered descended from themselves).
260 260 2. All nodes must also be ancestors of a node in heads (the nodes in
261 261 heads are considered to be their own ancestors).
262 262
263 263 If roots is unspecified, nullid is assumed as the only root.
264 264 If heads is unspecified, it is taken to be the output of the
265 265 heads method (i.e. a list of all nodes in the repository that
266 266 have no children)."""
267 267 nonodes = ([], [], [])
268 268 if roots is not None:
269 269 roots = list(roots)
270 270 if not roots:
271 271 return nonodes
272 272 lowestrev = min([self.rev(n) for n in roots])
273 273 else:
274 274 roots = [nullid] # Everybody's a descendent of nullid
275 275 lowestrev = -1
276 276 if (lowestrev == -1) and (heads is None):
277 277 # We want _all_ the nodes!
278 278 return ([self.node(r) for r in xrange(0, self.count())],
279 279 [nullid], list(self.heads()))
280 280 if heads is None:
281 281 # All nodes are ancestors, so the latest ancestor is the last
282 282 # node.
283 283 highestrev = self.count() - 1
284 284 # Set ancestors to None to signal that every node is an ancestor.
285 285 ancestors = None
286 286 # Set heads to an empty dictionary for later discovery of heads
287 287 heads = {}
288 288 else:
289 289 heads = list(heads)
290 290 if not heads:
291 291 return nonodes
292 292 ancestors = {}
293 293 # Start at the top and keep marking parents until we're done.
294 294 nodestotag = heads[:]
295 295 # Turn heads into a dictionary so we can remove 'fake' heads.
296 296 # Also, later we will be using it to filter out the heads we can't
297 297 # find from roots.
298 298 heads = dict.fromkeys(heads, 0)
299 299 # Remember where the top was so we can use it as a limit later.
300 300 highestrev = max([self.rev(n) for n in nodestotag])
301 301 while nodestotag:
302 302 # grab a node to tag
303 303 n = nodestotag.pop()
304 304 # Never tag nullid
305 305 if n == nullid:
306 306 continue
307 307 # A node's revision number represents its place in a
308 308 # topologically sorted list of nodes.
309 309 r = self.rev(n)
310 310 if r >= lowestrev:
311 311 if n not in ancestors:
312 312 # If we are possibly a descendent of one of the roots
313 313 # and we haven't already been marked as an ancestor
314 314 ancestors[n] = 1 # Mark as ancestor
315 315 # Add non-nullid parents to list of nodes to tag.
316 316 nodestotag.extend([p for p in self.parents(n) if
317 317 p != nullid])
318 318 elif n in heads: # We've seen it before, is it a fake head?
319 319 # So it is, real heads should not be the ancestors of
320 320 # any other heads.
321 321 heads.pop(n)
322 322 if not ancestors:
323 323 return nonodes
324 324 # Now that we have our set of ancestors, we want to remove any
325 325 # roots that are not ancestors.
326 326
327 327 # If one of the roots was nullid, everything is included anyway.
328 328 if lowestrev > -1:
329 329 # But, since we weren't, let's recompute the lowest rev to not
330 330 # include roots that aren't ancestors.
331 331
332 332 # Filter out roots that aren't ancestors of heads
333 333 roots = [n for n in roots if n in ancestors]
334 334 # Recompute the lowest revision
335 335 if roots:
336 336 lowestrev = min([self.rev(n) for n in roots])
337 337 else:
338 338 # No more roots? Return empty list
339 339 return nonodes
340 340 else:
341 341 # We are descending from nullid, and don't need to care about
342 342 # any other roots.
343 343 lowestrev = -1
344 344 roots = [nullid]
345 345 # Transform our roots list into a 'set' (i.e. a dictionary where the
346 346 # values don't matter).
347 347 descendents = dict.fromkeys(roots, 1)
348 348 # Also, keep the original roots so we can filter out roots that aren't
349 349 # 'real' roots (i.e. are descended from other roots).
350 350 roots = descendents.copy()
351 351 # Our topologically sorted list of output nodes.
352 352 orderedout = []
353 353 # Don't start at nullid since we don't want nullid in our output list,
354 354 # and if nullid shows up in descendents, empty parents will look like
355 355 # they're descendents.
356 356 for r in xrange(max(lowestrev, 0), highestrev + 1):
357 357 n = self.node(r)
358 358 isdescendent = False
359 359 if lowestrev == -1: # Everybody is a descendent of nullid
360 360 isdescendent = True
361 361 elif n in descendents:
362 362 # n is already a descendent
363 363 isdescendent = True
364 364 # This check only needs to be done here because all the roots
365 365 # will start being marked as descendents before the loop.
366 366 if n in roots:
367 367 # If n was a root, check if it's a 'real' root.
368 368 p = tuple(self.parents(n))
369 369 # If any of its parents are descendents, it's not a root.
370 370 if (p[0] in descendents) or (p[1] in descendents):
371 371 roots.pop(n)
372 372 else:
373 373 p = tuple(self.parents(n))
374 374 # A node is a descendent if either of its parents are
375 375 # descendents. (We seeded the descendents list with the roots
376 376 # up there, remember?)
377 377 if (p[0] in descendents) or (p[1] in descendents):
378 378 descendents[n] = 1
379 379 isdescendent = True
380 380 if isdescendent and ((ancestors is None) or (n in ancestors)):
381 381 # Only include nodes that are both descendents and ancestors.
382 382 orderedout.append(n)
383 383 if (ancestors is not None) and (n in heads):
384 384 # We're trying to figure out which heads are reachable
385 385 # from roots.
386 386 # Mark this head as having been reached
387 387 heads[n] = 1
388 388 elif ancestors is None:
389 389 # Otherwise, we're trying to discover the heads.
390 390 # Assume this is a head because if it isn't, the next step
391 391 # will eventually remove it.
392 392 heads[n] = 1
393 393 # But, obviously its parents aren't.
394 394 for p in self.parents(n):
395 395 heads.pop(p, None)
396 396 heads = [n for n in heads.iterkeys() if heads[n] != 0]
397 397 roots = roots.keys()
398 398 assert orderedout
399 399 assert roots
400 400 assert heads
401 401 return (orderedout, roots, heads)
402 402
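# Illustrative sketch (not part of the original file): on a revlog whose
# first three revisions form a linear chain a -> b -> c, nodesbetween
# prunes everything outside the requested roots and heads and returns the
# remaining segment in topological order.
def _nodesbetween_demo(log):
    a, b, c = log.node(0), log.node(1), log.node(2)
    ordered, roots, heads = log.nodesbetween([b], [c])
    assert ordered == [b, c]
    assert roots == [b] and heads == [c]
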
403 403 def heads(self, stop=None):
404 404 """return the list of all nodes that have no children"""
405 405 p = {}
406 406 h = []
407 407 stoprev = 0
408 408 if stop and stop in self.nodemap:
409 409 stoprev = self.rev(stop)
410 410
411 411 for r in range(self.count() - 1, -1, -1):
412 412 n = self.node(r)
413 413 if n not in p:
414 414 h.append(n)
415 415 if n == stop:
416 416 break
417 417 if r < stoprev:
418 418 break
419 419 for pn in self.parents(n):
420 420 p[pn] = 1
421 421 return h
422 422
423 423 def children(self, node):
424 424 """find the children of a given node"""
425 425 c = []
426 426 p = self.rev(node)
427 427 for r in range(p + 1, self.count()):
428 428 n = self.node(r)
429 429 for pn in self.parents(n):
430 430 if pn == node:
431 431 c.append(n)
432 432 continue
433 433 elif pn == nullid:
434 434 continue
435 435 return c
436 436
437 437 def lookup(self, id):
438 438 """locate a node based on revision number or subset of hex nodeid"""
439 439 try:
440 440 rev = int(id)
441 441 if str(rev) != id: raise ValueError
442 442 if rev < 0: rev = self.count() + rev
443 443 if rev < 0 or rev >= self.count(): raise ValueError
444 444 return self.node(rev)
445 445 except (ValueError, OverflowError):
446 446 c = []
447 447 for n in self.nodemap:
448 448 if hex(n).startswith(id):
449 449 c.append(n)
450 450 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
451 451 if len(c) < 1: raise RevlogError(_("No match found"))
452 452 return c[0]
453 453
454 454 return None
455 455
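# Illustrative sketch (not part of the original file): lookup() accepts a
# decimal revision number (negative values count back from tip) or a hex
# nodeid prefix, provided the prefix is unambiguous.
def _lookup_demo(log):
    tip = log.tip()
    assert log.lookup(str(log.count() - 1)) == tip
    assert log.lookup("-1") == tip            # negative indexes from tip
    # assumes the 8-character prefix is unique in this revlog
    assert log.lookup(hex(tip)[:8]) == tip
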
456 456 def diff(self, a, b):
457 457 """return a delta between two revisions"""
458 458 return mdiff.textdiff(a, b)
459 459
460 460 def patches(self, t, pl):
461 461 """apply a list of patches to a string"""
462 462 return mdiff.patches(t, pl)
463 463
464 464 def delta(self, node):
465 465 """return or calculate a delta between a node and its predecessor"""
466 466 r = self.rev(node)
467 467 b = self.base(r)
468 468 if r == b:
469 469 return self.diff(self.revision(self.node(r - 1)),
470 470 self.revision(node))
471 471 else:
472 472 f = self.opener(self.datafile)
473 473 f.seek(self.start(r))
474 474 data = f.read(self.length(r))
475 475 return decompress(data)
476 476
477 477 def revision(self, node):
478 478 """return an uncompressed revision of a given node"""
479 479 if node == nullid: return ""
480 480 if self.cache and self.cache[0] == node: return self.cache[2]
481 481
482 482 # look up what we need to read
483 483 text = None
484 484 rev = self.rev(node)
485 485 start, length, base, link, p1, p2, node = self.index[rev]
486 486 end = start + length
487 487 if base != rev: start = self.start(base)
488 488
489 489 # do we have useful data cached?
490 490 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
491 491 base = self.cache[1]
492 492 start = self.start(base + 1)
493 493 text = self.cache[2]
494 494 last = 0
495 495
496 496 f = self.opener(self.datafile)
497 497 f.seek(start)
498 498 data = f.read(end - start)
499 499
500 500 if text is None:
501 501 last = self.length(base)
502 502 text = decompress(data[:last])
503 503
504 504 bins = []
505 505 for r in xrange(base + 1, rev + 1):
506 506 s = self.length(r)
507 507 bins.append(decompress(data[last:last + s]))
508 508 last = last + s
509 509
510 510 text = mdiff.patches(text, bins)
511 511
512 512 if node != hash(text, p1, p2):
513 513 raise RevlogError(_("integrity check failed on %s:%d")
514 514 % (self.datafile, rev))
515 515
516 516 self.cache = (node, rev, text)
517 517 return text
518 518
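# Illustrative sketch (not part of the original file): the reconstruction
# performed by revision() above -- read the last full snapshot (the base),
# then patch forward through each intervening delta. Because delta runs are
# length-bounded, the total data read stays proportional to the size of the
# version itself.
def _reconstruct_demo(log, rev):
    base = log.base(rev)
    f = log.opener(log.datafile)
    f.seek(log.start(base))
    text = decompress(f.read(log.length(base)))    # full snapshot
    for r in xrange(base + 1, rev + 1):            # apply each delta in turn
        f.seek(log.start(r))
        text = mdiff.patches(text, [decompress(f.read(log.length(r)))])
    assert text == log.revision(log.node(rev))
    return text
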
519 519 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
520 520 """add a revision to the log
521 521
522 522 text - the revision data to add
523 523 transaction - the transaction object used for rollback
524 524 link - the linkrev data to add
525 525 p1, p2 - the parent nodeids of the revision
526 526 d - an optional precomputed delta
527 527 """
528 528 if text is None: text = ""
529 529 if p1 is None: p1 = self.tip()
530 530 if p2 is None: p2 = nullid
531 531
532 532 node = hash(text, p1, p2)
533 533
534 534 if node in self.nodemap:
535 535 return node
536 536
537 537 n = self.count()
538 538 t = n - 1
539 539
540 540 if n:
541 541 base = self.base(t)
542 542 start = self.start(base)
543 543 end = self.end(t)
544 544 if not d:
545 545 prev = self.revision(self.tip())
546 546 d = self.diff(prev, text)
547 547 data = compress(d)
548 548 dist = end - start + len(data)
549 549
550 550 # full versions are inserted when the needed deltas
551 551 # become comparable to the uncompressed text
552 552 if not n or dist > len(text) * 2:
553 553 data = compress(text)
554 554 base = n
555 555 else:
556 556 base = self.base(t)
557 557
558 558 offset = 0
559 559 if t >= 0:
560 560 offset = self.end(t)
561 561
562 562 e = (offset, len(data), base, link, p1, p2, node)
563 563
564 564 self.index.append(e)
565 565 self.nodemap[node] = n
566 566 entry = struct.pack(indexformat, *e)
567 567
568 568 transaction.add(self.datafile, e[0])
569 569 self.opener(self.datafile, "a").write(data)
570 570 transaction.add(self.indexfile, n * len(entry))
571 571 self.opener(self.indexfile, "a").write(entry)
572 572
573 573 self.cache = (node, n, text)
574 574 return node
575 575
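# Illustrative sketch (not part of the original file): the snapshot
# heuristic used above. Once the span from the last full version through
# the new compressed delta grows past twice the size of the text being
# stored, a fresh full version is written and the chain is reset.
def _storage_choice(dist, textlen):
    if dist > textlen * 2:
        return "snapshot"    # store compress(text); base becomes this rev
    return "delta"           # append compress(d) to the current chain
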
576 576 def ancestor(self, a, b):
577 577 """calculate the least common ancestor of nodes a and b"""
578 578 # calculate the distance of every node from root
579 579 dist = {nullid: 0}
580 580 for i in xrange(self.count()):
581 581 n = self.node(i)
582 582 p1, p2 = self.parents(n)
583 583 dist[n] = max(dist[p1], dist[p2]) + 1
584 584
585 585 # traverse ancestors in order of decreasing distance from root
586 586 def ancestors(node):
587 587 # we store negative distances because heap returns smallest member
588 588 h = [(-dist[node], node)]
589 589 seen = {}
590 590 earliest = self.count()
591 591 while h:
592 592 d, n = heapq.heappop(h)
593 593 if n not in seen:
594 594 seen[n] = 1
595 595 r = self.rev(n)
596 596 yield (-d, n)
597 597 for p in self.parents(n):
598 598 heapq.heappush(h, (-dist[p], p))
599 599
600 600 def generations(node):
601 601 sg, s = None, {}
602 602 for g,n in ancestors(node):
603 603 if g != sg:
604 604 if sg:
605 605 yield sg, s
606 606 sg, s = g, {n:1}
607 607 else:
608 608 s[n] = 1
609 609 yield sg, s
610 610
611 611 x = generations(a)
612 612 y = generations(b)
613 613 gx = x.next()
614 614 gy = y.next()
615 615
616 616 # increment each ancestor list until it is closer to root than
617 617 # the other, or they match
618 618 while 1:
619 619 #print "ancestor gen %s %s" % (gx[0], gy[0])
620 620 if gx[0] == gy[0]:
621 621 # find the intersection
622 622 i = [ n for n in gx[1] if n in gy[1] ]
623 623 if i:
624 624 return i[0]
625 625 else:
626 626 #print "next"
627 627 gy = y.next()
628 628 gx = x.next()
629 629 elif gx[0] < gy[0]:
630 630 #print "next y"
631 631 gy = y.next()
632 632 else:
633 633 #print "next x"
634 634 gx = x.next()
635 635
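# Illustrative sketch (not part of the original file): ancestor() walks
# both ancestor sets in lockstep, one distance-from-root generation at a
# time, and answers with a member of the first generation where the two
# sets intersect.
def _ancestor_demo(log):
    # assumes revisions 1 and 2 are both children of revision 0
    # (a simple two-branch fork)
    assert log.ancestor(log.node(1), log.node(2)) == log.node(0)
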
636 636 def group(self, nodelist, lookup, infocollect = None):
637 637 """calculate a delta group
638 638
639 639 Given a list of changeset revs, return a set of deltas and
640 640 metadata corresponding to nodes. The first delta is
641 641 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
642 642 have this parent, as it has all history before these
643 643 changesets. The parent used is parents(nodes[0])[0].
644 644 """
645 645 revs = [self.rev(n) for n in nodelist]
646 646 needed = dict.fromkeys(revs, 1)
647 647
648 648 # if we don't have any revisions touched by these changesets, bail
649 649 if not revs:
650 650 yield struct.pack(">l", 0)
651 651 return
652 652
653 653 # add the parent of the first rev
654 654 p = self.parents(self.node(revs[0]))[0]
655 655 revs.insert(0, self.rev(p))
656 656
657 657 # for each delta that isn't contiguous in the log, we need to
658 658 # reconstruct the base, reconstruct the result, and then
659 659 # calculate the delta. We also need to do this where we've
660 660 # stored a full version and not a delta
661 661 for i in xrange(0, len(revs) - 1):
662 662 a, b = revs[i], revs[i + 1]
663 663 if a + 1 != b or self.base(b) == b:
664 664 for j in xrange(self.base(a), a + 1):
665 665 needed[j] = 1
666 666 for j in xrange(self.base(b), b + 1):
667 667 needed[j] = 1
668 668
669 669 # calculate spans to retrieve from datafile
670 670 needed = needed.keys()
671 671 needed.sort()
672 672 spans = []
673 673 oo = -1
674 674 ol = 0
675 675 for n in needed:
676 676 if n < 0: continue
677 677 o = self.start(n)
678 678 l = self.length(n)
679 679 if oo + ol == o: # can we merge with the previous?
680 680 nl = spans[-1][2]
681 681 nl.append((n, l))
682 682 ol += l
683 683 spans[-1] = (oo, ol, nl)
684 684 else:
685 685 oo = o
686 686 ol = l
687 687 spans.append((oo, ol, [(n, l)]))
688 688
689 689 # read spans in, divide up chunks
690 690 chunks = {}
691 691 for span in spans:
692 692 # we reopen the file for each span to make http happy for now
693 693 f = self.opener(self.datafile)
694 694 f.seek(span[0])
695 695 data = f.read(span[1])
696 696
697 697 # divide up the span
698 698 pos = 0
699 699 for r, l in span[2]:
700 700 chunks[r] = decompress(data[pos: pos + l])
701 701 pos += l
702 702
703 703 # helper to reconstruct intermediate versions
704 704 def construct(text, base, rev):
705 705 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
706 706 return mdiff.patches(text, bins)
707 707
708 708 # build deltas
709 709 deltas = []
710 710 for d in xrange(0, len(revs) - 1):
711 711 a, b = revs[d], revs[d + 1]
712 712 n = self.node(b)
713 713
714 714 if infocollect is not None:
715 715 infocollect(n)
716 716
717 717 # do we need to construct a new delta?
718 718 if a + 1 != b or self.base(b) == b:
719 719 if a >= 0:
720 720 base = self.base(a)
721 721 ta = chunks[self.base(a)]
722 722 ta = construct(ta, base, a)
723 723 else:
724 724 ta = ""
725 725
726 726 base = self.base(b)
727 727 if a > base:
728 728 base = a
729 729 tb = ta
730 730 else:
731 731 tb = chunks[self.base(b)]
732 732 tb = construct(tb, base, b)
733 733 d = self.diff(ta, tb)
734 734 else:
735 735 d = chunks[b]
736 736
737 737 p = self.parents(n)
738 738 meta = n + p[0] + p[1] + lookup(n)
739 739 l = struct.pack(">l", len(meta) + len(d) + 4)
740 740 yield l
741 741 yield meta
742 742 yield d
743 743
744 744 yield struct.pack(">l", 0)
745 745
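# Illustrative sketch (not part of the original file): consuming the chunk
# stream emitted by group(). Each chunk is a big-endian length (counting
# the length field itself), 80 bytes of metadata (node, p1, p2, cs), then
# the delta; a zero length marks the end of the stream.
def _read_group(data):
    import struct
    pos, chunks = 0, []
    while 1:
        l = struct.unpack(">l", data[pos:pos + 4])[0]
        if l == 0:
            break
        meta, delta = data[pos + 4:pos + 84], data[pos + 84:pos + l]
        chunks.append((meta[0:20], meta[20:40], meta[40:60],
                       meta[60:80], delta))
        pos += l
    return chunks
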
746 746 def addgroup(self, revs, linkmapper, transaction, unique=0):
747 747 """
748 748 add a delta group
749 749
750 750 Given a set of deltas, add them to the revision log. The
751 751 first delta is against its parent, which should be in our
752 752 log; the rest are against the previous delta.
753 753 """
754 754
755 755 #track the base of the current delta log
756 756 r = self.count()
757 757 t = r - 1
758 758 node = nullid
759 759
760 760 base = prev = -1
761 761 start = end = measure = 0
762 762 if r:
763 763 start = self.start(self.base(t))
764 764 end = self.end(t)
765 765 measure = self.length(self.base(t))
766 766 base = self.base(t)
767 767 prev = self.tip()
768 768
769 769 transaction.add(self.datafile, end)
770 770 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
771 771 dfh = self.opener(self.datafile, "a")
772 772 ifh = self.opener(self.indexfile, "a")
773 773
774 774 # loop through our set of deltas
775 775 chain = None
776 776 for chunk in revs:
777 777 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
778 778 link = linkmapper(cs)
779 779 if node in self.nodemap:
780 780 # this can happen if two branches make the same change
781 781 # if unique:
782 782 # raise RevlogError(_("already have %s") % hex(node[:4]))
783 783 chain = node
784 784 continue
785 785 delta = chunk[80:]
786 786
787 787 if not chain:
788 788 # retrieve the parent revision of the delta chain
789 789 chain = p1
790 790 if not chain in self.nodemap:
791 791 raise RevlogError(_("unknown base %s") % short(chain[:4]))
792 792
793 793 # full versions are inserted when the needed deltas become
794 794 # comparable to the uncompressed text or when the previous
795 795 # version is not the one we have a delta against. We use
796 796 # the size of the previous full rev as a proxy for the
797 797 # current size.
798 798
799 799 if chain == prev:
800 800 cdelta = compress(delta)
801 801
802 802 if chain != prev or (end - start + len(cdelta)) > measure * 2:
803 803 # flush our writes here so we can read it in revision
804 804 dfh.flush()
805 805 ifh.flush()
806 806 text = self.revision(chain)
807 807 text = self.patches(text, [delta])
808 808 chk = self.addrevision(text, transaction, link, p1, p2)
809 809 if chk != node:
810 810 raise RevlogError(_("consistency error adding group"))
811 811 measure = len(text)
812 812 else:
813 813 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
814 814 self.index.append(e)
815 815 self.nodemap[node] = r
816 816 dfh.write(cdelta)
817 817 ifh.write(struct.pack(indexformat, *e))
818 818
819 819 t, r, chain, prev = r, r + 1, node, node
820 820 start = self.start(self.base(t))
821 821 end = self.end(t)
822 822
823 823 dfh.close()
824 824 ifh.close()
825 825 return node
826 826
827 827 def checksize(self):
828 828 expected = 0
829 829 if self.count():
830 830 expected = self.end(self.count() - 1)
831 try:
831 832 f = self.opener(self.datafile)
832 833 f.seek(0, 2)
833 834 actual = f.tell()
834 835 return expected - actual
836 except IOError, inst:
837 if inst.errno == errno.ENOENT:
838 return 0
839 raise
840
841
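# Illustrative sketch (not part of the original file): interpreting
# checksize(). It returns expected - actual bytes for the data file, so 0
# means the file matches the index, a positive value means truncation, and
# a negative value means trailing garbage. With the change above, a missing
# data file on an empty log now yields 0 instead of raising ENOENT.
def _verify_demo(log):
    d = log.checksize()
    if d > 0:
        raise RevlogError(_("data file is %d bytes short") % d)
    elif d < 0:
        raise RevlogError(_("data file has %d extra bytes") % -d)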