Fix to handle case of empty list for roots or heads in nodesbetween.
Eric Hopper
r1463:26e73acc default
@@ -1,816 +1,822 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from demandload import demandload
15 15 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
16 16
17 17 def hash(text, p1, p2):
18 18 """generate a hash from the given text and its parent hashes
19 19
20 20 This hash combines both the current file contents and its history
21 21 in a manner that makes it easy to distinguish nodes with the same
22 22 content in the revision graph.
23 23 """
24 24 l = [p1, p2]
25 25 l.sort()
26 26 s = sha.new(l[0])
27 27 s.update(l[1])
28 28 s.update(text)
29 29 return s.digest()
30 30
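A minimal sketch of the property that docstring describes: because the parents are sorted before hashing, the resulting nodeid is independent of parent order. It restates the function above against the same Python 2 sha module, with made-up 20-byte ids:

import sha

def nodehash(text, p1, p2):
    # same logic as hash() above, renamed to avoid shadowing
    l = [p1, p2]
    l.sort()
    s = sha.new(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

p1, p2 = '\x11' * 20, '\x22' * 20      # hypothetical parent nodeids
assert nodehash('text', p1, p2) == nodehash('text', p2, p1)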
31 31 def compress(text):
32 32 """ generate a possibly-compressed representation of text """
33 33 if not text: return text
34 34 if len(text) < 44:
35 35 if text[0] == '\0': return text
36 36 return 'u' + text
37 37 bin = zlib.compress(text)
38 38 if len(bin) > len(text):
39 39 if text[0] == '\0': return text
40 40 return 'u' + text
41 41 return bin
42 42
43 43 def decompress(bin):
44 44 """ decompress the given input """
45 45 if not bin: return bin
46 46 t = bin[0]
47 47 if t == '\0': return bin
48 48 if t == 'x': return zlib.decompress(bin)
49 49 if t == 'u': return bin[1:]
50 50 raise RevlogError("unknown compression type %s" % t)
51 51
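A quick round trip over the three envelopes decompress() dispatches on. It relies on zlib output starting with 'x' (0x78), while literal text carries a 'u' marker and empty or NUL-led data is stored verbatim; ValueError stands in for RevlogError to keep the sketch self-contained:

import zlib

def decompress_sketch(bin):
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin                   # stored verbatim
    if t == 'x': return zlib.decompress(bin)   # zlib header byte is 'x'
    if t == 'u': return bin[1:]                # literal, marker stripped
    raise ValueError("unknown compression type %s" % t)

assert decompress_sketch('') == ''
assert decompress_sketch('u' + 'tiny') == 'tiny'
assert decompress_sketch(zlib.compress('a' * 100)) == 'a' * 100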
52 52 indexformat = ">4l20s20s20s"
53 53
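Spelled out, that format string is four big-endian 32-bit fields followed by three raw sha1 digests, 76 bytes per index record:

import struct

# offset, length, base, linkrev (4 x 4 bytes), then p1, p2, node
# (3 x 20-byte binary sha1 digests)
assert struct.calcsize(">4l20s20s20s") == 4 * 4 + 3 * 20   # 76 bytes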
54 54 class lazyparser:
55 55 """
56 56 this class avoids the need to parse the entirety of large indices
57 57
58 58 By default we parse and load 1000 entries at a time.
59 59
60 60 If no position is specified, we load the whole index, and replace
61 61 the lazy objects in revlog with the underlying objects for
62 62 efficiency in cases where we look at most of the nodes.
63 63 """
64 64 def __init__(self, data, revlog):
65 65 self.data = data
66 66 self.s = struct.calcsize(indexformat)
67 67 self.l = len(data)/self.s
68 68 self.index = [None] * self.l
69 69 self.map = {nullid: -1}
70 70 self.all = 0
71 71 self.revlog = revlog
72 72
73 73 def load(self, pos=None):
74 74 if self.all: return
75 75 if pos is not None:
76 76 block = pos / 1000
77 77 i = block * 1000
78 78 end = min(self.l, i + 1000)
79 79 else:
80 80 self.all = 1
81 81 i = 0
82 82 end = self.l
83 83 self.revlog.index = self.index
84 84 self.revlog.nodemap = self.map
85 85
86 86 while i < end:
87 87 d = self.data[i * self.s: (i + 1) * self.s]
88 88 e = struct.unpack(indexformat, d)
89 89 self.index[i] = e
90 90 self.map[e[6]] = i
91 91 i += 1
92 92
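The block arithmetic in load(), in isolation: a requested position is rounded down to its thousand-entry block and the whole block is parsed in one pass (integer division, as in this Python-2-era code):

pos = 2345
block = pos / 1000            # -> 2
i = block * 1000              # first entry of the block
end = min(5000, i + 1000)     # clamped to a hypothetical index of 5000
assert (i, end) == (2000, 3000)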
93 93 class lazyindex:
94 94 """a lazy version of the index array"""
95 95 def __init__(self, parser):
96 96 self.p = parser
97 97 def __len__(self):
98 98 return len(self.p.index)
99 99 def load(self, pos):
100 100 self.p.load(pos)
101 101 return self.p.index[pos]
102 102 def __getitem__(self, pos):
103 103 return self.p.index[pos] or self.load(pos)
104 104 def append(self, e):
105 105 self.p.index.append(e)
106 106
107 107 class lazymap:
108 108 """a lazy version of the node map"""
109 109 def __init__(self, parser):
110 110 self.p = parser
111 111 def load(self, key):
112 112 if self.p.all: return
113 113 n = self.p.data.find(key)
114 114 if n < 0:
115 115 raise KeyError(key)
116 116 pos = n / self.p.s
117 117 self.p.load(pos)
118 118 def __contains__(self, key):
119 119 self.p.load()
120 120 return key in self.p.map
121 121 def __iter__(self):
122 122 yield nullid
123 123 for i in xrange(self.p.l):
124 124 try:
125 125 yield self.p.index[i][6]
126 126 except:
127 127 self.p.load(i)
128 128 yield self.p.index[i][6]
129 129 def __getitem__(self, key):
130 130 try:
131 131 return self.p.map[key]
132 132 except KeyError:
133 133 try:
134 134 self.load(key)
135 135 return self.p.map[key]
136 136 except KeyError:
137 137 raise KeyError("node " + hex(key))
138 138 def __setitem__(self, key, val):
139 139 self.p.map[key] = val
140 140
141 141 class RevlogError(Exception): pass
142 142
143 143 class revlog:
144 144 """
145 145 the underlying revision storage object
146 146
147 147 A revlog consists of two parts, an index and the revision data.
148 148
149 149 The index is a file with a fixed record size containing
150 150 information on each revision, including its nodeid (hash), the
151 151 nodeids of its parents, the position and offset of its data within
152 152 the data file, and the revision it's based on. Finally, each entry
153 153 contains a linkrev entry that can serve as a pointer to external
154 154 data.
155 155
156 156 The revision data itself is a linear collection of data chunks.
157 157 Each chunk represents a revision and is usually represented as a
158 158 delta against the previous chunk. To bound lookup time, runs of
159 159 deltas are limited to about 2 times the length of the original
160 160 version data. This makes retrieval of a version proportional to
161 161 its size, or O(1) relative to the number of revisions.
162 162
163 163 Both pieces of the revlog are written to in an append-only
164 164 fashion, which means we never need to rewrite a file to insert or
165 165 remove data, and can use some simple techniques to avoid the need
166 166 for locking while reading.
167 167 """
168 168 def __init__(self, opener, indexfile, datafile):
169 169 """
170 170 create a revlog object
171 171
172 172 opener is a function that abstracts the file opening operation
173 173 and can be used to implement COW semantics or the like.
174 174 """
175 175 self.indexfile = indexfile
176 176 self.datafile = datafile
177 177 self.opener = opener
178 178 self.cache = None
179 179
180 180 try:
181 181 i = self.opener(self.indexfile).read()
182 182 except IOError, inst:
183 183 if inst.errno != errno.ENOENT:
184 184 raise
185 185 i = ""
186 186
187 187 if len(i) > 10000:
188 188 # big index, let's parse it on demand
189 189 parser = lazyparser(i, self)
190 190 self.index = lazyindex(parser)
191 191 self.nodemap = lazymap(parser)
192 192 else:
193 193 s = struct.calcsize(indexformat)
194 194 l = len(i) / s
195 195 self.index = [None] * l
196 196 m = [None] * l
197 197
198 198 n = 0
199 199 for f in xrange(0, len(i), s):
200 200 # offset, size, base, linkrev, p1, p2, nodeid
201 201 e = struct.unpack(indexformat, i[f:f + s])
202 202 m[n] = (e[6], n)
203 203 self.index[n] = e
204 204 n += 1
205 205
206 206 self.nodemap = dict(m)
207 207 self.nodemap[nullid] = -1
208 208
209 209 def tip(self): return self.node(len(self.index) - 1)
210 210 def count(self): return len(self.index)
211 211 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
212 212 def rev(self, node):
213 213 try:
214 214 return self.nodemap[node]
215 215 except KeyError:
216 216 raise RevlogError('%s: no node %s' % (self.indexfile, hex(node)))
217 217 def linkrev(self, node): return self.index[self.rev(node)][3]
218 218 def parents(self, node):
219 219 if node == nullid: return (nullid, nullid)
220 220 return self.index[self.rev(node)][4:6]
221 221
222 222 def start(self, rev): return self.index[rev][0]
223 223 def length(self, rev): return self.index[rev][1]
224 224 def end(self, rev): return self.start(rev) + self.length(rev)
225 225 def base(self, rev): return self.index[rev][2]
226 226
227 227 def reachable(self, rev, stop=None):
228 228 reachable = {}
229 229 visit = [rev]
230 230 reachable[rev] = 1
231 231 if stop:
232 232 stopn = self.rev(stop)
233 233 else:
234 234 stopn = 0
235 235 while visit:
236 236 n = visit.pop(0)
237 237 if n == stop:
238 238 continue
239 239 if n == nullid:
240 240 continue
241 241 for p in self.parents(n):
242 242 if self.rev(p) < stopn:
243 243 continue
244 244 if p not in reachable:
245 245 reachable[p] = 1
246 246 visit.append(p)
247 247 return reachable
248 248
249 249 def nodesbetween(self, roots=None, heads=None):
250 250 """Return a tuple containing three elements. Elements 1 and 2 contain
251 251 a final list of bases and heads after all the unreachable ones have been
252 252 pruned. Element 0 contains a topologically sorted list of all
253 253 nodes that satisfy these constraints:
254 254
255 255 1. All nodes must be descended from a node in roots (the nodes in
256 256 roots are considered descended from themselves).
257 257 2. All nodes must also be ancestors of a node in heads (the nodes in
258 258 heads are considered to be their own ancestors).
259 259
260 260 If roots is unspecified, nullid is assumed as the only root.
261 261 If heads is unspecified, it is taken to be the output of the
262 262 heads method (i.e. a list of all nodes in the repository that
263 263 have no children)."""
264 nonodes = ([], [], [])
264 265 if roots is not None:
265 266 roots = list(roots)
267 if not roots:
268 return nonodes
266 269 lowestrev = min([self.rev(n) for n in roots])
267 270 else:
268 271 roots = [nullid] # Everybody's a descendent of nullid
269 272 lowestrev = -1
270 273 if (lowestrev == -1) and (heads is None):
271 274 # We want _all_ the nodes!
272 275 return ([self.node(r) for r in xrange(0, self.count())],
273 276 [nullid], list(self.heads()))
274 277 if heads is None:
275 278 # All nodes are ancestors, so the latest ancestor is the last
276 279 # node.
277 280 highestrev = self.count() - 1
278 281 # Set ancestors to None to signal that every node is an ancestor.
279 282 ancestors = None
280 283 # Set heads to an empty dictionary for later discovery of heads
281 284 heads = {}
282 285 else:
286 heads = list(heads)
287 if not heads:
288 return nonodes
283 289 ancestors = {}
284 290 # Start at the top and keep marking parents until we're done.
285 nodestotag = list(heads)
291 nodestotag = heads[:]
286 292 # Turn heads into a dictionary so we can remove 'fake' heads.
287 293 # Also, later we will be using it to filter out the heads we can't
288 294 # find from roots.
289 295 heads = dict.fromkeys(heads, 0)
290 296 # Remember where the top was so we can use it as a limit later.
291 297 highestrev = max([self.rev(n) for n in nodestotag])
292 298 while nodestotag:
293 299 # grab a node to tag
294 300 n = nodestotag.pop()
295 301 # Never tag nullid
296 302 if n == nullid:
297 303 continue
298 304 # A node's revision number represents its place in a
299 305 # topologically sorted list of nodes.
300 306 r = self.rev(n)
301 307 if r >= lowestrev:
302 308 if n not in ancestors:
303 309 # If we are possibly a descendent of one of the roots
304 310 # and we haven't already been marked as an ancestor
305 311 ancestors[n] = 1 # Mark as ancestor
306 312 # Add non-nullid parents to list of nodes to tag.
307 313 nodestotag.extend([p for p in self.parents(n) if
308 314 p != nullid])
309 315 elif n in heads: # We've seen it before, is it a fake head?
310 316 # So it is, real heads should not be the ancestors of
311 317 # any other heads.
312 318 heads.pop(n)
313 319 if not ancestors:
314 return ([], [], [])
320 return nonodes
315 321 # Now that we have our set of ancestors, we want to remove any
316 322 # roots that are not ancestors.
317 323
318 324 # If one of the roots was nullid, everything is included anyway.
319 325 if lowestrev > -1:
320 326 # But, since we weren't, let's recompute the lowest rev to not
321 327 # include roots that aren't ancestors.
322 328
323 329 # Filter out roots that aren't ancestors of heads
324 330 roots = [n for n in roots if n in ancestors]
325 331 # Recompute the lowest revision
326 332 if roots:
327 333 lowestrev = min([self.rev(n) for n in roots])
328 334 else:
329 335 # No more roots? Return empty list
330 return ([], [], [])
336 return nonodes
331 337 else:
332 338 # We are descending from nullid, and don't need to care about
333 339 # any other roots.
334 340 lowestrev = -1
335 341 roots = [nullid]
336 342 # Transform our roots list into a 'set' (i.e. a dictionary where the
337 343 # values don't matter).
338 344 descendents = dict.fromkeys(roots, 1)
339 345 # Also, keep the original roots so we can filter out roots that aren't
340 346 # 'real' roots (i.e. are descended from other roots).
341 347 roots = descendents.copy()
342 348 # Our topologically sorted list of output nodes.
343 349 orderedout = []
344 350 # Don't start at nullid since we don't want nullid in our output list,
345 351 # and if nullid shows up in descendents, empty parents will look like
346 352 # they're descendents.
347 353 for r in xrange(max(lowestrev, 0), highestrev + 1):
348 354 n = self.node(r)
349 355 isdescendent = False
350 356 if lowestrev == -1: # Everybody is a descendent of nullid
351 357 isdescendent = True
352 358 elif n in descendents:
353 359 # n is already a descendent
354 360 isdescendent = True
355 361 # This check only needs to be done here because all the roots
356 362 # will start being marked as descendents before the loop.
357 363 if n in roots:
358 364 # If n was a root, check if it's a 'real' root.
359 365 p = tuple(self.parents(n))
360 366 # If any of its parents are descendents, it's not a root.
361 367 if (p[0] in descendents) or (p[1] in descendents):
362 368 roots.pop(n)
363 369 else:
364 370 p = tuple(self.parents(n))
365 371 # A node is a descendent if either of its parents are
366 372 # descendents. (We seeded the descendents dict with the roots
367 373 # up there, remember?)
368 374 if (p[0] in descendents) or (p[1] in descendents):
369 375 descendents[n] = 1
370 376 isdescendent = True
371 377 if isdescendent and ((ancestors is None) or (n in ancestors)):
372 378 # Only include nodes that are both descendents and ancestors.
373 379 orderedout.append(n)
374 380 if (ancestors is not None) and (n in heads):
375 381 # We're trying to figure out which heads are reachable
376 382 # from roots.
377 383 # Mark this head as having been reached
378 384 heads[n] = 1
379 385 elif ancestors is None:
380 386 # Otherwise, we're trying to discover the heads.
381 387 # Assume this is a head because if it isn't, the next step
382 388 # will eventually remove it.
383 389 heads[n] = 1
384 390 # But, obviously its parents aren't.
385 391 for p in self.parents(n):
386 392 heads.pop(p, None)
387 393 heads = [n for n in heads.iterkeys() if heads[n] != 0]
388 394 roots = roots.keys()
389 395 assert orderedout
390 396 assert roots
391 397 assert heads
392 398 return (orderedout, roots, heads)
393 399
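The guard this changeset adds, reduced to its essence: an explicitly empty roots or heads list now short-circuits to the nonodes triple, where the old code fell through to min() or max() over an empty sequence:

try:
    min([])   # what min([self.rev(n) for n in roots]) did for roots == []
except ValueError, inst:
    pass      # ValueError: min() arg is an empty sequence
# With the fix, nodesbetween(roots=[]) and nodesbetween(heads=[]) both
# return ([], [], []), while roots=None still means "descend from nullid".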
394 400 def heads(self, stop=None):
395 401 """return the list of all nodes that have no children"""
396 402 p = {}
397 403 h = []
398 404 stoprev = 0
399 405 if stop and stop in self.nodemap:
400 406 stoprev = self.rev(stop)
401 407
402 408 for r in range(self.count() - 1, -1, -1):
403 409 n = self.node(r)
404 410 if n not in p:
405 411 h.append(n)
406 412 if n == stop:
407 413 break
408 414 if r < stoprev:
409 415 break
410 416 for pn in self.parents(n):
411 417 p[pn] = 1
412 418 return h
413 419
414 420 def children(self, node):
415 421 """find the children of a given node"""
416 422 c = []
417 423 p = self.rev(node)
418 424 for r in range(p + 1, self.count()):
419 425 n = self.node(r)
420 426 for pn in self.parents(n):
421 427 if pn == node:
422 428 c.append(n)
423 429 continue
424 430 elif pn == nullid:
425 431 continue
426 432 return c
427 433
428 434 def lookup(self, id):
429 435 """locate a node based on revision number or subset of hex nodeid"""
430 436 try:
431 437 rev = int(id)
432 438 if str(rev) != id: raise ValueError
433 439 if rev < 0: rev = self.count() + rev
434 440 if rev < 0 or rev >= self.count(): raise ValueError
435 441 return self.node(rev)
436 442 except (ValueError, OverflowError):
437 443 c = []
438 444 for n in self.nodemap:
439 445 if hex(n).startswith(id):
440 446 c.append(n)
441 447 if len(c) > 1: raise KeyError("Ambiguous identifier")
442 448 if len(c) < 1: raise KeyError("No match found")
443 449 return c[0]
444 450
445 451 return None
446 452
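The hex-prefix branch of lookup() in isolation (the rev-number branch is plain int parsing): exactly one match wins, anything else raises. match_prefix is an invented name, and the hex strings stand in for hex(n) over self.nodemap:

def match_prefix(hexnodes, id):
    c = [n for n in hexnodes if n.startswith(id)]
    if len(c) > 1: raise KeyError("Ambiguous identifier")
    if len(c) < 1: raise KeyError("No match found")
    return c[0]

assert match_prefix(["1a2b3c", "1a9f00"], "1a2b") == "1a2b3c"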
447 453 def diff(self, a, b):
448 454 """return a delta between two revisions"""
449 455 return mdiff.textdiff(a, b)
450 456
451 457 def patches(self, t, pl):
452 458 """apply a list of patches to a string"""
453 459 return mdiff.patches(t, pl)
454 460
455 461 def delta(self, node):
456 462 """return or calculate a delta between a node and its predecessor"""
457 463 r = self.rev(node)
458 464 b = self.base(r)
459 465 if r == b:
460 466 return self.diff(self.revision(self.node(r - 1)),
461 467 self.revision(node))
462 468 else:
463 469 f = self.opener(self.datafile)
464 470 f.seek(self.start(r))
465 471 data = f.read(self.length(r))
466 472 return decompress(data)
467 473
468 474 def revision(self, node):
469 475 """return an uncompressed revision of a given"""
470 476 if node == nullid: return ""
471 477 if self.cache and self.cache[0] == node: return self.cache[2]
472 478
473 479 # look up what we need to read
474 480 text = None
475 481 rev = self.rev(node)
476 482 start, length, base, link, p1, p2, node = self.index[rev]
477 483 end = start + length
478 484 if base != rev: start = self.start(base)
479 485
480 486 # do we have useful data cached?
481 487 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
482 488 base = self.cache[1]
483 489 start = self.start(base + 1)
484 490 text = self.cache[2]
485 491 last = 0
486 492
487 493 f = self.opener(self.datafile)
488 494 f.seek(start)
489 495 data = f.read(end - start)
490 496
491 497 if text is None:
492 498 last = self.length(base)
493 499 text = decompress(data[:last])
494 500
495 501 bins = []
496 502 for r in xrange(base + 1, rev + 1):
497 503 s = self.length(r)
498 504 bins.append(decompress(data[last:last + s]))
499 505 last = last + s
500 506
501 507 text = mdiff.patches(text, bins)
502 508
503 509 if node != hash(text, p1, p2):
504 510 raise RevlogError("integrity check failed on %s:%d"
505 511 % (self.datafile, rev))
506 512
507 513 self.cache = (node, rev, text)
508 514 return text
509 515
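The chain walk at the heart of revision(), as a sketch: the full text lives at base(rev), each later revision up to rev is a delta, and the whole run is applied in one call. chunkfor and patch are hypothetical stand-ins for the datafile read and mdiff.patches:

def reconstruct(rl, rev, chunkfor, patch):
    base = rl.base(rev)
    text = chunkfor(base)                          # stored full version
    bins = [chunkfor(r) for r in xrange(base + 1, rev + 1)]
    return patch(text, bins)                       # fold in the delta run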
510 516 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
511 517 """add a revision to the log
512 518
513 519 text - the revision data to add
514 520 transaction - the transaction object used for rollback
515 521 link - the linkrev data to add
516 522 p1, p2 - the parent nodeids of the revision
517 523 d - an optional precomputed delta
518 524 """
519 525 if text is None: text = ""
520 526 if p1 is None: p1 = self.tip()
521 527 if p2 is None: p2 = nullid
522 528
523 529 node = hash(text, p1, p2)
524 530
525 531 if node in self.nodemap:
526 532 return node
527 533
528 534 n = self.count()
529 535 t = n - 1
530 536
531 537 if n:
532 538 base = self.base(t)
533 539 start = self.start(base)
534 540 end = self.end(t)
535 541 if not d:
536 542 prev = self.revision(self.tip())
537 543 d = self.diff(prev, text)
538 544 data = compress(d)
539 545 dist = end - start + len(data)
540 546
541 547 # full versions are inserted when the needed deltas
542 548 # become comparable to the uncompressed text
543 549 if not n or dist > len(text) * 2:
544 550 data = compress(text)
545 551 base = n
546 552 else:
547 553 base = self.base(t)
548 554
549 555 offset = 0
550 556 if t >= 0:
551 557 offset = self.end(t)
552 558
553 559 e = (offset, len(data), base, link, p1, p2, node)
554 560
555 561 self.index.append(e)
556 562 self.nodemap[node] = n
557 563 entry = struct.pack(indexformat, *e)
558 564
559 565 transaction.add(self.datafile, e[0])
560 566 self.opener(self.datafile, "a").write(data)
561 567 transaction.add(self.indexfile, n * len(entry))
562 568 self.opener(self.indexfile, "a").write(entry)
563 569
564 570 self.cache = (node, n, text)
565 571 return node
566 572
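The snapshot heuristic above, isolated: the delta is kept only while the whole chain, from the base's start to the new delta's end, stays under twice the new text; otherwise a fresh full version becomes the new base. Names here are invented for clarity:

def store_as_full(chain_start, chain_end, cdelta_len, text_len):
    # chain_start/chain_end: datafile span of the current delta chain
    dist = chain_end - chain_start + cdelta_len
    return dist > text_len * 2       # the "dist > len(text) * 2" test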
567 573 def ancestor(self, a, b):
568 574 """calculate the least common ancestor of nodes a and b"""
569 575 # calculate the distance of every node from root
570 576 dist = {nullid: 0}
571 577 for i in xrange(self.count()):
572 578 n = self.node(i)
573 579 p1, p2 = self.parents(n)
574 580 dist[n] = max(dist[p1], dist[p2]) + 1
575 581
576 582 # traverse ancestors in order of decreasing distance from root
577 583 def ancestors(node):
578 584 # we store negative distances because heap returns smallest member
579 585 h = [(-dist[node], node)]
580 586 seen = {}
581 587 earliest = self.count()
582 588 while h:
583 589 d, n = heapq.heappop(h)
584 590 if n not in seen:
585 591 seen[n] = 1
586 592 r = self.rev(n)
587 593 yield (-d, n)
588 594 for p in self.parents(n):
589 595 heapq.heappush(h, (-dist[p], p))
590 596
591 597 def generations(node):
592 598 sg, s = None, {}
593 599 for g,n in ancestors(node):
594 600 if g != sg:
595 601 if sg:
596 602 yield sg, s
597 603 sg, s = g, {n:1}
598 604 else:
599 605 s[n] = 1
600 606 yield sg, s
601 607
602 608 x = generations(a)
603 609 y = generations(b)
604 610 gx = x.next()
605 611 gy = y.next()
606 612
607 613 # increment each ancestor list until it is closer to root than
608 614 # the other, or they match
609 615 while 1:
610 616 #print "ancestor gen %s %s" % (gx[0], gy[0])
611 617 if gx[0] == gy[0]:
612 618 # find the intersection
613 619 i = [ n for n in gx[1] if n in gy[1] ]
614 620 if i:
615 621 return i[0]
616 622 else:
617 623 #print "next"
618 624 gy = y.next()
619 625 gx = x.next()
620 626 elif gx[0] < gy[0]:
621 627 #print "next y"
622 628 gy = y.next()
623 629 else:
624 630 #print "next x"
625 631 gx = x.next()
626 632
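A toy run of the distance table ancestor() builds, on a diamond graph (root r, siblings a and b, merge m): every node sits one past its farthest-from-root parent, so the generation streams for a and b first share a member at distance 1, namely r:

dist = {'null': 0}
parents = {'r': ('null', 'null'), 'a': ('r', 'null'),
           'b': ('r', 'null'), 'm': ('a', 'b')}
for n in ('r', 'a', 'b', 'm'):                    # topological order
    p1, p2 = parents[n]
    dist[n] = max(dist[p1], dist[p2]) + 1
assert (dist['r'], dist['a'], dist['b'], dist['m']) == (1, 2, 2, 3)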
627 633 def group(self, nodelist, lookup, infocollect = None):
628 634 """calculate a delta group
629 635
630 636 Given a list of changeset revs, return a set of deltas and
631 637 metadata corresponding to nodes. The first delta is
632 638 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
633 639 have this parent as it has all history before these
634 640 changesets. The parent is parent[0].
635 641 """
636 642 revs = [self.rev(n) for n in nodelist]
637 643 needed = dict.fromkeys(revs, 1)
638 644
639 645 # if we don't have any revisions touched by these changesets, bail
640 646 if not revs:
641 647 yield struct.pack(">l", 0)
642 648 return
643 649
644 650 # add the parent of the first rev
645 651 p = self.parents(self.node(revs[0]))[0]
646 652 revs.insert(0, self.rev(p))
647 653
648 654 # for each delta that isn't contiguous in the log, we need to
649 655 # reconstruct the base, reconstruct the result, and then
650 656 # calculate the delta. We also need to do this where we've
651 657 # stored a full version and not a delta
652 658 for i in xrange(0, len(revs) - 1):
653 659 a, b = revs[i], revs[i + 1]
654 660 if a + 1 != b or self.base(b) == b:
655 661 for j in xrange(self.base(a), a + 1):
656 662 needed[j] = 1
657 663 for j in xrange(self.base(b), b + 1):
658 664 needed[j] = 1
659 665
660 666 # calculate spans to retrieve from datafile
661 667 needed = needed.keys()
662 668 needed.sort()
663 669 spans = []
664 670 oo = -1
665 671 ol = 0
666 672 for n in needed:
667 673 if n < 0: continue
668 674 o = self.start(n)
669 675 l = self.length(n)
670 676 if oo + ol == o: # can we merge with the previous?
671 677 nl = spans[-1][2]
672 678 nl.append((n, l))
673 679 ol += l
674 680 spans[-1] = (oo, ol, nl)
675 681 else:
676 682 oo = o
677 683 ol = l
678 684 spans.append((oo, ol, [(n, l)]))
679 685
680 686 # read spans in, divide up chunks
681 687 chunks = {}
682 688 for span in spans:
683 689 # we reopen the file for each span to make http happy for now
684 690 f = self.opener(self.datafile)
685 691 f.seek(span[0])
686 692 data = f.read(span[1])
687 693
688 694 # divide up the span
689 695 pos = 0
690 696 for r, l in span[2]:
691 697 chunks[r] = decompress(data[pos: pos + l])
692 698 pos += l
693 699
694 700 # helper to reconstruct intermediate versions
695 701 def construct(text, base, rev):
696 702 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
697 703 return mdiff.patches(text, bins)
698 704
699 705 # build deltas
700 706 deltas = []
701 707 for d in xrange(0, len(revs) - 1):
702 708 a, b = revs[d], revs[d + 1]
703 709 n = self.node(b)
704 710
705 711 if infocollect is not None:
706 712 infocollect(n)
707 713
708 714 # do we need to construct a new delta?
709 715 if a + 1 != b or self.base(b) == b:
710 716 if a >= 0:
711 717 base = self.base(a)
712 718 ta = chunks[self.base(a)]
713 719 ta = construct(ta, base, a)
714 720 else:
715 721 ta = ""
716 722
717 723 base = self.base(b)
718 724 if a > base:
719 725 base = a
720 726 tb = ta
721 727 else:
722 728 tb = chunks[self.base(b)]
723 729 tb = construct(tb, base, b)
724 730 d = self.diff(ta, tb)
725 731 else:
726 732 d = chunks[b]
727 733
728 734 p = self.parents(n)
729 735 meta = n + p[0] + p[1] + lookup(n)
730 736 l = struct.pack(">l", len(meta) + len(d) + 4)
731 737 yield l
732 738 yield meta
733 739 yield d
734 740
735 741 yield struct.pack(">l", 0)
736 742
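The wire framing group() emits, restated: each record is a 4-byte big-endian length that counts itself, then 80 bytes of metadata (node, p1, p2, and the lookup result), then the delta; a zero length closes the stream:

import struct

def frame(meta, delta):
    # meta is n + p[0] + p[1] + lookup(n): four 20-byte ids, 80 bytes
    return struct.pack(">l", len(meta) + len(delta) + 4) + meta + delta

terminator = struct.pack(">l", 0)     # matches the final yield above
assert len(frame('m' * 80, 'd' * 5)) == 4 + 80 + 5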
737 743 def addgroup(self, revs, linkmapper, transaction, unique=0):
738 744 """
739 745 add a delta group
740 746
741 747 Given a set of deltas, add them to the revision log. The
742 748 first delta is against its parent, which should be in our
743 749 log, the rest are against the previous delta.
744 750 """
745 751
746 752 #track the base of the current delta log
747 753 r = self.count()
748 754 t = r - 1
749 755 node = nullid
750 756
751 757 base = prev = -1
752 758 start = end = measure = 0
753 759 if r:
754 760 start = self.start(self.base(t))
755 761 end = self.end(t)
756 762 measure = self.length(self.base(t))
757 763 base = self.base(t)
758 764 prev = self.tip()
759 765
760 766 transaction.add(self.datafile, end)
761 767 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
762 768 dfh = self.opener(self.datafile, "a")
763 769 ifh = self.opener(self.indexfile, "a")
764 770
765 771 # loop through our set of deltas
766 772 chain = None
767 773 for chunk in revs:
768 774 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
769 775 link = linkmapper(cs)
770 776 if node in self.nodemap:
771 777 # this can happen if two branches make the same change
772 778 # if unique:
773 779 # raise RevlogError("already have %s" % hex(node[:4]))
774 780 chain = node
775 781 continue
776 782 delta = chunk[80:]
777 783
778 784 if not chain:
779 785 # retrieve the parent revision of the delta chain
780 786 chain = p1
781 787 if not chain in self.nodemap:
782 788 raise RevlogError("unknown base %s" % short(chain[:4]))
783 789
784 790 # full versions are inserted when the needed deltas become
785 791 # comparable to the uncompressed text or when the previous
786 792 # version is not the one we have a delta against. We use
787 793 # the size of the previous full rev as a proxy for the
788 794 # current size.
789 795
790 796 if chain == prev:
791 797 cdelta = compress(delta)
792 798
793 799 if chain != prev or (end - start + len(cdelta)) > measure * 2:
794 800 # flush our writes here so we can read it in revision
795 801 dfh.flush()
796 802 ifh.flush()
797 803 text = self.revision(chain)
798 804 text = self.patches(text, [delta])
799 805 chk = self.addrevision(text, transaction, link, p1, p2)
800 806 if chk != node:
801 807 raise RevlogError("consistency error adding group")
802 808 measure = len(text)
803 809 else:
804 810 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
805 811 self.index.append(e)
806 812 self.nodemap[node] = r
807 813 dfh.write(cdelta)
808 814 ifh.write(struct.pack(indexformat, *e))
809 815
810 816 t, r, chain, prev = r, r + 1, node, node
811 817 start = self.start(self.base(t))
812 818 end = self.end(t)
813 819
814 820 dfh.close()
815 821 ifh.close()
816 822 return node