Reduce string duplication in compression code...
mason@suse.com
r1533:3d11f81c default
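
The hunks below change compress() to return a (header, payload) pair instead of prepending a one-byte 'u' marker onto the text, and teach the callers in addrevision() and addgroup() to measure and write the two pieces separately, so the payload is never copied just to attach the marker. Here is a minimal sketch of that convention in the file's Python 2 style; compress() and decompress() are transcribed from the new version below, while the StringIO harness, the sample text, and the use of ValueError in place of RevlogError are illustrative only.

import zlib
from cStringIO import StringIO

def compress(text):
    # returns (header, payload) so callers never build 'u' + text
    if not text: return ("", text)
    if len(text) < 44:
        if text[0] == '\0': return ("", text)
        return ('u', text)
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0': return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    # unchanged by this commit: the stored bytes still begin with
    # '\0' (literal), 'x' (zlib header) or 'u' (uncompressed marker)
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin
    if t == 'x': return zlib.decompress(bin)
    if t == 'u': return bin[1:]
    raise ValueError("unknown compression type %s" % t)

text = "some revision text " * 10        # long enough to take the zlib path
data = compress(text)
l = len(data[0]) + len(data[1])          # the length recorded in the index entry

f = StringIO()
if data[0]:                              # one-byte 'u' marker, only when needed
    f.write(data[0])
f.write(data[1])                         # payload written as-is, never copied

assert decompress(f.getvalue()) == text
assert len(f.getvalue()) == l

The on-disk format is unchanged; only the in-memory handling avoids the extra string concatenation.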
@@ -1,845 +1,851 b''
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
17 17
18 18 def hash(text, p1, p2):
19 19 """generate a hash from the given text and its parent hashes
20 20
21 21 This hash combines both the current file contents and its history
22 22 in a manner that makes it easy to distinguish nodes with the same
23 23 content in the revision graph.
24 24 """
25 25 l = [p1, p2]
26 26 l.sort()
27 27 s = sha.new(l[0])
28 28 s.update(l[1])
29 29 s.update(text)
30 30 return s.digest()
31 31
32 32 def compress(text):
33 33 """ generate a possibly-compressed representation of text """
34 if not text: return text
34 if not text: return ("", text)
35 35 if len(text) < 44:
36 if text[0] == '\0': return text
37 return 'u' + text
36 if text[0] == '\0': return ("", text)
37 return ('u', text)
38 38 bin = zlib.compress(text)
39 39 if len(bin) > len(text):
40 if text[0] == '\0': return text
41 return 'u' + text
42 return bin
40 if text[0] == '\0': return ("", text)
41 return ('u', text)
42 return ("", bin)
43 43
44 44 def decompress(bin):
45 45 """ decompress the given input """
46 46 if not bin: return bin
47 47 t = bin[0]
48 48 if t == '\0': return bin
49 49 if t == 'x': return zlib.decompress(bin)
50 50 if t == 'u': return bin[1:]
51 51 raise RevlogError(_("unknown compression type %s") % t)
52 52
53 53 indexformat = ">4l20s20s20s"
54 54
55 55 class lazyparser:
56 56 """
57 57 this class avoids the need to parse the entirety of large indices
58 58
59 59 By default we parse and load 1000 entries at a time.
60 60
61 61 If no position is specified, we load the whole index, and replace
62 62 the lazy objects in revlog with the underlying objects for
63 63 efficiency in cases where we look at most of the nodes.
64 64 """
65 65 def __init__(self, data, revlog):
66 66 self.data = data
67 67 self.s = struct.calcsize(indexformat)
68 68 self.l = len(data)/self.s
69 69 self.index = [None] * self.l
70 70 self.map = {nullid: -1}
71 71 self.all = 0
72 72 self.revlog = revlog
73 73
74 74 def load(self, pos=None):
75 75 if self.all: return
76 76 if pos is not None:
77 77 block = pos / 1000
78 78 i = block * 1000
79 79 end = min(self.l, i + 1000)
80 80 else:
81 81 self.all = 1
82 82 i = 0
83 83 end = self.l
84 84 self.revlog.index = self.index
85 85 self.revlog.nodemap = self.map
86 86
87 87 while i < end:
88 88 d = self.data[i * self.s: (i + 1) * self.s]
89 89 e = struct.unpack(indexformat, d)
90 90 self.index[i] = e
91 91 self.map[e[6]] = i
92 92 i += 1
93 93
94 94 class lazyindex:
95 95 """a lazy version of the index array"""
96 96 def __init__(self, parser):
97 97 self.p = parser
98 98 def __len__(self):
99 99 return len(self.p.index)
100 100 def load(self, pos):
101 101 if pos < 0:
102 102 pos += len(self.p.index)
103 103 self.p.load(pos)
104 104 return self.p.index[pos]
105 105 def __getitem__(self, pos):
106 106 return self.p.index[pos] or self.load(pos)
107 107 def append(self, e):
108 108 self.p.index.append(e)
109 109
110 110 class lazymap:
111 111 """a lazy version of the node map"""
112 112 def __init__(self, parser):
113 113 self.p = parser
114 114 def load(self, key):
115 115 if self.p.all: return
116 116 n = self.p.data.find(key)
117 117 if n < 0:
118 118 raise KeyError(key)
119 119 pos = n / self.p.s
120 120 self.p.load(pos)
121 121 def __contains__(self, key):
122 122 self.p.load()
123 123 return key in self.p.map
124 124 def __iter__(self):
125 125 yield nullid
126 126 for i in xrange(self.p.l):
127 127 try:
128 128 yield self.p.index[i][6]
129 129 except:
130 130 self.p.load(i)
131 131 yield self.p.index[i][6]
132 132 def __getitem__(self, key):
133 133 try:
134 134 return self.p.map[key]
135 135 except KeyError:
136 136 try:
137 137 self.load(key)
138 138 return self.p.map[key]
139 139 except KeyError:
140 140 raise KeyError("node " + hex(key))
141 141 def __setitem__(self, key, val):
142 142 self.p.map[key] = val
143 143
144 144 class RevlogError(Exception): pass
145 145
146 146 class revlog:
147 147 """
148 148 the underlying revision storage object
149 149
150 150 A revlog consists of two parts, an index and the revision data.
151 151
152 152 The index is a file with a fixed record size containing
153 153 information on each revision, including its nodeid (hash), the
154 154 nodeids of its parents, the position and offset of its data within
155 155 the data file, and the revision it's based on. Finally, each entry
156 156 contains a linkrev entry that can serve as a pointer to external
157 157 data.
158 158
159 159 The revision data itself is a linear collection of data chunks.
160 160 Each chunk represents a revision and is usually represented as a
161 161 delta against the previous chunk. To bound lookup time, runs of
162 162 deltas are limited to about 2 times the length of the original
163 163 version data. This makes retrieval of a version proportional to
164 164 its size, or O(1) relative to the number of revisions.
165 165
166 166 Both pieces of the revlog are written to in an append-only
167 167 fashion, which means we never need to rewrite a file to insert or
168 168 remove data, and can use some simple techniques to avoid the need
169 169 for locking while reading.
170 170 """
171 171 def __init__(self, opener, indexfile, datafile):
172 172 """
173 173 create a revlog object
174 174
175 175 opener is a function that abstracts the file opening operation
176 176 and can be used to implement COW semantics or the like.
177 177 """
178 178 self.indexfile = indexfile
179 179 self.datafile = datafile
180 180 self.opener = opener
181 181 self.cache = None
182 182
183 183 try:
184 184 i = self.opener(self.indexfile).read()
185 185 except IOError, inst:
186 186 if inst.errno != errno.ENOENT:
187 187 raise
188 188 i = ""
189 189
190 190 if len(i) > 10000:
191 191 # big index, let's parse it on demand
192 192 parser = lazyparser(i, self)
193 193 self.index = lazyindex(parser)
194 194 self.nodemap = lazymap(parser)
195 195 else:
196 196 s = struct.calcsize(indexformat)
197 197 l = len(i) / s
198 198 self.index = [None] * l
199 199 m = [None] * l
200 200
201 201 n = 0
202 202 for f in xrange(0, len(i), s):
203 203 # offset, size, base, linkrev, p1, p2, nodeid
204 204 e = struct.unpack(indexformat, i[f:f + s])
205 205 m[n] = (e[6], n)
206 206 self.index[n] = e
207 207 n += 1
208 208
209 209 self.nodemap = dict(m)
210 210 self.nodemap[nullid] = -1
211 211
212 212 def tip(self): return self.node(len(self.index) - 1)
213 213 def count(self): return len(self.index)
214 214 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
215 215 def rev(self, node):
216 216 try:
217 217 return self.nodemap[node]
218 218 except KeyError:
219 219 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
220 220 def linkrev(self, node): return self.index[self.rev(node)][3]
221 221 def parents(self, node):
222 222 if node == nullid: return (nullid, nullid)
223 223 return self.index[self.rev(node)][4:6]
224 224
225 225 def start(self, rev): return self.index[rev][0]
226 226 def length(self, rev): return self.index[rev][1]
227 227 def end(self, rev): return self.start(rev) + self.length(rev)
228 228 def base(self, rev): return self.index[rev][2]
229 229
230 230 def reachable(self, rev, stop=None):
231 231 reachable = {}
232 232 visit = [rev]
233 233 reachable[rev] = 1
234 234 if stop:
235 235 stopn = self.rev(stop)
236 236 else:
237 237 stopn = 0
238 238 while visit:
239 239 n = visit.pop(0)
240 240 if n == stop:
241 241 continue
242 242 if n == nullid:
243 243 continue
244 244 for p in self.parents(n):
245 245 if self.rev(p) < stopn:
246 246 continue
247 247 if p not in reachable:
248 248 reachable[p] = 1
249 249 visit.append(p)
250 250 return reachable
251 251
252 252 def nodesbetween(self, roots=None, heads=None):
253 253 """Return a tuple containing three elements. Elements 1 and 2 contain
254 254 a final list of bases and heads after all the unreachable ones have been
255 255 pruned. Element 0 contains a topologically sorted list of all
256 256
257 257 nodes that satisfy these constraints:
258 258 1. All nodes must be descended from a node in roots (the nodes on
259 259 roots are considered descended from themselves).
260 260 2. All nodes must also be ancestors of a node in heads (the nodes in
261 261 heads are considered to be their own ancestors).
262 262
263 263 If roots is unspecified, nullid is assumed as the only root.
264 264 If heads is unspecified, it is taken to be the output of the
265 265 heads method (i.e. a list of all nodes in the repository that
266 266 have no children)."""
267 267 nonodes = ([], [], [])
268 268 if roots is not None:
269 269 roots = list(roots)
270 270 if not roots:
271 271 return nonodes
272 272 lowestrev = min([self.rev(n) for n in roots])
273 273 else:
274 274 roots = [nullid] # Everybody's a descendent of nullid
275 275 lowestrev = -1
276 276 if (lowestrev == -1) and (heads is None):
277 277 # We want _all_ the nodes!
278 278 return ([self.node(r) for r in xrange(0, self.count())],
279 279 [nullid], list(self.heads()))
280 280 if heads is None:
281 281 # All nodes are ancestors, so the latest ancestor is the last
282 282 # node.
283 283 highestrev = self.count() - 1
284 284 # Set ancestors to None to signal that every node is an ancestor.
285 285 ancestors = None
286 286 # Set heads to an empty dictionary for later discovery of heads
287 287 heads = {}
288 288 else:
289 289 heads = list(heads)
290 290 if not heads:
291 291 return nonodes
292 292 ancestors = {}
293 293 # Start at the top and keep marking parents until we're done.
294 294 nodestotag = heads[:]
295 295 # Turn heads into a dictionary so we can remove 'fake' heads.
296 296 # Also, later we will be using it to filter out the heads we can't
297 297 # find from roots.
298 298 heads = dict.fromkeys(heads, 0)
299 299 # Remember where the top was so we can use it as a limit later.
300 300 highestrev = max([self.rev(n) for n in nodestotag])
301 301 while nodestotag:
302 302 # grab a node to tag
303 303 n = nodestotag.pop()
304 304 # Never tag nullid
305 305 if n == nullid:
306 306 continue
307 307 # A node's revision number represents its place in a
308 308 # topologically sorted list of nodes.
309 309 r = self.rev(n)
310 310 if r >= lowestrev:
311 311 if n not in ancestors:
312 312 # If we are possibly a descendent of one of the roots
313 313 # and we haven't already been marked as an ancestor
314 314 ancestors[n] = 1 # Mark as ancestor
315 315 # Add non-nullid parents to list of nodes to tag.
316 316 nodestotag.extend([p for p in self.parents(n) if
317 317 p != nullid])
318 318 elif n in heads: # We've seen it before, is it a fake head?
319 319 # So it is, real heads should not be the ancestors of
320 320 # any other heads.
321 321 heads.pop(n)
322 322 if not ancestors:
323 323 return nonodes
324 324 # Now that we have our set of ancestors, we want to remove any
325 325 # roots that are not ancestors.
326 326
327 327 # If one of the roots was nullid, everything is included anyway.
328 328 if lowestrev > -1:
329 329 # But, since we weren't, let's recompute the lowest rev to not
330 330 # include roots that aren't ancestors.
331 331
332 332 # Filter out roots that aren't ancestors of heads
333 333 roots = [n for n in roots if n in ancestors]
334 334 # Recompute the lowest revision
335 335 if roots:
336 336 lowestrev = min([self.rev(n) for n in roots])
337 337 else:
338 338 # No more roots? Return empty list
339 339 return nonodes
340 340 else:
341 341 # We are descending from nullid, and don't need to care about
342 342 # any other roots.
343 343 lowestrev = -1
344 344 roots = [nullid]
345 345 # Transform our roots list into a 'set' (i.e. a dictionary where the
346 346 # values don't matter).
347 347 descendents = dict.fromkeys(roots, 1)
348 348 # Also, keep the original roots so we can filter out roots that aren't
349 349 # 'real' roots (i.e. are descended from other roots).
350 350 roots = descendents.copy()
351 351 # Our topologically sorted list of output nodes.
352 352 orderedout = []
353 353 # Don't start at nullid since we don't want nullid in our output list,
354 354 # and if nullid shows up in descendents, empty parents will look like
355 355 # they're descendents.
356 356 for r in xrange(max(lowestrev, 0), highestrev + 1):
357 357 n = self.node(r)
358 358 isdescendent = False
359 359 if lowestrev == -1: # Everybody is a descendent of nullid
360 360 isdescendent = True
361 361 elif n in descendents:
362 362 # n is already a descendent
363 363 isdescendent = True
364 364 # This check only needs to be done here because all the roots
365 365 # will start being marked as descendents before the loop.
366 366 if n in roots:
367 367 # If n was a root, check if it's a 'real' root.
368 368 p = tuple(self.parents(n))
369 369 # If any of its parents are descendents, it's not a root.
370 370 if (p[0] in descendents) or (p[1] in descendents):
371 371 roots.pop(n)
372 372 else:
373 373 p = tuple(self.parents(n))
374 374 # A node is a descendent if either of its parents are
375 375 # descendents. (We seeded the descendents list with the roots
376 376 # up there, remember?)
377 377 if (p[0] in descendents) or (p[1] in descendents):
378 378 descendents[n] = 1
379 379 isdescendent = True
380 380 if isdescendent and ((ancestors is None) or (n in ancestors)):
381 381 # Only include nodes that are both descendents and ancestors.
382 382 orderedout.append(n)
383 383 if (ancestors is not None) and (n in heads):
384 384 # We're trying to figure out which heads are reachable
385 385 # from roots.
386 386 # Mark this head as having been reached
387 387 heads[n] = 1
388 388 elif ancestors is None:
389 389 # Otherwise, we're trying to discover the heads.
390 390 # Assume this is a head because if it isn't, the next step
391 391 # will eventually remove it.
392 392 heads[n] = 1
393 393 # But, obviously its parents aren't.
394 394 for p in self.parents(n):
395 395 heads.pop(p, None)
396 396 heads = [n for n in heads.iterkeys() if heads[n] != 0]
397 397 roots = roots.keys()
398 398 assert orderedout
399 399 assert roots
400 400 assert heads
401 401 return (orderedout, roots, heads)
402 402
403 403 def heads(self, stop=None):
404 404 """return the list of all nodes that have no children"""
405 405 p = {}
406 406 h = []
407 407 stoprev = 0
408 408 if stop and stop in self.nodemap:
409 409 stoprev = self.rev(stop)
410 410
411 411 for r in range(self.count() - 1, -1, -1):
412 412 n = self.node(r)
413 413 if n not in p:
414 414 h.append(n)
415 415 if n == stop:
416 416 break
417 417 if r < stoprev:
418 418 break
419 419 for pn in self.parents(n):
420 420 p[pn] = 1
421 421 return h
422 422
423 423 def children(self, node):
424 424 """find the children of a given node"""
425 425 c = []
426 426 p = self.rev(node)
427 427 for r in range(p + 1, self.count()):
428 428 n = self.node(r)
429 429 for pn in self.parents(n):
430 430 if pn == node:
431 431 c.append(n)
432 432 continue
433 433 elif pn == nullid:
434 434 continue
435 435 return c
436 436
437 437 def lookup(self, id):
438 438 """locate a node based on revision number or subset of hex nodeid"""
439 439 try:
440 440 rev = int(id)
441 441 if str(rev) != id: raise ValueError
442 442 if rev < 0: rev = self.count() + rev
443 443 if rev < 0 or rev >= self.count(): raise ValueError
444 444 return self.node(rev)
445 445 except (ValueError, OverflowError):
446 446 c = []
447 447 for n in self.nodemap:
448 448 if hex(n).startswith(id):
449 449 c.append(n)
450 450 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
451 451 if len(c) < 1: raise RevlogError(_("No match found"))
452 452 return c[0]
453 453
454 454 return None
455 455
456 456 def diff(self, a, b):
457 457 """return a delta between two revisions"""
458 458 return mdiff.textdiff(a, b)
459 459
460 460 def patches(self, t, pl):
461 461 """apply a list of patches to a string"""
462 462 return mdiff.patches(t, pl)
463 463
464 464 def delta(self, node):
465 465 """return or calculate a delta between a node and its predecessor"""
466 466 r = self.rev(node)
467 467 b = self.base(r)
468 468 if r == b:
469 469 return self.diff(self.revision(self.node(r - 1)),
470 470 self.revision(node))
471 471 else:
472 472 f = self.opener(self.datafile)
473 473 f.seek(self.start(r))
474 474 data = f.read(self.length(r))
475 475 return decompress(data)
476 476
477 477 def revision(self, node):
478 478 """return an uncompressed revision of a given"""
479 479 if node == nullid: return ""
480 480 if self.cache and self.cache[0] == node: return self.cache[2]
481 481
482 482 # look up what we need to read
483 483 text = None
484 484 rev = self.rev(node)
485 485 start, length, base, link, p1, p2, node = self.index[rev]
486 486 end = start + length
487 487 if base != rev: start = self.start(base)
488 488
489 489 # do we have useful data cached?
490 490 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
491 491 base = self.cache[1]
492 492 start = self.start(base + 1)
493 493 text = self.cache[2]
494 494 last = 0
495 495
496 496 f = self.opener(self.datafile)
497 497 f.seek(start)
498 498 data = f.read(end - start)
499 499
500 500 if text is None:
501 501 last = self.length(base)
502 502 text = decompress(data[:last])
503 503
504 504 bins = []
505 505 for r in xrange(base + 1, rev + 1):
506 506 s = self.length(r)
507 507 bins.append(decompress(data[last:last + s]))
508 508 last = last + s
509 509
510 510 text = mdiff.patches(text, bins)
511 511
512 512 if node != hash(text, p1, p2):
513 513 raise RevlogError(_("integrity check failed on %s:%d")
514 514 % (self.datafile, rev))
515 515
516 516 self.cache = (node, rev, text)
517 517 return text
518 518
519 519 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
520 520 """add a revision to the log
521 521
522 522 text - the revision data to add
523 523 transaction - the transaction object used for rollback
524 524 link - the linkrev data to add
525 525 p1, p2 - the parent nodeids of the revision
526 526 d - an optional precomputed delta
527 527 """
528 528 if text is None: text = ""
529 529 if p1 is None: p1 = self.tip()
530 530 if p2 is None: p2 = nullid
531 531
532 532 node = hash(text, p1, p2)
533 533
534 534 if node in self.nodemap:
535 535 return node
536 536
537 537 n = self.count()
538 538 t = n - 1
539 539
540 540 if n:
541 541 base = self.base(t)
542 542 start = self.start(base)
543 543 end = self.end(t)
544 544 if not d:
545 545 prev = self.revision(self.tip())
546 d = self.diff(prev, text)
546 d = self.diff(prev, str(text))
547 547 data = compress(d)
548 dist = end - start + len(data)
548 l = len(data[1]) + len(data[0])
549 dist = end - start + l
549 550
550 551 # full versions are inserted when the needed deltas
551 552 # become comparable to the uncompressed text
552 553 if not n or dist > len(text) * 2:
553 554 data = compress(text)
555 l = len(data[1]) + len(data[0])
554 556 base = n
555 557 else:
556 558 base = self.base(t)
557 559
558 560 offset = 0
559 561 if t >= 0:
560 562 offset = self.end(t)
561 563
562 e = (offset, len(data), base, link, p1, p2, node)
564 e = (offset, l, base, link, p1, p2, node)
563 565
564 566 self.index.append(e)
565 567 self.nodemap[node] = n
566 568 entry = struct.pack(indexformat, *e)
567 569
568 570 transaction.add(self.datafile, e[0])
569 self.opener(self.datafile, "a").write(data)
571 f = self.opener(self.datafile, "a")
572 if data[0]:
573 f.write(data[0])
574 f.write(data[1])
570 575 transaction.add(self.indexfile, n * len(entry))
571 576 self.opener(self.indexfile, "a").write(entry)
572 577
573 578 self.cache = (node, n, text)
574 579 return node
575 580
576 581 def ancestor(self, a, b):
577 582 """calculate the least common ancestor of nodes a and b"""
578 583 # calculate the distance of every node from root
579 584 dist = {nullid: 0}
580 585 for i in xrange(self.count()):
581 586 n = self.node(i)
582 587 p1, p2 = self.parents(n)
583 588 dist[n] = max(dist[p1], dist[p2]) + 1
584 589
585 590 # traverse ancestors in order of decreasing distance from root
586 591 def ancestors(node):
587 592 # we store negative distances because heap returns smallest member
588 593 h = [(-dist[node], node)]
589 594 seen = {}
590 595 earliest = self.count()
591 596 while h:
592 597 d, n = heapq.heappop(h)
593 598 if n not in seen:
594 599 seen[n] = 1
595 600 r = self.rev(n)
596 601 yield (-d, n)
597 602 for p in self.parents(n):
598 603 heapq.heappush(h, (-dist[p], p))
599 604
600 605 def generations(node):
601 606 sg, s = None, {}
602 607 for g,n in ancestors(node):
603 608 if g != sg:
604 609 if sg:
605 610 yield sg, s
606 611 sg, s = g, {n:1}
607 612 else:
608 613 s[n] = 1
609 614 yield sg, s
610 615
611 616 x = generations(a)
612 617 y = generations(b)
613 618 gx = x.next()
614 619 gy = y.next()
615 620
616 621 # increment each ancestor list until it is closer to root than
617 622 # the other, or they match
618 623 while 1:
619 624 #print "ancestor gen %s %s" % (gx[0], gy[0])
620 625 if gx[0] == gy[0]:
621 626 # find the intersection
622 627 i = [ n for n in gx[1] if n in gy[1] ]
623 628 if i:
624 629 return i[0]
625 630 else:
626 631 #print "next"
627 632 gy = y.next()
628 633 gx = x.next()
629 634 elif gx[0] < gy[0]:
630 635 #print "next y"
631 636 gy = y.next()
632 637 else:
633 638 #print "next x"
634 639 gx = x.next()
635 640
636 641 def group(self, nodelist, lookup, infocollect = None):
637 642 """calculate a delta group
638 643
639 644 Given a list of changeset revs, return a set of deltas and
640 645 metadata corresponding to nodes. The first delta is
641 646 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
642 647 have this parent as it has all history before these
643 648 changesets. parent is parent[0]
644 649 """
645 650 revs = [self.rev(n) for n in nodelist]
646 651 needed = dict.fromkeys(revs, 1)
647 652
648 653 # if we don't have any revisions touched by these changesets, bail
649 654 if not revs:
650 655 yield struct.pack(">l", 0)
651 656 return
652 657
653 658 # add the parent of the first rev
654 659 p = self.parents(self.node(revs[0]))[0]
655 660 revs.insert(0, self.rev(p))
656 661
657 662 # for each delta that isn't contiguous in the log, we need to
658 663 # reconstruct the base, reconstruct the result, and then
659 664 # calculate the delta. We also need to do this where we've
660 665 # stored a full version and not a delta
661 666 for i in xrange(0, len(revs) - 1):
662 667 a, b = revs[i], revs[i + 1]
663 668 if a + 1 != b or self.base(b) == b:
664 669 for j in xrange(self.base(a), a + 1):
665 670 needed[j] = 1
666 671 for j in xrange(self.base(b), b + 1):
667 672 needed[j] = 1
668 673
669 674 # calculate spans to retrieve from datafile
670 675 needed = needed.keys()
671 676 needed.sort()
672 677 spans = []
673 678 oo = -1
674 679 ol = 0
675 680 for n in needed:
676 681 if n < 0: continue
677 682 o = self.start(n)
678 683 l = self.length(n)
679 684 if oo + ol == o: # can we merge with the previous?
680 685 nl = spans[-1][2]
681 686 nl.append((n, l))
682 687 ol += l
683 688 spans[-1] = (oo, ol, nl)
684 689 else:
685 690 oo = o
686 691 ol = l
687 692 spans.append((oo, ol, [(n, l)]))
688 693
689 694 # read spans in, divide up chunks
690 695 chunks = {}
691 696 for span in spans:
692 697 # we reopen the file for each span to make http happy for now
693 698 f = self.opener(self.datafile)
694 699 f.seek(span[0])
695 700 data = f.read(span[1])
696 701
697 702 # divide up the span
698 703 pos = 0
699 704 for r, l in span[2]:
700 705 chunks[r] = decompress(data[pos: pos + l])
701 706 pos += l
702 707
703 708 # helper to reconstruct intermediate versions
704 709 def construct(text, base, rev):
705 710 bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
706 711 return mdiff.patches(text, bins)
707 712
708 713 # build deltas
709 714 deltas = []
710 715 for d in xrange(0, len(revs) - 1):
711 716 a, b = revs[d], revs[d + 1]
712 717 n = self.node(b)
713 718
714 719 if infocollect is not None:
715 720 infocollect(n)
716 721
717 722 # do we need to construct a new delta?
718 723 if a + 1 != b or self.base(b) == b:
719 724 if a >= 0:
720 725 base = self.base(a)
721 726 ta = chunks[self.base(a)]
722 727 ta = construct(ta, base, a)
723 728 else:
724 729 ta = ""
725 730
726 731 base = self.base(b)
727 732 if a > base:
728 733 base = a
729 734 tb = ta
730 735 else:
731 736 tb = chunks[self.base(b)]
732 737 tb = construct(tb, base, b)
733 738 d = self.diff(ta, tb)
734 739 else:
735 740 d = chunks[b]
736 741
737 742 p = self.parents(n)
738 743 meta = n + p[0] + p[1] + lookup(n)
739 744 l = struct.pack(">l", len(meta) + len(d) + 4)
740 745 yield l
741 746 yield meta
742 747 yield d
743 748
744 749 yield struct.pack(">l", 0)
745 750
746 751 def addgroup(self, revs, linkmapper, transaction, unique=0):
747 752 """
748 753 add a delta group
749 754
750 755 given a set of deltas, add them to the revision log. the
751 756 first delta is against its parent, which should be in our
752 757 log, the rest are against the previous delta.
753 758 """
754 759
755 760 #track the base of the current delta log
756 761 r = self.count()
757 762 t = r - 1
758 763 node = nullid
759 764
760 765 base = prev = -1
761 766 start = end = measure = 0
762 767 if r:
763 768 start = self.start(self.base(t))
764 769 end = self.end(t)
765 770 measure = self.length(self.base(t))
766 771 base = self.base(t)
767 772 prev = self.tip()
768 773
769 774 transaction.add(self.datafile, end)
770 775 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
771 776 dfh = self.opener(self.datafile, "a")
772 777 ifh = self.opener(self.indexfile, "a")
773 778
774 779 # loop through our set of deltas
775 780 chain = None
776 781 for chunk in revs:
777 782 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
778 783 link = linkmapper(cs)
779 784 if node in self.nodemap:
780 785 # this can happen if two branches make the same change
781 786 # if unique:
782 787 # raise RevlogError(_("already have %s") % hex(node[:4]))
783 788 chain = node
784 789 continue
785 790 delta = chunk[80:]
786 791
787 792 for p in (p1, p2):
788 793 if not p in self.nodemap:
789 794 raise RevlogError(_("unknown parent %s") % short(p1))
790 795
791 796 if not chain:
792 797 # retrieve the parent revision of the delta chain
793 798 chain = p1
794 799 if not chain in self.nodemap:
795 800 raise RevlogError(_("unknown base %s") % short(chain[:4]))
796 801
797 802 # full versions are inserted when the needed deltas become
798 803 # comparable to the uncompressed text or when the previous
799 804 # version is not the one we have a delta against. We use
800 805 # the size of the previous full rev as a proxy for the
801 806 # current size.
802 807
803 808 if chain == prev:
804 cdelta = compress(delta)
809 tempd = compress(delta)
810 cdelta = tempd[0] + tempd[1]
805 811
806 812 if chain != prev or (end - start + len(cdelta)) > measure * 2:
807 813 # flush our writes here so we can read it in revision
808 814 dfh.flush()
809 815 ifh.flush()
810 816 text = self.revision(chain)
811 817 text = self.patches(text, [delta])
812 818 chk = self.addrevision(text, transaction, link, p1, p2)
813 819 if chk != node:
814 820 raise RevlogError(_("consistency error adding group"))
815 821 measure = len(text)
816 822 else:
817 823 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
818 824 self.index.append(e)
819 825 self.nodemap[node] = r
820 826 dfh.write(cdelta)
821 827 ifh.write(struct.pack(indexformat, *e))
822 828
823 829 t, r, chain, prev = r, r + 1, node, node
824 830 start = self.start(self.base(t))
825 831 end = self.end(t)
826 832
827 833 dfh.close()
828 834 ifh.close()
829 835 return node
830 836
831 837 def checksize(self):
832 838 expected = 0
833 839 if self.count():
834 840 expected = self.end(self.count() - 1)
835 841 try:
836 842 f = self.opener(self.datafile)
837 843 f.seek(0, 2)
838 844 actual = f.tell()
839 845 return expected - actual
840 846 except IOError, inst:
841 847 if inst.errno == errno.ENOENT:
842 848 return 0
843 849 raise
844 850
845 851
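
For reference, the fixed-size record declared by indexformat near the top of the file is 76 bytes and unpacks into the seven positional fields the code reads as e[0] through e[6]. A small sketch in the same Python 2 style; the field names follow the inline comment in __init__ ("offset, size, base, linkrev, p1, p2, nodeid") and the packed values are made up for illustration.

import struct

indexformat = ">4l20s20s20s"        # four big-endian 32-bit ints + three 20-byte nodeids
assert struct.calcsize(indexformat) == 76

nullid = "\0" * 20
fakenode = "\x11" * 20              # illustrative value, not a real hash
entry = struct.pack(indexformat, 0, 17, 0, 0, nullid, nullid, fakenode)

# e[0] .. e[6]: offset, size, base, linkrev, p1, p2, nodeid
offset, size, base, linkrev, p1, p2, nodeid = struct.unpack(indexformat, entry)
print offset, size, base, linkrev   # -> 0 17 0 0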