revlog.py: factorization and fixes for rev < 0 (nullid)
Benoit Boissinot
r1941:75188237 default
@@ -1,882 +1,883 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii errno heapq mdiff os sha struct zlib")
17 17
18 18 def hash(text, p1, p2):
19 19 """generate a hash from the given text and its parent hashes
20 20
21 21 This hash combines both the current file contents and its history
22 22 in a manner that makes it easy to distinguish nodes with the same
23 23 content in the revision graph.
24 24 """
25 25 l = [p1, p2]
26 26 l.sort()
27 27 s = sha.new(l[0])
28 28 s.update(l[1])
29 29 s.update(text)
30 30 return s.digest()
31 31
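Sorting the parents first makes the hash symmetric in p1 and p2, so swapped parents produce the same nodeid. A minimal standalone sketch of the same construction (using hashlib.sha1, the modern spelling of the sha module used above):

    import hashlib

    def node_hash(text, p1, p2):
        # sort the parents so (p1, p2) and (p2, p1) give the same nodeid
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    # identical text with a different history yields a different nodeid
    nullid = "\0" * 20
    n1 = node_hash("data", nullid, nullid)
    n2 = node_hash("data", n1, nullid)
    assert n1 != n2 and n1 == node_hash("data", nullid, nullid)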
32 32 def compress(text):
33 33 """ generate a possibly-compressed representation of text """
34 34 if not text: return ("", text)
35 35 if len(text) < 44:
36 36 if text[0] == '\0': return ("", text)
37 37 return ('u', text)
38 38 bin = zlib.compress(text)
39 39 if len(bin) > len(text):
40 40 if text[0] == '\0': return ("", text)
41 41 return ('u', text)
42 42 return ("", bin)
43 43
44 44 def decompress(bin):
45 45 """ decompress the given input """
46 46 if not bin: return bin
47 47 t = bin[0]
48 48 if t == '\0': return bin
49 49 if t == 'x': return zlib.decompress(bin)
50 50 if t == 'u': return bin[1:]
51 51 raise RevlogError(_("unknown compression type %r") % t)
52 52
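The one-byte tag makes each chunk self-describing: zlib streams start with 'x', uncompressible text is stored behind a 'u' marker, and data beginning with NUL (or empty data) is passed through untouched. A quick round-trip check of the scheme above:

    # round-trip sanity check for compress()/decompress() as defined above
    for text in ("", "short text", "\0binary-ish", "x" * 1000):
        prefix, data = compress(text)
        assert decompress(prefix + data) == text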
53 53 indexformat = ">4l20s20s20s"
54 54
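Each index record is fixed-size: four big-endian 32-bit integers (offset, size, base, linkrev) followed by three 20-byte nodeids (p1, p2, node), 76 bytes in total. A sketch of packing and unpacking one entry, with hypothetical values:

    import struct

    indexformat = ">4l20s20s20s"
    assert struct.calcsize(indexformat) == 76   # 4*4 + 3*20 bytes

    nullid = "\0" * 20
    # offset, size, base, linkrev, p1, p2, node
    e = (0, 10, 0, 0, nullid, nullid, "\x12" * 20)
    record = struct.pack(indexformat, *e)
    assert struct.unpack(indexformat, record) == e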
55 55 class lazyparser(object):
56 56 """
57 57 this class avoids the need to parse the entirety of large indices
58 58
59 59 By default we parse and load 1000 entries at a time.
60 60
61 61 If no position is specified, we load the whole index, and replace
62 62 the lazy objects in revlog with the underlying objects for
63 63 efficiency in cases where we look at most of the nodes.
64 64 """
65 65 def __init__(self, data, revlog):
66 66 self.data = data
67 67 self.s = struct.calcsize(indexformat)
68 68 self.l = len(data)/self.s
69 69 self.index = [None] * self.l
70 70 self.map = {nullid: -1}
71 71 self.all = 0
72 72 self.revlog = revlog
73 73
74 74 def trunc(self, pos):
75 75 self.l = pos/self.s
76 76
77 77 def load(self, pos=None):
78 78 if self.all: return
79 79 if pos is not None:
80 80 block = pos / 1000
81 81 i = block * 1000
82 82 end = min(self.l, i + 1000)
83 83 else:
84 84 self.all = 1
85 85 i = 0
86 86 end = self.l
87 87 self.revlog.index = self.index
88 88 self.revlog.nodemap = self.map
89 89
90 90 while i < end:
91 91 d = self.data[i * self.s: (i + 1) * self.s]
92 92 e = struct.unpack(indexformat, d)
93 93 self.index[i] = e
94 94 self.map[e[6]] = i
95 95 i += 1
96 96
97 97 class lazyindex(object):
98 98 """a lazy version of the index array"""
99 99 def __init__(self, parser):
100 100 self.p = parser
101 101 def __len__(self):
102 102 return len(self.p.index)
103 103 def load(self, pos):
104 104 if pos < 0:
105 105 pos += len(self.p.index)
106 106 self.p.load(pos)
107 107 return self.p.index[pos]
108 108 def __getitem__(self, pos):
109 109 return self.p.index[pos] or self.load(pos)
110 110 def __delitem__(self, pos):
111 111 del self.p.index[pos]
112 112 def append(self, e):
113 113 self.p.index.append(e)
114 114 def trunc(self, pos):
115 115 self.p.trunc(pos)
116 116
117 117 class lazymap(object):
118 118 """a lazy version of the node map"""
119 119 def __init__(self, parser):
120 120 self.p = parser
121 121 def load(self, key):
122 122 if self.p.all: return
123 123 n = self.p.data.find(key)
124 124 if n < 0:
125 125 raise KeyError(key)
126 126 pos = n / self.p.s
127 127 self.p.load(pos)
128 128 def __contains__(self, key):
129 129 self.p.load()
130 130 return key in self.p.map
131 131 def __iter__(self):
132 132 yield nullid
133 133 for i in xrange(self.p.l):
134 134 try:
135 135 yield self.p.index[i][6]
136 136 except:
137 137 self.p.load(i)
138 138 yield self.p.index[i][6]
139 139 def __getitem__(self, key):
140 140 try:
141 141 return self.p.map[key]
142 142 except KeyError:
143 143 try:
144 144 self.load(key)
145 145 return self.p.map[key]
146 146 except KeyError:
147 147 raise KeyError("node " + hex(key))
148 148 def __setitem__(self, key, val):
149 149 self.p.map[key] = val
150 150 def __delitem__(self, key):
151 151 del self.p.map[key]
152 152
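lazymap.load() locates a node by scanning the raw index bytes with find(), converts the byte offset to a record position, and lazyparser.load() then parses only the surrounding 1000-entry window. A small sketch of that windowing arithmetic:

    # sketch: mapping a record position to the 1000-entry block that
    # lazyparser.load() would actually parse
    def block_bounds(pos, total):
        start = (pos // 1000) * 1000
        return start, min(total, start + 1000)

    assert block_bounds(0, 5000) == (0, 1000)
    assert block_bounds(2500, 5000) == (2000, 3000)
    assert block_bounds(4990, 5000) == (4000, 5000)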
153 153 class RevlogError(Exception): pass
154 154
155 155 class revlog(object):
156 156 """
157 157 the underlying revision storage object
158 158
159 159 A revlog consists of two parts, an index and the revision data.
160 160
161 161 The index is a file with a fixed record size containing
162 162 information on each revision, including its nodeid (hash), the
163 163 nodeids of its parents, the position and offset of its data within
164 164 the data file, and the revision it's based on. Finally, each entry
165 165 contains a linkrev entry that can serve as a pointer to external
166 166 data.
167 167
168 168 The revision data itself is a linear collection of data chunks.
169 169 Each chunk represents a revision and is usually represented as a
170 170 delta against the previous chunk. To bound lookup time, runs of
171 171 deltas are limited to about 2 times the length of the original
172 172 version data. This makes retrieval of a version proportional to
173 173 its size, or O(1) relative to the number of revisions.
174 174
175 175 Both pieces of the revlog are written to in an append-only
176 176 fashion, which means we never need to rewrite a file to insert or
177 177 remove data, and can use some simple techniques to avoid the need
178 178 for locking while reading.
179 179 """
180 180 def __init__(self, opener, indexfile, datafile):
181 181 """
182 182 create a revlog object
183 183
184 184 opener is a function that abstracts the file opening operation
185 185 and can be used to implement COW semantics or the like.
186 186 """
187 187 self.indexfile = indexfile
188 188 self.datafile = datafile
189 189 self.opener = opener
190 190
191 191 self.indexstat = None
192 192 self.cache = None
193 193 self.chunkcache = None
194 194 self.load()
195 195
196 196 def load(self):
197 197 try:
198 198 f = self.opener(self.indexfile)
199 199 except IOError, inst:
200 200 if inst.errno != errno.ENOENT:
201 201 raise
202 202 i = ""
203 203 else:
204 204 try:
205 205 st = os.fstat(f.fileno())
206 206 except AttributeError, inst:
207 207 st = None
208 208 else:
209 209 oldst = self.indexstat
210 210 if (oldst and st.st_dev == oldst.st_dev
211 211 and st.st_ino == oldst.st_ino
212 212 and st.st_mtime == oldst.st_mtime
213 213 and st.st_ctime == oldst.st_ctime):
214 214 return
215 215 self.indexstat = st
216 216 i = f.read()
217 217
218 218 if i and i[:4] != "\0\0\0\0":
219 219 raise RevlogError(_("incompatible revlog signature on %s") %
220 220 self.indexfile)
221 221
222 222 if len(i) > 10000:
223 223 # big index, let's parse it on demand
224 224 parser = lazyparser(i, self)
225 225 self.index = lazyindex(parser)
226 226 self.nodemap = lazymap(parser)
227 227 else:
228 228 s = struct.calcsize(indexformat)
229 229 l = len(i) / s
230 230 self.index = [None] * l
231 231 m = [None] * l
232 232
233 233 n = 0
234 234 for f in xrange(0, l * s, s):
235 235 # offset, size, base, linkrev, p1, p2, nodeid
236 236 e = struct.unpack(indexformat, i[f:f + s])
237 237 m[n] = (e[6], n)
238 238 self.index[n] = e
239 239 n += 1
240 240
241 241 self.nodemap = dict(m)
242 242 self.nodemap[nullid] = -1
243 243
244 244 def tip(self): return self.node(len(self.index) - 1)
245 245 def count(self): return len(self.index)
246 246 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
247 247 def rev(self, node):
248 248 try:
249 249 return self.nodemap[node]
250 250 except KeyError:
251 251 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
252 252 def linkrev(self, node): return self.index[self.rev(node)][3]
253 253 def parents(self, node):
254 254 if node == nullid: return (nullid, nullid)
255 255 return self.index[self.rev(node)][4:6]
256 256
257 def start(self, rev): return self.index[rev][0]
258 def length(self, rev): return self.index[rev][1]
257 def start(self, rev): return (rev < 0) and -1 or self.index[rev][0]
258 def length(self, rev):
259 if rev < 0:
260 return 0
261 else:
262 return self.index[rev][1]
259 263 def end(self, rev): return self.start(rev) + self.length(rev)
260 def base(self, rev): return self.index[rev][2]
264 def base(self, rev): return (rev < 0) and rev or self.index[rev][2]
261 265
262 266 def reachable(self, rev, stop=None):
263 267 reachable = {}
264 268 visit = [rev]
265 269 reachable[rev] = 1
266 270 if stop:
267 271 stopn = self.rev(stop)
268 272 else:
269 273 stopn = 0
270 274 while visit:
271 275 n = visit.pop(0)
272 276 if n == stop:
273 277 continue
274 278 if n == nullid:
275 279 continue
276 280 for p in self.parents(n):
277 281 if self.rev(p) < stopn:
278 282 continue
279 283 if p not in reachable:
280 284 reachable[p] = 1
281 285 visit.append(p)
282 286 return reachable
283 287
284 288 def nodesbetween(self, roots=None, heads=None):
285 289 """Return a tuple containing three elements. Elements 1 and 2 contain
286 290 a final list of bases and heads after all the unreachable ones have been
287 291 pruned. Element 0 contains a topologically sorted list of all
288 292 nodes that satisfy these constraints:
289 293
290 294 1. All nodes must be descended from a node in roots (the nodes on
291 295 roots are considered descended from themselves).
292 296 2. All nodes must also be ancestors of a node in heads (the nodes in
293 297 heads are considered to be their own ancestors).
294 298
295 299 If roots is unspecified, nullid is assumed as the only root.
296 300 If heads is unspecified, it is taken to be the output of the
297 301 heads method (i.e. a list of all nodes in the repository that
298 302 have no children)."""
299 303 nonodes = ([], [], [])
300 304 if roots is not None:
301 305 roots = list(roots)
302 306 if not roots:
303 307 return nonodes
304 308 lowestrev = min([self.rev(n) for n in roots])
305 309 else:
306 310 roots = [nullid] # Everybody's a descendent of nullid
307 311 lowestrev = -1
308 312 if (lowestrev == -1) and (heads is None):
309 313 # We want _all_ the nodes!
310 314 return ([self.node(r) for r in xrange(0, self.count())],
311 315 [nullid], list(self.heads()))
312 316 if heads is None:
313 317 # All nodes are ancestors, so the latest ancestor is the last
314 318 # node.
315 319 highestrev = self.count() - 1
316 320 # Set ancestors to None to signal that every node is an ancestor.
317 321 ancestors = None
318 322 # Set heads to an empty dictionary for later discovery of heads
319 323 heads = {}
320 324 else:
321 325 heads = list(heads)
322 326 if not heads:
323 327 return nonodes
324 328 ancestors = {}
325 329 # Start at the top and keep marking parents until we're done.
326 330 nodestotag = heads[:]
327 331 # Turn heads into a dictionary so we can remove 'fake' heads.
328 332 # Also, later we will be using it to filter out the heads we can't
329 333 # find from roots.
330 334 heads = dict.fromkeys(heads, 0)
331 335 # Remember where the top was so we can use it as a limit later.
332 336 highestrev = max([self.rev(n) for n in nodestotag])
333 337 while nodestotag:
334 338 # grab a node to tag
335 339 n = nodestotag.pop()
336 340 # Never tag nullid
337 341 if n == nullid:
338 342 continue
339 343 # A node's revision number represents its place in a
340 344 # topologically sorted list of nodes.
341 345 r = self.rev(n)
342 346 if r >= lowestrev:
343 347 if n not in ancestors:
344 348 # If we are possibly a descendent of one of the roots
345 349 # and we haven't already been marked as an ancestor
346 350 ancestors[n] = 1 # Mark as ancestor
347 351 # Add non-nullid parents to list of nodes to tag.
348 352 nodestotag.extend([p for p in self.parents(n) if
349 353 p != nullid])
350 354 elif n in heads: # We've seen it before, is it a fake head?
351 355 # So it is, real heads should not be the ancestors of
352 356 # any other heads.
353 357 heads.pop(n)
354 358 if not ancestors:
355 359 return nonodes
356 360 # Now that we have our set of ancestors, we want to remove any
357 361 # roots that are not ancestors.
358 362
359 363 # If one of the roots was nullid, everything is included anyway.
360 364 if lowestrev > -1:
361 365 # But, since we weren't, let's recompute the lowest rev to not
362 366 # include roots that aren't ancestors.
363 367
364 368 # Filter out roots that aren't ancestors of heads
365 369 roots = [n for n in roots if n in ancestors]
366 370 # Recompute the lowest revision
367 371 if roots:
368 372 lowestrev = min([self.rev(n) for n in roots])
369 373 else:
370 374 # No more roots? Return empty list
371 375 return nonodes
372 376 else:
373 377 # We are descending from nullid, and don't need to care about
374 378 # any other roots.
375 379 lowestrev = -1
376 380 roots = [nullid]
377 381 # Transform our roots list into a 'set' (i.e. a dictionary where the
378 382 # values don't matter).
379 383 descendents = dict.fromkeys(roots, 1)
380 384 # Also, keep the original roots so we can filter out roots that aren't
381 385 # 'real' roots (i.e. are descended from other roots).
382 386 roots = descendents.copy()
383 387 # Our topologically sorted list of output nodes.
384 388 orderedout = []
385 389 # Don't start at nullid since we don't want nullid in our output list,
386 390 # and if nullid shows up in descendents, empty parents will look like
387 391 # they're descendents.
388 392 for r in xrange(max(lowestrev, 0), highestrev + 1):
389 393 n = self.node(r)
390 394 isdescendent = False
391 395 if lowestrev == -1: # Everybody is a descendent of nullid
392 396 isdescendent = True
393 397 elif n in descendents:
394 398 # n is already a descendent
395 399 isdescendent = True
396 400 # This check only needs to be done here because all the roots
397 401 # will start being marked as descendents before the loop.
398 402 if n in roots:
399 403 # If n was a root, check if it's a 'real' root.
400 404 p = tuple(self.parents(n))
401 405 # If any of its parents are descendents, it's not a root.
402 406 if (p[0] in descendents) or (p[1] in descendents):
403 407 roots.pop(n)
404 408 else:
405 409 p = tuple(self.parents(n))
406 410 # A node is a descendent if either of its parents are
407 411 # descendents. (We seeded the descendents list with the roots
408 412 # up there, remember?)
409 413 if (p[0] in descendents) or (p[1] in descendents):
410 414 descendents[n] = 1
411 415 isdescendent = True
412 416 if isdescendent and ((ancestors is None) or (n in ancestors)):
413 417 # Only include nodes that are both descendents and ancestors.
414 418 orderedout.append(n)
415 419 if (ancestors is not None) and (n in heads):
416 420 # We're trying to figure out which heads are reachable
417 421 # from roots.
418 422 # Mark this head as having been reached
419 423 heads[n] = 1
420 424 elif ancestors is None:
421 425 # Otherwise, we're trying to discover the heads.
422 426 # Assume this is a head because if it isn't, the next step
423 427 # will eventually remove it.
424 428 heads[n] = 1
425 429 # But, obviously its parents aren't.
426 430 for p in self.parents(n):
427 431 heads.pop(p, None)
428 432 heads = [n for n in heads.iterkeys() if heads[n] != 0]
429 433 roots = roots.keys()
430 434 assert orderedout
431 435 assert roots
432 436 assert heads
433 437 return (orderedout, roots, heads)
434 438
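A hedged usage sketch of that contract, assuming an existing revlog instance rl (a hypothetical name): the returned list is topologically sorted, so any parent that is itself in the result set appears before its children.

    # hypothetical usage against a revlog instance `rl`
    nodes, bases, heads = rl.nodesbetween([rl.lookup("0")], [rl.tip()])

    inset, seen = dict.fromkeys(nodes), {}
    for n in nodes:
        for p in rl.parents(n):
            assert p not in inset or p in seen   # parents come first
        seen[n] = 1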
435 439 def heads(self, start=None):
436 440 """return the list of all nodes that have no children
437 441
438 442 if start is specified, only heads that are descendants of
439 443 start will be returned
440 444
441 445 """
442 446 if start is None:
443 447 start = nullid
444 448 reachable = {start: 1}
445 449 heads = {start: 1}
446 450 startrev = self.rev(start)
447 451
448 452 for r in xrange(startrev + 1, self.count()):
449 453 n = self.node(r)
450 454 for pn in self.parents(n):
451 455 if pn in reachable:
452 456 reachable[n] = 1
453 457 heads[n] = 1
454 458 if pn in heads:
455 459 del heads[pn]
456 460 return heads.keys()
457 461
458 462 def children(self, node):
459 463 """find the children of a given node"""
460 464 c = []
461 465 p = self.rev(node)
462 466 for r in range(p + 1, self.count()):
463 467 n = self.node(r)
464 468 for pn in self.parents(n):
465 469 if pn == node:
466 470 c.append(n)
467 471 continue
468 472 elif pn == nullid:
469 473 continue
470 474 return c
471 475
472 476 def lookup(self, id):
473 477 """locate a node based on revision number or subset of hex nodeid"""
474 478 try:
475 479 rev = int(id)
476 480 if str(rev) != id: raise ValueError
477 481 if rev < 0: rev = self.count() + rev
478 482 if rev < 0 or rev >= self.count(): raise ValueError
479 483 return self.node(rev)
480 484 except (ValueError, OverflowError):
481 485 c = []
482 486 for n in self.nodemap:
483 487 if hex(n).startswith(id):
484 488 c.append(n)
485 489 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
486 490 if len(c) < 1: raise RevlogError(_("No match found"))
487 491 return c[0]
488 492
489 493 return None
490 494
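lookup() accepts either a decimal revision number (negative values count back from the tip) or a hex prefix of a nodeid, provided the prefix is unambiguous. Hypothetical usage against a revlog instance rl:

    n = rl.lookup("0")                    # by revision number
    assert rl.lookup("-1") == rl.tip()    # negative revs count from the tip
    assert rl.lookup(hex(n)[:8]) == n     # nodeid prefix, if unambiguous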
491 495 def diff(self, a, b):
492 496 """return a delta between two revisions"""
493 497 return mdiff.textdiff(a, b)
494 498
495 499 def patches(self, t, pl):
496 500 """apply a list of patches to a string"""
497 501 return mdiff.patches(t, pl)
498 502
499 503 def chunk(self, rev):
500 504 start, length = self.start(rev), self.length(rev)
501 505 end = start + length
502 506
503 507 def loadcache():
504 508 cache_length = max(4096 * 1024, length) # 4MB
505 509 df = self.opener(self.datafile)
506 510 df.seek(start)
507 511 self.chunkcache = (start, df.read(cache_length))
508 512
509 513 if not self.chunkcache:
510 514 loadcache()
511 515
512 516 cache_start = self.chunkcache[0]
513 517 cache_end = cache_start + len(self.chunkcache[1])
514 518 if start >= cache_start and end <= cache_end:
515 519 # it is cached
516 520 offset = start - cache_start
517 521 else:
518 522 loadcache()
519 523 offset = 0
520 524
521 525 #def checkchunk():
522 526 # df = self.opener(self.datafile)
523 527 # df.seek(start)
524 528 # return df.read(length)
525 529 #assert s == checkchunk()
526 530 return decompress(self.chunkcache[1][offset:offset + length])
527 531
528 532 def delta(self, node):
529 533 """return or calculate a delta between a node and its predecessor"""
530 534 r = self.rev(node)
531 b = self.base(r)
532 if r == b:
533 return self.diff(self.revision(self.node(r - 1)),
534 self.revision(node))
535 return self.revdiff(r - 1, r)
536
537 def revdiff(self, rev1, rev2):
538 """return or calculate a delta between two revisions"""
539 b1 = self.base(rev1)
540 b2 = self.base(rev2)
541 if b1 == b2 and rev1 + 1 == rev2:
542 return self.chunk(rev2)
535 543 else:
536 return self.chunk(r)
544 return self.diff(self.revision(self.node(rev1)),
545 self.revision(self.node(rev2)))
537 546
538 547 def revision(self, node):
539 548 """return an uncompressed revision of a given"""
540 549 if node == nullid: return ""
541 550 if self.cache and self.cache[0] == node: return self.cache[2]
542 551
543 552 # look up what we need to read
544 553 text = None
545 554 rev = self.rev(node)
546 555 base = self.base(rev)
547 556
548 557 # do we have useful data cached?
549 558 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
550 559 base = self.cache[1]
551 560 text = self.cache[2]
552 561 else:
553 562 text = self.chunk(base)
554 563
555 564 bins = []
556 565 for r in xrange(base + 1, rev + 1):
557 566 bins.append(self.chunk(r))
558 567
559 text = mdiff.patches(text, bins)
568 text = self.patches(text, bins)
560 569
561 570 p1, p2 = self.parents(node)
562 571 if node != hash(text, p1, p2):
563 572 raise RevlogError(_("integrity check failed on %s:%d")
564 573 % (self.datafile, rev))
565 574
566 575 self.cache = (node, rev, text)
567 576 return text
568 577
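The reconstruction above always starts from the full text stored at base(rev) and applies every delta chunk up to rev; the (node, rev, text) cache lets a later call resume partway down the same chain. A standalone sketch of the chain walk, assuming a revlog instance rl:

    def reconstruct(rl, rev):
        base = rl.base(rev)
        text = rl.chunk(base)                  # full text at the chain base
        deltas = [rl.chunk(r) for r in xrange(base + 1, rev + 1)]
        return mdiff.patches(text, deltas)     # apply all deltas in one pass

    # should agree with the cached, integrity-checked path:
    # assert reconstruct(rl, rl.rev(node)) == rl.revision(node)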
569 578 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
570 579 """add a revision to the log
571 580
572 581 text - the revision data to add
573 582 transaction - the transaction object used for rollback
574 583 link - the linkrev data to add
575 584 p1, p2 - the parent nodeids of the revision
576 585 d - an optional precomputed delta
577 586 """
578 587 if text is None: text = ""
579 588 if p1 is None: p1 = self.tip()
580 589 if p2 is None: p2 = nullid
581 590
582 591 node = hash(text, p1, p2)
583 592
584 593 if node in self.nodemap:
585 594 return node
586 595
587 596 n = self.count()
588 597 t = n - 1
589 598
590 599 if n:
591 600 base = self.base(t)
592 601 start = self.start(base)
593 602 end = self.end(t)
594 603 if not d:
595 604 prev = self.revision(self.tip())
596 605 d = self.diff(prev, str(text))
597 606 data = compress(d)
598 607 l = len(data[1]) + len(data[0])
599 608 dist = end - start + l
600 609
601 610 # full versions are inserted when the needed deltas
602 611 # become comparable to the uncompressed text
603 612 if not n or dist > len(text) * 2:
604 613 data = compress(text)
605 614 l = len(data[1]) + len(data[0])
606 615 base = n
607 616 else:
608 617 base = self.base(t)
609 618
610 619 offset = 0
611 620 if t >= 0:
612 621 offset = self.end(t)
613 622
614 623 e = (offset, l, base, link, p1, p2, node)
615 624
616 625 self.index.append(e)
617 626 self.nodemap[node] = n
618 627 entry = struct.pack(indexformat, *e)
619 628
620 629 transaction.add(self.datafile, e[0])
621 630 f = self.opener(self.datafile, "a")
622 631 if data[0]:
623 632 f.write(data[0])
624 633 f.write(data[1])
625 634 transaction.add(self.indexfile, n * len(entry))
626 635 self.opener(self.indexfile, "a").write(entry)
627 636
628 637 self.cache = (node, n, text)
629 638 return node
630 639
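The cutoff above bounds every delta chain: once end - start plus the new delta would exceed twice the full text, a fresh full version is stored and a new chain begins. A sketch of just that decision:

    # sketch of the chain-length heuristic in addrevision()
    def keep_delta(chain_bytes, textlen):
        return chain_bytes <= textlen * 2

    assert not keep_delta(1200, 500)   # chain too long: store a full version
    assert keep_delta(150, 500)        # cheap chain: append the delta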
631 640 def ancestor(self, a, b):
632 641 """calculate the least common ancestor of nodes a and b"""
633 642 # calculate the distance of every node from root
634 643 dist = {nullid: 0}
635 644 for i in xrange(self.count()):
636 645 n = self.node(i)
637 646 p1, p2 = self.parents(n)
638 647 dist[n] = max(dist[p1], dist[p2]) + 1
639 648
640 649 # traverse ancestors in order of decreasing distance from root
641 650 def ancestors(node):
642 651 # we store negative distances because heap returns smallest member
643 652 h = [(-dist[node], node)]
644 653 seen = {}
645 654 while h:
646 655 d, n = heapq.heappop(h)
647 656 if n not in seen:
648 657 seen[n] = 1
649 658 yield (-d, n)
650 659 for p in self.parents(n):
651 660 heapq.heappush(h, (-dist[p], p))
652 661
653 662 def generations(node):
654 663 sg, s = None, {}
655 664 for g,n in ancestors(node):
656 665 if g != sg:
657 666 if sg:
658 667 yield sg, s
659 668 sg, s = g, {n:1}
660 669 else:
661 670 s[n] = 1
662 671 yield sg, s
663 672
664 673 x = generations(a)
665 674 y = generations(b)
666 675 gx = x.next()
667 676 gy = y.next()
668 677
669 678 # increment each ancestor list until it is closer to root than
670 679 # the other, or they match
671 680 while 1:
672 681 #print "ancestor gen %s %s" % (gx[0], gy[0])
673 682 if gx[0] == gy[0]:
674 683 # find the intersection
675 684 i = [ n for n in gx[1] if n in gy[1] ]
676 685 if i:
677 686 return i[0]
678 687 else:
679 688 #print "next"
680 689 gy = y.next()
681 690 gx = x.next()
682 691 elif gx[0] < gy[0]:
683 692 #print "next y"
684 693 gy = y.next()
685 694 else:
686 695 #print "next x"
687 696 gx = x.next()
688 697
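ancestor() first computes every node's distance from the root, then walks both ancestor heaps down generation by generation until the two sets intersect. A toy distance table over a hypothetical four-node graph:

    # hypothetical graph: null <- a <- b <- c, plus a side branch a <- d
    parents = {"a": ("null", "null"), "b": ("a", "null"),
               "c": ("b", "null"), "d": ("a", "null")}
    dist = {"null": 0}
    for n in ("a", "b", "d", "c"):             # topological order
        p1, p2 = parents[n]
        dist[n] = max(dist[p1], dist[p2]) + 1
    assert (dist["c"], dist["d"]) == (3, 2)
    # ancestor("c", "d") would descend both heaps to generation 1,
    # where the ancestor sets first intersect at "a"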
689 698 def group(self, nodelist, lookup, infocollect=None):
690 699 """calculate a delta group
691 700
692 701 Given a list of changeset revs, return a set of deltas and
693 702 metadata corresponding to nodes. The first delta is
694 703 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
695 704 have this parent as it has all history before these
696 705 changesets. The chosen parent is parent[0].
697 706 """
698 707 revs = [self.rev(n) for n in nodelist]
699 708
700 709 # if we don't have any revisions touched by these changesets, bail
701 710 if not revs:
702 711 yield struct.pack(">l", 0)
703 712 return
704 713
705 714 # add the parent of the first rev
706 715 p = self.parents(self.node(revs[0]))[0]
707 716 revs.insert(0, self.rev(p))
708 717
709 718 # build deltas
710 719 for d in xrange(0, len(revs) - 1):
711 720 a, b = revs[d], revs[d + 1]
712 na = self.node(a)
713 721 nb = self.node(b)
714 722
715 723 if infocollect is not None:
716 724 infocollect(nb)
717 725
718 # do we need to construct a new delta?
719 if a + 1 != b or self.base(b) == b:
720 ta = self.revision(na)
721 tb = self.revision(nb)
722 d = self.diff(ta, tb)
723 else:
724 d = self.chunk(b)
725
726 d = self.revdiff(a, b)
726 727 p = self.parents(nb)
727 728 meta = nb + p[0] + p[1] + lookup(nb)
728 729 l = struct.pack(">l", len(meta) + len(d) + 4)
729 730 yield l
730 731 yield meta
731 732 yield d
732 733
733 734 yield struct.pack(">l", 0)
734 735
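The stream emitted by group() is a sequence of length-prefixed chunks: a big-endian 32-bit total length (which includes the 4 length bytes themselves), 80 bytes of metadata (node, p1, p2, cs), then the delta, with a zero length terminating the stream. A sketch of a matching reader:

    import struct

    def readchunks(data):
        # [">l" length | 20s node | 20s p1 | 20s p2 | 20s cs | delta] ... 0
        pos = 0
        while True:
            l = struct.unpack(">l", data[pos:pos + 4])[0]
            if l == 0:
                return
            node, p1, p2, cs = struct.unpack("20s20s20s20s",
                                             data[pos + 4:pos + 84])
            yield node, p1, p2, cs, data[pos + 84:pos + l]
            pos += l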
735 736 def addgroup(self, revs, linkmapper, transaction, unique=0):
736 737 """
737 738 add a delta group
738 739
739 740 Given a set of deltas, add them to the revision log. The
740 741 first delta is against its parent, which should be in our
741 742 log; the rest are against the previous delta.
742 743 """
743 744
744 745 #track the base of the current delta log
745 746 r = self.count()
746 747 t = r - 1
747 748 node = nullid
748 749
749 750 base = prev = -1
750 751 start = end = measure = 0
751 752 if r:
752 753 base = self.base(t)
753 754 start = self.start(base)
754 755 end = self.end(t)
755 756 measure = self.length(base)
756 757 prev = self.tip()
757 758
758 759 transaction.add(self.datafile, end)
759 760 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
760 761 dfh = self.opener(self.datafile, "a")
761 762 ifh = self.opener(self.indexfile, "a")
762 763
763 764 # loop through our set of deltas
764 765 chain = None
765 766 for chunk in revs:
766 767 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
767 768 link = linkmapper(cs)
768 769 if node in self.nodemap:
769 770 # this can happen if two branches make the same change
770 771 # if unique:
771 772 # raise RevlogError(_("already have %s") % hex(node[:4]))
772 773 chain = node
773 774 continue
774 775 delta = chunk[80:]
775 776
776 777 for p in (p1, p2):
777 778 if not p in self.nodemap:
778 779 raise RevlogError(_("unknown parent %s") % short(p1))
779 780
780 781 if not chain:
781 782 # retrieve the parent revision of the delta chain
782 783 chain = p1
783 784 if not chain in self.nodemap:
784 785 raise RevlogError(_("unknown base %s") % short(chain[:4]))
785 786
786 787 # full versions are inserted when the needed deltas become
787 788 # comparable to the uncompressed text or when the previous
788 789 # version is not the one we have a delta against. We use
789 790 # the size of the previous full rev as a proxy for the
790 791 # current size.
791 792
792 793 if chain == prev:
793 794 tempd = compress(delta)
794 795 cdelta = tempd[0] + tempd[1]
795 796
796 797 if chain != prev or (end - start + len(cdelta)) > measure * 2:
797 798 # flush our writes here so we can read it in revision
798 799 dfh.flush()
799 800 ifh.flush()
800 801 text = self.revision(chain)
801 802 text = self.patches(text, [delta])
802 803 chk = self.addrevision(text, transaction, link, p1, p2)
803 804 if chk != node:
804 805 raise RevlogError(_("consistency error adding group"))
805 806 measure = len(text)
806 807 else:
807 808 e = (end, len(cdelta), base, link, p1, p2, node)
808 809 self.index.append(e)
809 810 self.nodemap[node] = r
810 811 dfh.write(cdelta)
811 812 ifh.write(struct.pack(indexformat, *e))
812 813
813 814 t, r, chain, prev = r, r + 1, node, node
814 815 base = self.base(t)
815 816 start = self.start(base)
816 817 end = self.end(t)
817 818
818 819 dfh.close()
819 820 ifh.close()
820 821 return node
821 822
822 823 def strip(self, rev, minlink):
823 824 if self.count() == 0 or rev >= self.count():
824 825 return
825 826
826 827 # When stripping away a revision, we need to make sure it
827 828 # does not actually belong to an older changeset.
828 829 # The minlink parameter defines the oldest revision
829 830 # we're allowed to strip away.
830 831 while minlink > self.index[rev][3]:
831 832 rev += 1
832 833 if rev >= self.count():
833 834 return
834 835
835 836 # first truncate the files on disk
836 837 end = self.start(rev)
837 838 self.opener(self.datafile, "a").truncate(end)
838 839 end = rev * struct.calcsize(indexformat)
839 840 self.opener(self.indexfile, "a").truncate(end)
840 841
841 842 # then reset internal state in memory to forget those revisions
842 843 self.cache = None
843 844 self.chunkcache = None
844 845 for p in self.index[rev:]:
845 846 del self.nodemap[p[6]]
846 847 del self.index[rev:]
847 848
848 849 # truncating the lazyindex also truncates the lazymap.
849 850 if isinstance(self.index, lazyindex):
850 851 self.index.trunc(end)
851 852
852 853
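strip() relies on both files being append-only: truncating the index to rev fixed-size records and the data file to start(rev) discards revision rev and everything after it. A sketch of the truncation arithmetic, using the indexformat defined above:

    import struct

    s = struct.calcsize(">4l20s20s20s")   # indexformat, 76 bytes per record
    rev = 10                              # hypothetical strip point
    assert rev * s == 760                 # surviving index length in bytes
    # the surviving data length would be rl.start(rev) on a real revlog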
853 854 def checksize(self):
854 855 expected = 0
855 856 if self.count():
856 857 expected = self.end(self.count() - 1)
857 858
858 859 try:
859 860 f = self.opener(self.datafile)
860 861 f.seek(0, 2)
861 862 actual = f.tell()
862 863 dd = actual - expected
863 864 except IOError, inst:
864 865 if inst.errno != errno.ENOENT:
865 866 raise
866 867 dd = 0
867 868
868 869 try:
869 870 f = self.opener(self.indexfile)
870 871 f.seek(0, 2)
871 872 actual = f.tell()
872 873 s = struct.calcsize(indexformat)
873 874 i = actual / s
874 875 di = actual - (i * s)
875 876 except IOError, inst:
876 877 if inst.errno != errno.ENOENT:
877 878 raise
878 879 di = 0
879 880
880 881 return (dd, di)
881 882
882 883