##// END OF EJS Templates
revlog: attempt to gracefully handle some interleaved damage
Matt Mackall -
r4215:90bb1ab5 default
parent child Browse files
Show More
@@ -1,1292 +1,1295
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
# revlog version numbers
REVLOGV0 = 0          # original index format
REVLOGNG = 1          # "next generation" index format

# revlog flags
# flag bits live in the upper 16 bits of the 32-bit version field
REVLOGNGINLINEDATA = (1 << 16)   # revision data stored inline in the index file
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
def flagstr(flag):
    """map a revlog flag name to its flag bit"""
    if flag != "inline":
        raise RevlogError(_("unknown revlog flag %s") % flag)
    return REVLOGNGINLINEDATA
34 34
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.

    The parents are ordered before hashing, so the result does not
    depend on which parent is passed first.
    """
    a, b = min(p1, p2), max(p1, p2)
    s = sha.new(a)
    s.update(b)
    s.update(text)
    return s.digest()
48 48
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    compressed = None
    # texts shorter than 44 bytes never win from compression
    if len(text) >= 44:
        candidate = zlib.compress(text)
        if len(candidate) <= len(text):
            compressed = candidate
    if compressed is not None:
        return ("", compressed)
    # stored literally: a leading NUL needs no marker, anything else is
    # tagged 'u' (uncompressed) so decompress can tell the cases apart
    if text[0] == '\0':
        return ("", text)
    return ('u', text)
60 60
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    marker = bin[0]
    if marker == 'u':
        # stored uncompressed, strip the marker byte
        return bin[1:]
    if marker == '\0':
        # a leading NUL means the chunk was stored as-is
        return bin
    if marker == 'x':
        # zlib stream (zlib output always starts with 'x')
        return zlib.decompress(bin)
    raise RevlogError(_("unknown compression type %r") % marker)
69 69
# v0 index entry: offset, length, base, linkrev (4 ints), then
# parent 1, parent 2 and this revision's nodeid as 20-byte strings
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56          # byte offset of the nodeid within a v0 entry
# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32          # byte offset of the nodeid within an ng entry
versionformat = ">I"      # format of the 4-byte version header
85 85
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices

    It keeps the index file open and pulls entries (and the node->rev
    map) in on demand, block by block, caching what it has seen.
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size, indexformat, shaoffset):
        # dataf: open file object for the index file
        # size: total size of the index file in bytes
        # indexformat: struct format string for one index entry
        # shaoffset: byte offset of the nodeid within an entry
        self.dataf = dataf
        self.format = indexformat
        self.s = struct.calcsize(indexformat)   # size of one entry
        self.indexformat = indexformat
        self.datasize = size
        self.l = size/self.s                    # number of entries
        self.index = [None] * self.l            # raw entries, parsed lazily
        self.map = {nullid: nullrev}            # node -> rev, filled lazily
        self.allmap = 0          # set once the whole node map is loaded
        self.all = 0             # set once every index entry is loaded
        self.mapfind_count = 0   # number of findnode() calls so far
        self.shaoffset = shaoffset

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap: return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                # pick just the nodeid field out of each raw entry
                n = data[off + self.shaoffset:off + self.shaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        """parse one block of the index file into self.index/self.map,
        reading it from the file unless data is supplied"""
        if self.all: return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                # keep the raw bytes; they are unpacked on access
                self.index[i + x] = b
                n = b[self.shaoffset:self.shaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap: return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                # the whole index is already in memory, nothing to search
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.rfind(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + self.shaoffset:off + self.shaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        """load index entries: all of them when i is None, the range
        [i, end) when end is given, otherwise the 64-entry block
        containing i"""
        if self.all: return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (512 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~(32)) * self.s
                blocksize = self.s * 64
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all: self.all = True
223 223
class lazyindex(object):
    """a lazy version of the index array"""

    def __init__(self, parser):
        self.p = parser

    def __len__(self):
        return len(self.p.index)

    def load(self, pos):
        # negative positions index from the end, like a normal list
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]

    def __getitem__(self, pos):
        entry = self.p.index[pos]
        if not entry:
            # not parsed yet: pull the containing block in
            entry = self.load(pos)
        if isinstance(entry, str):
            # still raw bytes: unpack into a tuple
            entry = struct.unpack(self.p.indexformat, entry)
        return entry

    def __setitem__(self, pos, item):
        self.p.index[pos] = item

    def __delitem__(self, pos):
        del self.p.index[pos]

    def append(self, e):
        self.p.index.append(e)
246 246
class lazymap(object):
    """a lazy version of the node map"""

    def __init__(self, parser):
        self.p = parser

    def load(self, key):
        # ask the parser to locate the node; it fills p.map on success
        if self.p.findnode(key) == None:
            raise KeyError(key)

    def __contains__(self, key):
        # fast path: already cached
        if key in self.p.map:
            return True
        # otherwise force the full node map in and check again
        self.p.loadmap()
        return key in self.p.map

    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            entry = self.p.index[i]
            if not entry:
                self.p.loadindex(i)
                entry = self.p.index[i]
            if isinstance(entry, str):
                entry = struct.unpack(self.p.indexformat, entry)
            # the nodeid is the last field of an unpacked entry
            yield entry[-1]

    def __getitem__(self, key):
        m = self.p.map
        if key in m:
            return m[key]
        try:
            self.load(key)
            return m[key]
        except KeyError:
            raise KeyError("node " + hex(key))

    def __setitem__(self, key, val):
        self.p.map[key] = val

    def __delitem__(self, key):
        del self.p.map[key]
283 283
284 284 class RevlogError(Exception): pass
285 285
286 286 class revlog(object):
287 287 """
288 288 the underlying revision storage object
289 289
290 290 A revlog consists of two parts, an index and the revision data.
291 291
292 292 The index is a file with a fixed record size containing
293 293 information on each revision, including its nodeid (hash), the
294 294 nodeids of its parents, the position and offset of its data within
295 295 the data file, and the revision it's based on. Finally, each entry
296 296 contains a linkrev entry that can serve as a pointer to external
297 297 data.
298 298
299 299 The revision data itself is a linear collection of data chunks.
300 300 Each chunk represents a revision and is usually represented as a
301 301 delta against the previous chunk. To bound lookup time, runs of
302 302 deltas are limited to about 2 times the length of the original
303 303 version data. This makes retrieval of a version proportional to
304 304 its size, or O(1) relative to the number of revisions.
305 305
306 306 Both pieces of the revlog are written to in an append-only
307 307 fashion, which means we never need to rewrite a file to insert or
308 308 remove data, and can use some simple techniques to avoid the need
309 309 for locking while reading.
310 310 """
    def __init__(self, opener, indexfile, datafile,
                 defversion=REVLOG_DEFAULT_VERSION):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener

        self.indexstat = None    # stat of the index at last load (staleness check)
        self.cache = None        # (node, rev, text) of last reconstructed revision
        self.chunkcache = None   # (offset, data) window of raw file contents
        self.defversion = defversion
        self.load()
328 328
    def load(self):
        """(re)load the index from disk, unless the cached stat shows
        the file is unchanged since the last load"""
        v = self.defversion
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)   # the 4-byte version header
            f.seek(0)
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            i = ""   # no index file yet: brand-new revlog
        else:
            try:
                st = util.fstat(f)
            except AttributeError, inst:
                st = None
            else:
                oldst = self.indexstat
                # same dev/inode/mtime/ctime: file unchanged, keep the
                # in-memory index as-is
                if (oldst and st.st_dev == oldst.st_dev
                    and st.st_ino == oldst.st_ino
                    and st.st_mtime == oldst.st_mtime
                    and st.st_ctime == oldst.st_ctime):
                    return
                self.indexstat = st
        if len(i) > 0:
            v = struct.unpack(versionformat, i)[0]
        # upper 16 bits carry flags, lower 16 the format number
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_("index %s unknown flags %#04x for format v0")
                                  % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG:
            if flags & ~REVLOGNGINLINEDATA:
                raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                                  % (self.indexfile, flags >> 16))
        else:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))
        self.version = v
        if v == REVLOGV0:
            self.indexformat = indexformatv0
            shaoffset = v0shaoffset
        else:
            self.indexformat = indexformatng
            shaoffset = ngshaoffset

        if i:
            if (lazyparser.safe_to_use and not self.inlinedata() and
                st and st.st_size > 10000):
                # big index, let's parse it on demand
                parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
                self.index = lazyindex(parser)
                self.nodemap = lazymap(parser)
            else:
                self.parseindex(f, st)
                if self.version != REVLOGV0:
                    # on disk, entry 0's offset field shares space with
                    # the version header; reset the in-memory offset to 0
                    e = list(self.index[0])
                    type = self.ngtype(e[0])
                    e[0] = self.offset_type(0, type)
                    self.index[0] = e
        else:
            # empty revlog
            self.nodemap = {nullid: nullrev}
            self.index = []
392 392
393 393
    def parseindex(self, fp, st):
        """read the whole index file from fp, filling in self.index and
        self.nodemap; handles inline data interleaved between entries"""
        s = struct.calcsize(self.indexformat)
        self.index = []
        self.nodemap =  {nullid: nullrev}
        inline = self.inlinedata()
        n = 0
        leftover = None
        while True:
            if st:
                data = fp.read(65536)
            else:
                # hack for httprangereader, it doesn't do partial reads well
                data = fp.read()
            if not data:
                break
            if n == 0 and self.inlinedata():
                # cache the first chunk
                self.chunkcache = (0, data)
            if leftover:
                # an entry straddled the previous read boundary
                data = leftover + data
                leftover = None
            off = 0
            l = len(data)
            while off < l:
                if l - off < s:
                    # partial entry at the end of this read; finish it
                    # on the next pass
                    leftover = data[off:]
                    break
                cur = data[off:off + s]
                off += s
                e = struct.unpack(self.indexformat, cur)
                self.index.append(e)
                self.nodemap[e[-1]] = n
                n += 1
                if inline:
                    if e[1] < 0:
                        # negative chunk length: the entry is damaged
                        # (interleaved corruption); stop parsing here
                        # rather than walking off into garbage
                        break
                    off += e[1]
                    if off > l:
                        # some things don't seek well, just read it
                        fp.read(off - l)
                        break
            if not st:
                break
434 437
435 438
    def ngoffset(self, q):
        """extract the file offset from a packed ng offset+type value;
        raises if any of the low 16 (flag) bits are set"""
        if q & 0xFFFF:
            raise RevlogError(_('%s: incompatible revision flag %x') %
                              (self.indexfile, q))
        return long(q >> 16)
441 444
442 445 def ngtype(self, q):
443 446 return int(q & 0xFFFF)
444 447
    def offset_type(self, offset, type):
        # pack a file offset and 16-bit type field into one integer;
        # the inverse of ngoffset/ngtype
        return long(long(offset) << 16 | type)
447 450
    def loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        # no-op when the index is already fully parsed
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)
452 455
    def loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map
459 462
    def loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            # replace the lazy wrapper with the now-complete plain dict
            self.nodemap = self.nodemap.p.map
465 468
466 469 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
467 470 def tip(self): return self.node(len(self.index) - 1)
468 471 def count(self): return len(self.index)
    def node(self, rev):
        # map a revision number to its nodeid; nullrev maps to nullid
        return rev == nullrev and nullid or self.index[rev][-1]
    def rev(self, node):
        """map a nodeid to its revision number"""
        try:
            return self.nodemap[node]
        except KeyError:
            raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def linkrev(self, node):
        # return the linkrev field of the node's index entry (a pointer
        # to external data; see the class docstring)
        return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
    def parents(self, node):
        """return the two parent nodeids of a node"""
        if node == nullid: return (nullid, nullid)
        r = self.rev(node)
        d = self.index[r][-3:-1]
        if self.version == REVLOGV0:
            # v0 stores parents as nodeids directly
            return d
        # ng stores parents as revision numbers; convert back to nodes
        return (self.node(d[0]), self.node(d[1]))
    def parentrevs(self, rev):
        """return the two parent revision numbers of a revision"""
        if rev == nullrev:
            return (nullrev, nullrev)
        d = self.index[rev][-3:-1]
        if self.version == REVLOGV0:
            # v0 stores parents as nodeids; convert to revisions
            return (self.rev(d[0]), self.rev(d[1]))
        return d
    def start(self, rev):
        """return the byte offset of a revision's chunk in the data file"""
        if rev == nullrev:
            return 0
        if self.version != REVLOGV0:
            # ng packs offset and type into the first field
            return self.ngoffset(self.index[rev][0])
        return self.index[rev][0]
498 501
499 502 def end(self, rev): return self.start(rev) + self.length(rev)
500 503
    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        if rev == nullrev:
            return 0
        l = -1
        if self.version != REVLOGV0:
            # ng stores the uncompressed length in the entry; a negative
            # value means it is unknown and must be computed
            l = self.index[rev][2]
            if l >= 0:
                return l

        # fall back to reconstructing the revision and measuring it
        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation, The advantage to this code is it
        # will be faster for a single revision.  But, the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """
533 536
    def length(self, rev):
        """return the length of a revision's stored (compressed) chunk"""
        if rev == nullrev:
            return 0
        else:
            return self.index[rev][1]
    def base(self, rev):
        """return the base revision of the delta chain containing rev"""
        if (rev == nullrev):
            return nullrev
        else:
            return self.index[rev][-5]
544 547
    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]      # breadth-first worklist
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                # skip parents below the stop revision
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable
568 571
    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list bases and heads after all the unreachable ones have been
        pruned.  Element 0 contains a topologically sorted list of all
        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads.keys()
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter.
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descedents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
719 722
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                    heads[r] = 1
                    # p has a child (r), so it is no longer a head
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]
748 751
749 752 def children(self, node):
750 753 """find the children of a given node"""
751 754 c = []
752 755 p = self.rev(node)
753 756 for r in range(p + 1, self.count()):
754 757 for pr in self.parentrevs(r):
755 758 if pr == p:
756 759 c.append(self.node(r))
757 760 return c
758 761
    def _match(self, id):
        """try to resolve id exactly: as a rev number, str(rev number),
        binary nodeid or full hex nodeid; returns None on no match"""
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except RevlogError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev = self.count() + rev
            if rev < 0 or rev >= self.count(): raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass
789 792
    def _partialmatch(self, id):
        """resolve a short hex nodeid prefix to a full node; raises on
        ambiguity, returns None when nothing matches"""
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    # binary prefix filters quickly; the hex check handles
                    # an odd trailing digit
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise RevlogError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass
805 808
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        # try exact forms first, then a unique hex prefix
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise RevlogError(_("No match found"))
820 823
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True when text differs from what node stores
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node
825 828
    def makenode(self, node, text):
        """calculate a file nodeid for text, descended or possibly
        unchanged from node"""

        if self.cmp(node, text):
            # content changed: new node with 'node' as sole parent
            return hash(text, node, nullid)
        return node
833 836
    def diff(self, a, b):
        """return a delta between two revisions"""
        # delegates to mdiff for the actual binary diff
        return mdiff.textdiff(a, b)
837 840
    def patches(self, t, pl):
        """apply a list of patches to a string"""
        # delegates to mdiff for the actual patch application
        return mdiff.patches(t, pl)
841 844
    def chunk(self, rev, df=None, cachelen=4096):
        """return the decompressed data chunk for one revision

        df may be an already-open data (or inline index) file; cachelen
        is the minimum amount of data to pull into the chunk cache.
        """
        start, length = self.start(rev), self.length(rev)
        inline = self.inlinedata()
        if inline:
            # inline data: skip the index entries interleaved before it
            start += (rev + 1) * struct.calcsize(self.indexformat)
        end = start + length
        def loadcache(df):
            cache_length = max(cachelen, length) # 4k
            if not df:
                if inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self.chunkcache = (start, df.read(cache_length))

        if not self.chunkcache:
            loadcache(df)

        cache_start = self.chunkcache[0]
        cache_end = cache_start + len(self.chunkcache[1])
        if start >= cache_start and end <= cache_end:
            # it is cached
            offset = start - cache_start
        else:
            loadcache(df)
            offset = 0

        #def checkchunk():
        #    df = self.opener(self.datafile)
        #    df.seek(start)
        #    return df.read(length)
        #assert s == checkchunk()
        return decompress(self.chunkcache[1][offset:offset + length])
876 879
    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        return self.revdiff(r - 1, r)
881 884
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        b1 = self.base(rev1)
        b2 = self.base(rev2)
        if b1 == b2 and rev1 + 1 == rev2:
            # adjacent revisions in the same delta chain: the stored
            # chunk for rev2 is already the delta against rev1
            return self.chunk(rev2)
        else:
            # otherwise reconstruct both texts and diff them
            return self.diff(self.revision(self.node(rev1)),
                             self.revision(self.node(rev2)))
891 894
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        if self.inlinedata():
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            # start patching from the cached text instead of the chain base
            base = self.cache[1]
            text = self.cache[2]
            self.loadindex(base, rev + 1)
        else:
            self.loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        # collect and apply the deltas from base+1 up to rev
        bins = []
        for r in xrange(base + 1, rev + 1):
            bins.append(self.chunk(r, df=df))

        text = self.patches(text, bins)

        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text
930 933
    def checkinlinesize(self, tr, fp=None):
        """once an inline index grows past 128k, migrate the revision
        data out to a separate data file and rewrite the index without
        the inline flag"""
        if not self.inlinedata():
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
        fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = struct.calcsize(self.indexformat)
        for r in xrange(self.count()):
            # rev r's data sits after r+1 index entries in the inline file
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        if self.count():
            x = self.index[0]
            # entry 0's first 4 bytes are replaced by the version header
            e = struct.pack(self.indexformat, *x)[4:]
            l = struct.pack(versionformat, self.version)
            fp.write(l)
            fp.write(e)

        for i in xrange(1, self.count()):
            x = self.index[i]
            e = struct.pack(self.indexformat, *x)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self.chunkcache = None
979 982
980 983 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
981 984 """add a revision to the log
982 985
983 986 text - the revision data to add
984 987 transaction - the transaction object used for rollback
985 988 link - the linkrev data to add
986 989 p1, p2 - the parent nodeids of the revision
987 990 d - an optional precomputed delta
988 991 """
989 992 if not self.inlinedata():
990 993 dfh = self.opener(self.datafile, "a")
991 994 else:
992 995 dfh = None
993 996 ifh = self.opener(self.indexfile, "a+")
994 997 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
995 998
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        """internal worker for addrevision: append one revision using
        already-open file handles

        text - revision data (None is treated as "")
        transaction - transaction used for rollback bookkeeping
        link - linkrev for the new entry
        p1, p2 - parent nodeids (default to tip / nullid when None)
        d - optional precomputed delta against the current tip
        ifh - open index file handle
        dfh - open data file handle, or None for inline revlogs

        Returns the node of the new (or already present) revision.
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        # already stored: nothing to do
        if node in self.nodemap:
            return node

        n = self.count()
        t = n - 1   # rev number of the current tip

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            if not d:
                # no precomputed delta: diff against the current tip
                prev = self.revision(self.tip())
                d = self.diff(prev, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = end - start + l

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        if self.version == REVLOGV0:
            e = (offset, l, base, link, p1, p2, node)
        else:
            # RevlogNG entries pack offset+flags together and store
            # parents as rev numbers rather than nodeids
            e = (self.offset_type(offset, 0), l, len(text),
                 base, link, self.rev(p1), self.rev(p2), node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(self.indexformat, *e)

        if not self.inlinedata():
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, n * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            # flush so a subsequent revision() call can read what we wrote
            dfh.flush()
        else:
            ifh.seek(0, 2)
            transaction.add(self.indexfile, ifh.tell(), self.count() - 1)

        if len(self.index) == 1 and self.version != REVLOGV0:
            # very first entry: the version header overlays its first 4 bytes
            l = struct.pack(versionformat, self.version)
            ifh.write(l)
            entry = entry[4:]

        ifh.write(entry)

        if self.inlinedata():
            # data is interleaved with the index; this write may push the
            # file over the inline size limit and trigger a split
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self.cache = (node, n, text)
        return node
1068 1071
1069 1072 def ancestor(self, a, b):
1070 1073 """calculate the least common ancestor of nodes a and b"""
1071 1074
1072 1075 def parents(rev):
1073 1076 return [p for p in self.parentrevs(rev) if p != nullrev]
1074 1077
1075 1078 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1076 1079 if c is None:
1077 1080 return nullid
1078 1081
1079 1082 return self.node(c)
1080 1083
1081 1084 def group(self, nodelist, lookup, infocollect=None):
1082 1085 """calculate a delta group
1083 1086
1084 1087 Given a list of changeset revs, return a set of deltas and
1085 1088 metadata corresponding to nodes. the first delta is
1086 1089 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1087 1090 have this parent as it has all history before these
1088 1091 changesets. parent is parent[0]
1089 1092 """
1090 1093 revs = [self.rev(n) for n in nodelist]
1091 1094
1092 1095 # if we don't have any revisions touched by these changesets, bail
1093 1096 if not revs:
1094 1097 yield changegroup.closechunk()
1095 1098 return
1096 1099
1097 1100 # add the parent of the first rev
1098 1101 p = self.parents(self.node(revs[0]))[0]
1099 1102 revs.insert(0, self.rev(p))
1100 1103
1101 1104 # build deltas
1102 1105 for d in xrange(0, len(revs) - 1):
1103 1106 a, b = revs[d], revs[d + 1]
1104 1107 nb = self.node(b)
1105 1108
1106 1109 if infocollect is not None:
1107 1110 infocollect(nb)
1108 1111
1109 1112 d = self.revdiff(a, b)
1110 1113 p = self.parents(nb)
1111 1114 meta = nb + p[0] + p[1] + lookup(nb)
1112 1115 yield changegroup.genchunk("%s%s" % (meta, d))
1113 1116
1114 1117 yield changegroup.closechunk()
1115 1118
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        revs - iterable of changegroup chunks (80-byte header + delta)
        linkmapper - maps a changeset node to its linkrev
        transaction - transaction used for rollback bookkeeping
        unique - currently ignored; the duplicate check is commented out

        Returns the node of the last revision added.
        """

        #track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell(), self.count())
        if self.inlinedata():
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            # chunk header: node, p1, p2, changeset node (20 bytes each)
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if not p in self.nodemap:
                    raise RevlogError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise RevlogError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # delta base isn't the previous rev, or the chain got too
                # long: store a full text via _addrevision instead
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self.inlinedata():
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                # store the delta as-is
                if self.version == REVLOGV0:
                    e = (end, len(cdelta), base, link, p1, p2, node)
                else:
                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                self.index.append(e)
                self.nodemap[node] = r
                if self.inlinedata():
                    ifh.write(struct.pack(self.indexformat, *e))
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self.inlinedata():
                        # checkinlinesize split the revlog; switch to
                        # separate index and data handles
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta)
                    ifh.write(struct.pack(self.indexformat, *e))

            # advance the chain state for the next delta
            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node
1220 1223
    def strip(self, rev, minlink):
        """remove revision rev and everything after it from the revlog

        rev - first revision to strip (may be bumped forward, see below)
        minlink - the oldest linkrev we are allowed to strip

        Truncates the on-disk files first, then discards the
        corresponding in-memory state.
        """
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            # we need a fully materialized index to delete entries below
            self.loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        # (index entry field -4 is the linkrev in both index formats)
        while minlink > self.index[rev][-4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self.inlinedata():
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * struct.calcsize(self.indexformat)
        else:
            # inline: index entries and revision data share one file,
            # so the truncation point includes both
            end += rev * struct.calcsize(self.indexformat)

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self.cache = None
        self.chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:]
1256 1259
1257 1260 def checksize(self):
1258 1261 expected = 0
1259 1262 if self.count():
1260 1263 expected = self.end(self.count() - 1)
1261 1264
1262 1265 try:
1263 1266 f = self.opener(self.datafile)
1264 1267 f.seek(0, 2)
1265 1268 actual = f.tell()
1266 1269 dd = actual - expected
1267 1270 except IOError, inst:
1268 1271 if inst.errno != errno.ENOENT:
1269 1272 raise
1270 1273 dd = 0
1271 1274
1272 1275 try:
1273 1276 f = self.opener(self.indexfile)
1274 1277 f.seek(0, 2)
1275 1278 actual = f.tell()
1276 1279 s = struct.calcsize(self.indexformat)
1277 1280 i = actual / s
1278 1281 di = actual - (i * s)
1279 1282 if self.inlinedata():
1280 1283 databytes = 0
1281 1284 for r in xrange(self.count()):
1282 1285 databytes += self.length(r)
1283 1286 dd = 0
1284 1287 di = actual - self.count() * s - databytes
1285 1288 except IOError, inst:
1286 1289 if inst.errno != errno.ENOENT:
1287 1290 raise
1288 1291 di = 0
1289 1292
1290 1293 return (dd, di)
1291 1294
1292 1295
General Comments 0
You need to be logged in to leave comments. Login now