##// END OF EJS Templates
Use unsigned version format....
Thomas Arendsen Hein -
r3755:05120e21 default
parent child Browse files
Show More
@@ -1,1286 +1,1286
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno ancestor mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
# revlog version strings
REVLOGV0 = 0          # original format
REVLOGNG = 1          # "next generation" format

# revlog flags (stored in the high 16 bits of the version field)
REVLOGNGINLINEDATA = (1 << 16)   # revision data interleaved in the index file
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
def flagstr(flag):
    """Map a symbolic revlog flag name to its bit value.

    Only "inline" is recognized; any other name raises RevlogError.
    """
    if flag != "inline":
        raise RevlogError(_("unknown revlog flag %s") % flag)
    return REVLOGNGINLINEDATA
34 34
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # hash the parents in sorted order so the result does not depend
    # on the order in which they were passed
    s = sha.new(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()
48 48
def compress(text):
    """Return a (header, data) pair representing text.

    The data is zlib-compressed when that actually saves space;
    otherwise it is stored literally, prefixed with 'u' unless it
    already begins with the NUL marker byte.
    """
    if not text:
        return ("", text)
    if len(text) >= 44:
        # long enough that compression might pay off
        bin = zlib.compress(text)
        if len(bin) <= len(text):
            return ("", bin)
    # too small, or zlib made it larger: store literally
    if text[0] == '\0':
        # already starts with the literal marker byte, no prefix needed
        return ("", text)
    return ('u', text)
60 60
def decompress(bin):
    """Undo compress(): return the original text for a stored chunk."""
    if not bin:
        return bin
    marker = bin[0]
    if marker == '\0':
        # literal data that happened to start with NUL
        return bin
    if marker == 'u':
        # explicit "stored uncompressed" prefix
        return bin[1:]
    if marker == 'x':
        # a zlib stream (zlib headers start with 'x')
        return zlib.decompress(bin)
    raise RevlogError(_("unknown compression type %r") % marker)
69 69
# v0 index entry: 4 signed 32-bit ints (offset, length, base, linkrev)
# followed by three 20-byte hashes (parent 1, parent 2, nodeid)
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56
# index ng:
#  6 bytes offset
#  2 bytes flags
#  4 bytes compressed length
#  4 bytes uncompressed length
#  4 bytes: base rev
#  4 bytes link rev
#  4 bytes parent 1 rev
#  4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
# the version field is packed unsigned so flag bit 31 round-trips intact
versionformat = ">I"
85 85
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size, indexformat, shaoffset):
        # dataf: open file object for the index
        # size: total index size in bytes
        # indexformat: struct format string for one index entry
        # shaoffset: byte offset of the 20-byte nodeid within an entry
        self.dataf = dataf
        self.format = indexformat
        self.s = struct.calcsize(indexformat)   # bytes per entry
        self.indexformat = indexformat
        self.datasize = size
        self.l = size/self.s                    # number of entries
        self.index = [None] * self.l            # raw entries, filled lazily
        self.map = {nullid: nullrev}            # nodeid -> rev, filled lazily
        self.allmap = 0                         # nonzero once map is complete
        self.all = 0                            # nonzero once index is complete
        self.mapfind_count = 0                  # findnode calls, for heuristics
        self.shaoffset = shaoffset

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap: return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                # pull only the nodeid field out of each raw entry
                n = data[off + self.shaoffset:off + self.shaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        # parse raw entries covering [blockstart, blockstart + blocksize)
        # into self.index and self.map; data, if given, is pre-read bytes
        if self.all: return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[self.shaoffset:self.shaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap: return None

        # hg log will cause many many searches for the manifest
        # nodes.  After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                # the whole index is already loaded; nothing left to scan
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.rfind(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + self.shaoffset:off + self.shaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        # load entries: everything when i is None, the range [i, end)
        # when end is given, otherwise a 64-entry block around i
        if self.all: return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (512 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                # NOTE(review): looks intended to round i down to a block
                # boundary, but (i & ~(32)) clears only bit 5 — confirm
                blockstart = (i & ~(32)) * self.s
                blocksize = self.s * 64
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all: self.all = True
223 223
class lazyindex(object):
    """a lazy version of the index array"""

    def __init__(self, parser):
        self.p = parser

    def __len__(self):
        return len(self.p.index)

    def load(self, pos):
        # normalize negative positions, then pull that entry off disk
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]

    def __getitem__(self, pos):
        entry = self.p.index[pos]
        if not entry:
            entry = self.load(pos)
        if isinstance(entry, str):
            # still in raw packed form; decode it on first access
            entry = struct.unpack(self.p.indexformat, entry)
        return entry

    def __setitem__(self, pos, item):
        self.p.index[pos] = item

    def __delitem__(self, pos):
        del self.p.index[pos]

    def append(self, e):
        self.p.index.append(e)
246 246
class lazymap(object):
    """a lazy version of the node map"""

    def __init__(self, parser):
        self.p = parser

    def load(self, key):
        # ask the parser to scan the on-disk index; findnode fills
        # p.map as a side effect when it succeeds
        if self.p.findnode(key) is None:
            raise KeyError(key)

    def __contains__(self, key):
        if key in self.p.map:
            return True
        # unseen so far: load the complete map before answering no
        self.p.loadmap()
        return key in self.p.map

    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            entry = self.p.index[i]
            if not entry:
                self.p.loadindex(i)
                entry = self.p.index[i]
            if isinstance(entry, str):
                entry = struct.unpack(self.p.indexformat, entry)
            # the nodeid is the last field of every entry
            yield entry[-1]

    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))

    def __setitem__(self, key, val):
        self.p.map[key] = val

    def __delitem__(self, key):
        del self.p.map[key]
283 283
284 284 class RevlogError(Exception): pass
285 285
286 286 class revlog(object):
287 287 """
288 288 the underlying revision storage object
289 289
290 290 A revlog consists of two parts, an index and the revision data.
291 291
292 292 The index is a file with a fixed record size containing
293 293 information on each revision, includings its nodeid (hash), the
294 294 nodeids of its parents, the position and offset of its data within
295 295 the data file, and the revision it's based on. Finally, each entry
296 296 contains a linkrev entry that can serve as a pointer to external
297 297 data.
298 298
299 299 The revision data itself is a linear collection of data chunks.
300 300 Each chunk represents a revision and is usually represented as a
301 301 delta against the previous chunk. To bound lookup time, runs of
302 302 deltas are limited to about 2 times the length of the original
303 303 version data. This makes retrieval of a version proportional to
304 304 its size, or O(1) relative to the number of revisions.
305 305
306 306 Both pieces of the revlog are written to in an append-only
307 307 fashion, which means we never need to rewrite a file to insert or
308 308 remove data, and can use some simple techniques to avoid the need
309 309 for locking while reading.
310 310 """
    def __init__(self, opener, indexfile, datafile,
                 defversion=REVLOG_DEFAULT_VERSION):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener

        self.indexstat = None     # stat of the index file at last load()
        self.cache = None         # (node, rev, text) of last built revision
        self.chunkcache = None    # (offset, raw data) read-ahead cache
        self.defversion = defversion
        self.load()
328 328
    def load(self):
        # read the index header and (re)parse the index if it changed
        # since the last load
        v = self.defversion
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)          # packed version/flags header
            f.seek(0)
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            i = ""                 # no index yet: treat as an empty revlog
        else:
            try:
                st = util.fstat(f)
            except AttributeError, inst:
                st = None
            else:
                oldst = self.indexstat
                # unchanged on disk since last load? keep what we have
                if (oldst and st.st_dev == oldst.st_dev
                    and st.st_ino == oldst.st_ino
                    and st.st_mtime == oldst.st_mtime
                    and st.st_ctime == oldst.st_ctime):
                    return
                self.indexstat = st
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
                flags = v & ~0xFFFF     # high 16 bits: feature flags
                fmt = v & 0xFFFF        # low 16 bits: format revision
                if fmt == REVLOGV0:
                    if flags:
                        raise RevlogError(_("index %s unknown flags %#04x for format v0")
                                          % (self.indexfile, flags >> 16))
                elif fmt == REVLOGNG:
                    if flags & ~REVLOGNGINLINEDATA:
                        raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                                          % (self.indexfile, flags >> 16))
                else:
                    raise RevlogError(_("index %s unknown format %d")
                                      % (self.indexfile, fmt))
        self.version = v
        if v == REVLOGV0:
            self.indexformat = indexformatv0
            shaoffset = v0shaoffset
        else:
            self.indexformat = indexformatng
            shaoffset = ngshaoffset

        if i:
            if (lazyparser.safe_to_use and not self.inlinedata() and
                st and st.st_size > 10000):
                # big index, let's parse it on demand
                parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
                self.index = lazyindex(parser)
                self.nodemap = lazymap(parser)
            else:
                self.parseindex(f, st)
            if self.version != REVLOGV0:
                # normalize entry 0: its offset field only carries the type
                e = list(self.index[0])
                type = self.ngtype(e[0])
                e[0] = self.offset_type(0, type)
                self.index[0] = e
        else:
            self.nodemap = {nullid: nullrev}
            self.index = []
393 393
    def parseindex(self, fp, st):
        # eagerly parse the whole index from fp into self.index/self.nodemap
        s = struct.calcsize(self.indexformat)
        self.index = []
        self.nodemap =  {nullid: nullrev}
        inline = self.inlinedata()
        n = 0
        leftover = None
        while True:
            if st:
                data = fp.read(65536)
            else:
                # hack for httprangereader, it doesn't do partial reads well
                data = fp.read()
            if not data:
                break
            if n == 0 and self.inlinedata():
                # cache the first chunk
                self.chunkcache = (0, data)
            if leftover:
                # an entry straddled the previous read boundary
                data = leftover + data
                leftover = None
            off = 0
            l = len(data)
            while off < l:
                if l - off < s:
                    # partial entry at the end; finish it on the next read
                    leftover = data[off:]
                    break
                cur = data[off:off + s]
                off += s
                e = struct.unpack(self.indexformat, cur)
                self.index.append(e)
                self.nodemap[e[-1]] = n
                n += 1
                if inline:
                    # skip the revision data interleaved after the entry
                    off += e[1]
                    if off > l:
                        # some things don't seek well, just read it
                        fp.read(off - l)
            if not st:
                break
434 434
435 435
436 436 def ngoffset(self, q):
437 437 if q & 0xFFFF:
438 438 raise RevlogError(_('%s: incompatible revision flag %x') %
439 439 (self.indexfile, q))
440 440 return long(q >> 16)
441 441
442 442 def ngtype(self, q):
443 443 return int(q & 0xFFFF)
444 444
445 445 def offset_type(self, offset, type):
446 446 return long(long(offset) << 16 | type)
447 447
448 448 def loadindex(self, start, end):
449 449 """load a block of indexes all at once from the lazy parser"""
450 450 if isinstance(self.index, lazyindex):
451 451 self.index.p.loadindex(start, end)
452 452
453 453 def loadindexmap(self):
454 454 """loads both the map and the index from the lazy parser"""
455 455 if isinstance(self.index, lazyindex):
456 456 p = self.index.p
457 457 p.loadindex()
458 458 self.nodemap = p.map
459 459
460 460 def loadmap(self):
461 461 """loads the map from the lazy parser"""
462 462 if isinstance(self.nodemap, lazymap):
463 463 self.nodemap.p.loadmap()
464 464 self.nodemap = self.nodemap.p.map
465 465
466 466 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
467 467 def tip(self): return self.node(len(self.index) - 1)
468 468 def count(self): return len(self.index)
469 469 def node(self, rev):
470 470 return rev == nullrev and nullid or self.index[rev][-1]
    def rev(self, node):
        # map a binary nodeid back to its revision number
        try:
            return self.nodemap[node]
        except KeyError:
            raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
476 476 def linkrev(self, node):
477 477 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
478 478 def parents(self, node):
479 479 if node == nullid: return (nullid, nullid)
480 480 r = self.rev(node)
481 481 d = self.index[r][-3:-1]
482 482 if self.version == REVLOGV0:
483 483 return d
484 484 return (self.node(d[0]), self.node(d[1]))
485 485 def parentrevs(self, rev):
486 486 if rev == nullrev:
487 487 return (nullrev, nullrev)
488 488 d = self.index[rev][-3:-1]
489 489 if self.version == REVLOGV0:
490 490 return (self.rev(d[0]), self.rev(d[1]))
491 491 return d
492 492 def start(self, rev):
493 493 if rev == nullrev:
494 494 return 0
495 495 if self.version != REVLOGV0:
496 496 return self.ngoffset(self.index[rev][0])
497 497 return self.index[rev][0]
498 498
499 499 def end(self, rev): return self.start(rev) + self.length(rev)
500 500
    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        if rev == nullrev:
            return 0
        l = -1
        if self.version != REVLOGV0:
            # revlogng caches the uncompressed length in the index;
            # a negative value means "unknown, reconstruct it"
            l = self.index[rev][2]
            if l >= 0:
                return l

        # v0 (or unknown length): rebuild the full text and measure it
        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation, The advantage to this code is it
        # will be faster for a single revision.  But, the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """
533 533
534 534 def length(self, rev):
535 535 if rev == nullrev:
536 536 return 0
537 537 else:
538 538 return self.index[rev][1]
539 539 def base(self, rev):
540 540 if (rev == nullrev):
541 541 return nullrev
542 542 else:
543 543 return self.index[rev][-5]
544 544
545 545 def reachable(self, node, stop=None):
546 546 """return a hash of all nodes ancestral to a given node, including
547 547 the node itself, stopping when stop is matched"""
548 548 reachable = {}
549 549 visit = [node]
550 550 reachable[node] = 1
551 551 if stop:
552 552 stopn = self.rev(stop)
553 553 else:
554 554 stopn = 0
555 555 while visit:
556 556 n = visit.pop(0)
557 557 if n == stop:
558 558 continue
559 559 if n == nullid:
560 560 continue
561 561 for p in self.parents(n):
562 562 if self.rev(p) < stopn:
563 563 continue
564 564 if p not in reachable:
565 565 reachable[p] = 1
566 566 visit.append(p)
567 567 return reachable
568 568
    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list bases and heads after all the unreachable ones have been
        pruned.  Element 0 contains a topologically sorted list of all

        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads.keys()
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter.
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # keep only the heads that were actually reached
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
719 719
720 720 def heads(self, start=None):
721 721 """return the list of all nodes that have no children
722 722
723 723 if start is specified, only heads that are descendants of
724 724 start will be returned
725 725
726 726 """
727 727 if start is None:
728 728 start = nullid
729 729 startrev = self.rev(start)
730 730 reachable = {startrev: 1}
731 731 heads = {startrev: 1}
732 732
733 733 parentrevs = self.parentrevs
734 734 for r in xrange(startrev + 1, self.count()):
735 735 for p in parentrevs(r):
736 736 if p in reachable:
737 737 reachable[r] = 1
738 738 heads[r] = 1
739 739 if p in heads:
740 740 del heads[p]
741 741 return [self.node(r) for r in heads]
742 742
743 743 def children(self, node):
744 744 """find the children of a given node"""
745 745 c = []
746 746 p = self.rev(node)
747 747 for r in range(p + 1, self.count()):
748 748 for pr in self.parentrevs(r):
749 749 if pr == p:
750 750 c.append(self.node(r))
751 751 return c
752 752
    def _match(self, id):
        # try to resolve id exactly: an integer rev, a binary node,
        # str(rev), or a full hex node; return None when nothing matches
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except RevlogError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev = self.count() + rev
            if rev < 0 or rev >= self.count(): raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass
783 783
    def _partialmatch(self, id):
        # resolve a hex prefix (< 40 chars) to the unique matching node;
        # returns None when nothing matches, raises on ambiguity
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    # compare the binary prefix first (cheap), then the
                    # full hex form to honour an odd-length trailing digit
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise RevlogError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass
799 799
800 800 def lookup(self, id):
801 801 """locate a node based on:
802 802 - revision number or str(revision number)
803 803 - nodeid or subset of hex nodeid
804 804 """
805 805
806 806 n = self._match(id)
807 807 if n is not None:
808 808 return n
809 809 n = self._partialmatch(id)
810 810 if n:
811 811 return n
812 812
813 813 raise RevlogError(_("No match found"))
814 814
815 815 def cmp(self, node, text):
816 816 """compare text with a given file revision"""
817 817 p1, p2 = self.parents(node)
818 818 return hash(text, p1, p2) != node
819 819
820 820 def makenode(self, node, text):
821 821 """calculate a file nodeid for text, descended or possibly
822 822 unchanged from node"""
823 823
824 824 if self.cmp(node, text):
825 825 return hash(text, node, nullid)
826 826 return node
827 827
    def diff(self, a, b):
        """return a delta between two revisions"""
        # thin wrapper so callers do not depend on mdiff directly
        return mdiff.textdiff(a, b)
831 831
    def patches(self, t, pl):
        """apply a list of patches to a string"""
        # thin wrapper so callers do not depend on mdiff directly
        return mdiff.patches(t, pl)
835 835
    def chunk(self, rev, df=None, cachelen=4096):
        # return the decompressed data chunk for rev, reading through
        # self.chunkcache; df is an optional already-open data file
        start, length = self.start(rev), self.length(rev)
        inline = self.inlinedata()
        if inline:
            # inline data: chunks live in the index, after each entry
            start += (rev + 1) * struct.calcsize(self.indexformat)
        end = start + length
        def loadcache(df):
            # read at least cachelen bytes starting at this chunk
            cache_length = max(cachelen, length) # 4k
            if not df:
                if inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self.chunkcache = (start, df.read(cache_length))

        if not self.chunkcache:
            loadcache(df)

        cache_start = self.chunkcache[0]
        cache_end = cache_start + len(self.chunkcache[1])
        if start >= cache_start and end <= cache_end:
            # it is cached
            offset = start - cache_start
        else:
            loadcache(df)
            offset = 0

        #def checkchunk():
        #    df = self.opener(self.datafile)
        #    df.seek(start)
        #    return df.read(length)
        #assert s == checkchunk()
        return decompress(self.chunkcache[1][offset:offset + length])
870 870
871 871 def delta(self, node):
872 872 """return or calculate a delta between a node and its predecessor"""
873 873 r = self.rev(node)
874 874 return self.revdiff(r - 1, r)
875 875
876 876 def revdiff(self, rev1, rev2):
877 877 """return or calculate a delta between two revisions"""
878 878 b1 = self.base(rev1)
879 879 b2 = self.base(rev2)
880 880 if b1 == b2 and rev1 + 1 == rev2:
881 881 return self.chunk(rev2)
882 882 else:
883 883 return self.diff(self.revision(self.node(rev1)),
884 884 self.revision(self.node(rev2)))
885 885
    def revision(self, node):
        """return an uncompressed revision of a given"""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        if self.inlinedata():
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            # start patching from the cached text instead of the chain base
            base = self.cache[1]
            text = self.cache[2]
            self.loadindex(base, rev + 1)
        else:
            self.loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        # apply the delta chain from base up to rev
        bins = []
        for r in xrange(base + 1, rev + 1):
            bins.append(self.chunk(r, df=df))

        text = self.patches(text, bins)

        # verify the result against the recorded nodeid
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text
924 924
    def checkinlinesize(self, tr, fp=None):
        # migrate an inline revlog to separate index/data files once
        # the index file grows past 128k; tr is the current transaction
        if not self.inlinedata():
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
        fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = struct.calcsize(self.indexformat)
        # copy every revision's data out of the index into the data file;
        # inline data for rev r sits after (r + 1) index entries
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        # rewrite the index without the inline flag, atomically
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        if self.count():
            x = self.index[0]
            # entry 0: the version header replaces the first 4 bytes
            e = struct.pack(self.indexformat, *x)[4:]
            l = struct.pack(versionformat, self.version)
            fp.write(l)
            fp.write(e)

        for i in xrange(1, self.count()):
            x = self.index[i]
            e = struct.pack(self.indexformat, *x)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self.chunkcache = None
973 973
974 974 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
975 975 """add a revision to the log
976 976
977 977 text - the revision data to add
978 978 transaction - the transaction object used for rollback
979 979 link - the linkrev data to add
980 980 p1, p2 - the parent nodeids of the revision
981 981 d - an optional precomputed delta
982 982 """
983 983 if not self.inlinedata():
984 984 dfh = self.opener(self.datafile, "a")
985 985 else:
986 986 dfh = None
987 987 ifh = self.opener(self.indexfile, "a+")
988 988 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
989 989
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        """add a revision using pre-opened file handles (internal)

        text - revision data (None is treated as the empty string)
        transaction - transaction object for rollback bookkeeping
        link - linkrev of the new revision
        p1, p2 - parent nodes (default to tip and nullid)
        d - optional precomputed delta against the current tip
        ifh - open handle for the index file
        dfh - open handle for the data file, or None for inline revlogs

        Returns the node of the new (or already-stored) revision.
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        # identical changes on two branches produce the same node;
        # store it only once
        if node in self.nodemap:
            return node

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            if not d:
                # no precomputed delta: diff against the current tip
                prev = self.revision(self.tip())
                d = self.diff(prev, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = end - start + l

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        # index entry layout differs between revlog v0 and revlogng
        if self.version == REVLOGV0:
            e = (offset, l, base, link, p1, p2, node)
        else:
            e = (self.offset_type(offset, 0), l, len(text),
                 base, link, self.rev(p1), self.rev(p2), node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(self.indexformat, *e)

        if not self.inlinedata():
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, n * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
        else:
            ifh.seek(0, 2)
            transaction.add(self.indexfile, ifh.tell(), self.count() - 1)

        if len(self.index) == 1 and self.version != REVLOGV0:
            # first entry of a revlogng: the version header occupies the
            # first 4 bytes of the first index record
            l = struct.pack(versionformat, self.version)
            ifh.write(l)
            entry = entry[4:]

        ifh.write(entry)

        if self.inlinedata():
            ifh.write(data[0])
            ifh.write(data[1])
            # the revlog may leave inline mode here if it grew too large
            self.checkinlinesize(transaction, ifh)

        self.cache = (node, n, text)
        return node
1062 1062
1063 1063 def ancestor(self, a, b):
1064 1064 """calculate the least common ancestor of nodes a and b"""
1065 1065
1066 1066 def parents(rev):
1067 1067 return [p for p in self.parentrevs(rev) if p != nullrev]
1068 1068
1069 1069 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1070 1070 if c is None:
1071 1071 return nullid
1072 1072
1073 1073 return self.node(c)
1074 1074
1075 1075 def group(self, nodelist, lookup, infocollect=None):
1076 1076 """calculate a delta group
1077 1077
1078 1078 Given a list of changeset revs, return a set of deltas and
1079 1079 metadata corresponding to nodes. the first delta is
1080 1080 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1081 1081 have this parent as it has all history before these
1082 1082 changesets. parent is parent[0]
1083 1083 """
1084 1084 revs = [self.rev(n) for n in nodelist]
1085 1085
1086 1086 # if we don't have any revisions touched by these changesets, bail
1087 1087 if not revs:
1088 1088 yield changegroup.closechunk()
1089 1089 return
1090 1090
1091 1091 # add the parent of the first rev
1092 1092 p = self.parents(self.node(revs[0]))[0]
1093 1093 revs.insert(0, self.rev(p))
1094 1094
1095 1095 # build deltas
1096 1096 for d in xrange(0, len(revs) - 1):
1097 1097 a, b = revs[d], revs[d + 1]
1098 1098 nb = self.node(b)
1099 1099
1100 1100 if infocollect is not None:
1101 1101 infocollect(nb)
1102 1102
1103 1103 d = self.revdiff(a, b)
1104 1104 p = self.parents(nb)
1105 1105 meta = nb + p[0] + p[1] + lookup(nb)
1106 1106 yield changegroup.genchunk("%s%s" % (meta, d))
1107 1107
1108 1108 yield changegroup.closechunk()
1109 1109
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        revs - iterable of delta chunks (80-byte header + delta body)
        linkmapper - maps a changeset node to a linkrev
        transaction - transaction object for rollback bookkeeping
        unique - unused here (duplicate check is commented out below)

        Returns the node of the last revision added, or None if the
        group was empty.
        """

        #track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell(), self.count())
        if self.inlinedata():
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            # 80-byte chunk header: node, p1, p2 and the changeset node
            # the linkrev is derived from
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if not p in self.nodemap:
                    raise RevlogError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise RevlogError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            # NOTE: len(cdelta) is only reached when chain == prev, so
            # cdelta is guaranteed to be bound by the block above
            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self.inlinedata():
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                # append the compressed delta directly, without expanding
                # it into a full text
                if self.version == REVLOGV0:
                    e = (end, len(cdelta), base, link, p1, p2, node)
                else:
                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                self.index.append(e)
                self.nodemap[node] = r
                if self.inlinedata():
                    ifh.write(struct.pack(self.indexformat, *e))
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self.inlinedata():
                        # checkinlinesize split the revlog; reopen handles
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta)
                    ifh.write(struct.pack(self.indexformat, *e))

            # advance the chain state for the next delta
            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node
1214 1214
    def strip(self, rev, minlink):
        """remove revision rev and all later revisions from the revlog

        minlink is the oldest linkrev we are allowed to strip; rev is
        advanced until every stripped revision has linkrev >= minlink.
        Truncates the on-disk files first, then resets in-memory state.
        """
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self.loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        # index[-4] is the linkrev field in both v0 and revlogng entries
        while minlink > self.index[rev][-4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self.inlinedata():
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            # the index is truncated to whole fixed-size entries
            end = rev * struct.calcsize(self.indexformat)
        else:
            # inline: index entries and revision data share one file
            end += rev * struct.calcsize(self.indexformat)

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self.cache = None
        self.chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:]
1250 1250
    def checksize(self):
        """check index and data file sizes against the index contents

        Returns a tuple (dd, di):
        dd - bytes in the data file beyond what the index accounts for
        di - bytes in the index file beyond whole index entries (plus
             interleaved data, for inline revlogs)
        A missing file yields a discrepancy of 0 for that file.
        """
        expected = 0
        if self.count():
            expected = self.end(self.count() - 1)

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            # a missing data file is normal for inline or empty revlogs
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = struct.calcsize(self.indexformat)
            # Python 2 integer division: number of complete entries
            i = actual / s
            di = actual - (i * s)
            if self.inlinedata():
                # inline revlogs interleave data with the index, so the
                # expected size is the entries plus their data lengths
                databytes = 0
                for r in xrange(self.count()):
                    databytes += self.length(r)
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
1285 1285
1286 1286
General Comments 0
You need to be logged in to leave comments. Login now