Fixed spacing in assignment of REVLOG_DEFAULT_VERSION
Thomas Arendsen Hein
r4277:0ce23256 default
@@ -1,1299 +1,1299 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
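# --- editor's note: a minimal sketch (illustration only, not part of
# revlog.py) of how the version word combines format and flags. The low
# 16 bits carry the format number, the high bits carry feature flags:
v = REVLOG_DEFAULT_VERSION        # 0x10001 with the defaults above
fmt = v & 0xFFFF                  # -> REVLOGNG (1)
flags = v & ~0xFFFF               # -> REVLOGNGINLINEDATA (0x10000)
assert fmt == REVLOGNG and flags == REVLOGNGINLINEDATA
# load() below performs the same split when reading an index header.
# --- end sketch ---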
29 29 def flagstr(flag):
30 30 if flag == "inline":
31 31 return REVLOGNGINLINEDATA
32 32 raise RevlogError(_("unknown revlog flag %s") % flag)
33 33
34 34 def hash(text, p1, p2):
35 35 """generate a hash from the given text and its parent hashes
36 36
37 37 This hash combines both the current file contents and its history
38 38 in a manner that makes it easy to distinguish nodes with the same
39 39 content in the revision graph.
40 40 """
41 41 l = [p1, p2]
42 42 l.sort()
43 43 s = sha.new(l[0])
44 44 s.update(l[1])
45 45 s.update(text)
46 46 return s.digest()
47 47
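# --- editor's note: a minimal usage sketch (illustration only, not part
# of revlog.py). The same text hashed with different parents yields
# different nodeids, keeping equal contents distinct in the graph:
n1 = hash("data", nullid, nullid)
n2 = hash("data", n1, nullid)
assert n1 != n2 and len(n1) == len(n2) == 20
# --- end sketch ---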
48 48 def compress(text):
49 49 """ generate a possibly-compressed representation of text """
50 50 if not text: return ("", text)
51 51 if len(text) < 44:
52 52 if text[0] == '\0': return ("", text)
53 53 return ('u', text)
54 54 bin = zlib.compress(text)
55 55 if len(bin) > len(text):
56 56 if text[0] == '\0': return ("", text)
57 57 return ('u', text)
58 58 return ("", bin)
59 59
60 60 def decompress(bin):
61 61 """ decompress the given input """
62 62 if not bin: return bin
63 63 t = bin[0]
64 64 if t == '\0': return bin
65 65 if t == 'x': return zlib.decompress(bin)
66 66 if t == 'u': return bin[1:]
67 67 raise RevlogError(_("unknown compression type %r") % t)
68 68
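# --- editor's note: a round-trip sketch (illustration only, not part of
# revlog.py). Callers store the concatenation of the (header, data)
# pair: literal text is kept behind a 'u' marker, while zlib output is
# recognized in decompress() by its leading 'x' byte:
for t in ("short text", "x" * 1000):
    assert decompress("".join(compress(t))) == t
# --- end sketch ---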
69 69 indexformatv0 = ">4l20s20s20s"
70 70 v0shaoffset = 56
71 71 # index ng:
72 72 # 6 bytes offset
73 73 # 2 bytes flags
74 74 # 4 bytes compressed length
75 75 # 4 bytes uncompressed length
76 76 # 4 bytes: base rev
77 77 # 4 bytes link rev
78 78 # 4 bytes parent 1 rev
79 79 # 4 bytes parent 2 rev
80 80 # 32 bytes: nodeid
81 81 indexformatng = ">Qiiiiii20s12x"
82 82 ngshaoffset = 32
83 83 versionformat = ">I"
84 84
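# --- editor's note: a minimal sketch (illustration only, not part of
# revlog.py). A revlogng index entry is one fixed 64-byte record; the
# first field is the offset/flags quadword described above, and a root
# revision carries nullrev for both parents:
e = struct.pack(indexformatng,
                (0 << 16) | 0,    # offset 0, flags 0
                30, 100,          # compressed / uncompressed lengths
                0, 0,             # base rev, link rev
                nullrev, nullrev, # parent revs
                "\0" * 20)        # nodeid
assert len(e) == struct.calcsize(indexformatng) == 64
# --- end sketch ---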
85 85 class lazyparser(object):
86 86 """
87 87 this class avoids the need to parse the entirety of large indices
88 88 """
89 89
90 90 # lazyparser is not safe to use on windows if win32 extensions are not
91 91 # available. it keeps the file handle open, which makes it impossible
92 92 # to break hardlinks on locally cloned repos.
93 93 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
94 94 hasattr(util, 'win32api'))
95 95
96 96 def __init__(self, dataf, size, indexformat, shaoffset):
97 97 self.dataf = dataf
98 98 self.format = indexformat
99 99 self.s = struct.calcsize(indexformat)
100 100 self.indexformat = indexformat
101 101 self.datasize = size
102 102 self.l = size/self.s
103 103 self.index = [None] * self.l
104 104 self.map = {nullid: nullrev}
105 105 self.allmap = 0
106 106 self.all = 0
107 107 self.mapfind_count = 0
108 108 self.shaoffset = shaoffset
109 109
110 110 def loadmap(self):
111 111 """
112 112 during a commit, we need to make sure the rev being added is
113 113 not a duplicate. This requires loading the entire index,
114 114 which is fairly slow. loadmap can load up just the node map,
115 115 which takes much less time.
116 116 """
117 117 if self.allmap: return
118 118 end = self.datasize
119 119 self.allmap = 1
120 120 cur = 0
121 121 count = 0
122 122 blocksize = self.s * 256
123 123 self.dataf.seek(0)
124 124 while cur < end:
125 125 data = self.dataf.read(blocksize)
126 126 off = 0
127 127 for x in xrange(256):
128 128 n = data[off + self.shaoffset:off + self.shaoffset + 20]
129 129 self.map[n] = count
130 130 count += 1
131 131 if count >= self.l:
132 132 break
133 133 off += self.s
134 134 cur += blocksize
135 135
136 136 def loadblock(self, blockstart, blocksize, data=None):
137 137 if self.all: return
138 138 if data is None:
139 139 self.dataf.seek(blockstart)
140 140 if blockstart + blocksize > self.datasize:
141 141 # the revlog may have grown since we've started running,
142 142 # but we don't have space in self.index for more entries.
143 143 # limit blocksize so that we don't get too much data.
144 144 blocksize = max(self.datasize - blockstart, 0)
145 145 data = self.dataf.read(blocksize)
146 146 lend = len(data) / self.s
147 147 i = blockstart / self.s
148 148 off = 0
149 149 # lazyindex supports __delitem__
150 150 if lend > len(self.index) - i:
151 151 lend = len(self.index) - i
152 152 for x in xrange(lend):
153 153 if self.index[i + x] == None:
154 154 b = data[off : off + self.s]
155 155 self.index[i + x] = b
156 156 n = b[self.shaoffset:self.shaoffset + 20]
157 157 self.map[n] = i + x
158 158 off += self.s
159 159
160 160 def findnode(self, node):
161 161 """search backwards through the index file for a specific node"""
162 162 if self.allmap: return None
163 163
164 164 # hg log will cause many many searches for the manifest
165 165 # nodes. After we get called a few times, just load the whole
166 166 # thing.
167 167 if self.mapfind_count > 8:
168 168 self.loadmap()
169 169 if node in self.map:
170 170 return node
171 171 return None
172 172 self.mapfind_count += 1
173 173 last = self.l - 1
174 174 while self.index[last] != None:
175 175 if last == 0:
176 176 self.all = 1
177 177 self.allmap = 1
178 178 return None
179 179 last -= 1
180 180 end = (last + 1) * self.s
181 181 blocksize = self.s * 256
182 182 while end >= 0:
183 183 start = max(end - blocksize, 0)
184 184 self.dataf.seek(start)
185 185 data = self.dataf.read(end - start)
186 186 findend = end - start
187 187 while True:
188 188 # we're searching backwards, so we have to make sure
189 189 # we don't find a changeset where this node is a parent
190 190 off = data.rfind(node, 0, findend)
191 191 findend = off
192 192 if off >= 0:
193 193 i = off / self.s
194 194 off = i * self.s
195 195 n = data[off + self.shaoffset:off + self.shaoffset + 20]
196 196 if n == node:
197 197 self.map[n] = i + start / self.s
198 198 return node
199 199 else:
200 200 break
201 201 end -= blocksize
202 202 return None
203 203
204 204 def loadindex(self, i=None, end=None):
205 205 if self.all: return
206 206 all = False
207 207 if i == None:
208 208 blockstart = 0
209 209 blocksize = (512 / self.s) * self.s
210 210 end = self.datasize
211 211 all = True
212 212 else:
213 213 if end:
214 214 blockstart = i * self.s
215 215 end = end * self.s
216 216 blocksize = end - blockstart
217 217 else:
218 218 blockstart = (i & ~(32)) * self.s
219 219 blocksize = self.s * 64
220 220 end = blockstart + blocksize
221 221 while blockstart < end:
222 222 self.loadblock(blockstart, blocksize)
223 223 blockstart += blocksize
224 224 if all: self.all = True
225 225
226 226 class lazyindex(object):
227 227 """a lazy version of the index array"""
228 228 def __init__(self, parser):
229 229 self.p = parser
230 230 def __len__(self):
231 231 return len(self.p.index)
232 232 def load(self, pos):
233 233 if pos < 0:
234 234 pos += len(self.p.index)
235 235 self.p.loadindex(pos)
236 236 return self.p.index[pos]
237 237 def __getitem__(self, pos):
238 238 ret = self.p.index[pos] or self.load(pos)
239 239 if isinstance(ret, str):
240 240 ret = struct.unpack(self.p.indexformat, ret)
241 241 return ret
242 242 def __setitem__(self, pos, item):
243 243 self.p.index[pos] = item
244 244 def __delitem__(self, pos):
245 245 del self.p.index[pos]
246 246 def append(self, e):
247 247 self.p.index.append(e)
248 248
249 249 class lazymap(object):
250 250 """a lazy version of the node map"""
251 251 def __init__(self, parser):
252 252 self.p = parser
253 253 def load(self, key):
254 254 n = self.p.findnode(key)
255 255 if n == None:
256 256 raise KeyError(key)
257 257 def __contains__(self, key):
258 258 if key in self.p.map:
259 259 return True
260 260 self.p.loadmap()
261 261 return key in self.p.map
262 262 def __iter__(self):
263 263 yield nullid
264 264 for i in xrange(self.p.l):
265 265 ret = self.p.index[i]
266 266 if not ret:
267 267 self.p.loadindex(i)
268 268 ret = self.p.index[i]
269 269 if isinstance(ret, str):
270 270 ret = struct.unpack(self.p.indexformat, ret)
271 271 yield ret[-1]
272 272 def __getitem__(self, key):
273 273 try:
274 274 return self.p.map[key]
275 275 except KeyError:
276 276 try:
277 277 self.load(key)
278 278 return self.p.map[key]
279 279 except KeyError:
280 280 raise KeyError("node " + hex(key))
281 281 def __setitem__(self, key, val):
282 282 self.p.map[key] = val
283 283 def __delitem__(self, key):
284 284 del self.p.map[key]
285 285
286 286 class RevlogError(Exception): pass
287 287 class LookupError(RevlogError): pass
288 288
289 289 class revlog(object):
290 290 """
291 291 the underlying revision storage object
292 292
293 293 A revlog consists of two parts, an index and the revision data.
294 294
295 295 The index is a file with a fixed record size containing
296 296 information on each revision, including its nodeid (hash), the
297 297 nodeids of its parents, the position and offset of its data within
298 298 the data file, and the revision it's based on. Finally, each entry
299 299 contains a linkrev entry that can serve as a pointer to external
300 300 data.
301 301
302 302 The revision data itself is a linear collection of data chunks.
303 303 Each chunk represents a revision and is usually represented as a
304 304 delta against the previous chunk. To bound lookup time, runs of
305 305 deltas are limited to about 2 times the length of the original
306 306 version data. This makes retrieval of a version proportional to
307 307 its size, or O(1) relative to the number of revisions.
308 308
309 309 Both pieces of the revlog are written to in an append-only
310 310 fashion, which means we never need to rewrite a file to insert or
311 311 remove data, and can use some simple techniques to avoid the need
312 312 for locking while reading.
313 313 """
314 314 def __init__(self, opener, indexfile):
315 315 """
316 316 create a revlog object
317 317
318 318 opener is a function that abstracts the file opening operation
319 319 and can be used to implement COW semantics or the like.
320 320 """
321 321 self.indexfile = indexfile
322 322 self.datafile = indexfile[:-2] + ".d"
323 323 self.opener = opener
324 324
325 325 self.indexstat = None
326 326 self.cache = None
327 327 self.chunkcache = None
328 self.defversion=REVLOG_DEFAULT_VERSION
328 self.defversion = REVLOG_DEFAULT_VERSION
329 329 if hasattr(opener, "defversion"):
330 330 self.defversion = opener.defversion
331 331 self.load()
332 332
333 333 def load(self):
334 334 v = self.defversion
335 335 try:
336 336 f = self.opener(self.indexfile)
337 337 i = f.read(4)
338 338 f.seek(0)
339 339 except IOError, inst:
340 340 if inst.errno != errno.ENOENT:
341 341 raise
342 342 i = ""
343 343 else:
344 344 try:
345 345 st = util.fstat(f)
346 346 except AttributeError, inst:
347 347 st = None
348 348 else:
349 349 oldst = self.indexstat
350 350 if (oldst and st.st_dev == oldst.st_dev
351 351 and st.st_ino == oldst.st_ino
352 352 and st.st_mtime == oldst.st_mtime
353 353 and st.st_ctime == oldst.st_ctime):
354 354 return
355 355 self.indexstat = st
356 356 if len(i) > 0:
357 357 v = struct.unpack(versionformat, i)[0]
358 358 flags = v & ~0xFFFF
359 359 fmt = v & 0xFFFF
360 360 if fmt == REVLOGV0:
361 361 if flags:
362 362 raise RevlogError(_("index %s unknown flags %#04x for format v0")
363 363 % (self.indexfile, flags >> 16))
364 364 elif fmt == REVLOGNG:
365 365 if flags & ~REVLOGNGINLINEDATA:
366 366 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
367 367 % (self.indexfile, flags >> 16))
368 368 else:
369 369 raise RevlogError(_("index %s unknown format %d")
370 370 % (self.indexfile, fmt))
371 371 self.version = v
372 372 if v == REVLOGV0:
373 373 self.indexformat = indexformatv0
374 374 shaoffset = v0shaoffset
375 375 else:
376 376 self.indexformat = indexformatng
377 377 shaoffset = ngshaoffset
378 378
379 379 if i:
380 380 if (lazyparser.safe_to_use and not self.inlinedata() and
381 381 st and st.st_size > 10000):
382 382 # big index, let's parse it on demand
383 383 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
384 384 self.index = lazyindex(parser)
385 385 self.nodemap = lazymap(parser)
386 386 else:
387 387 self.parseindex(f, st)
388 388 if self.version != REVLOGV0:
389 389 e = list(self.index[0])
390 390 type = self.ngtype(e[0])
391 391 e[0] = self.offset_type(0, type)
392 392 self.index[0] = e
393 393 else:
394 394 self.nodemap = {nullid: nullrev}
395 395 self.index = []
396 396
397 397
398 398 def parseindex(self, fp, st):
399 399 s = struct.calcsize(self.indexformat)
400 400 self.index = []
401 401 self.nodemap = {nullid: nullrev}
402 402 inline = self.inlinedata()
403 403 n = 0
404 404 leftover = None
405 405 while True:
406 406 if st:
407 407 data = fp.read(65536)
408 408 else:
409 409 # hack for httprangereader, it doesn't do partial reads well
410 410 data = fp.read()
411 411 if not data:
412 412 break
413 413 if n == 0 and self.inlinedata():
414 414 # cache the first chunk
415 415 self.chunkcache = (0, data)
416 416 if leftover:
417 417 data = leftover + data
418 418 leftover = None
419 419 off = 0
420 420 l = len(data)
421 421 while off < l:
422 422 if l - off < s:
423 423 leftover = data[off:]
424 424 break
425 425 cur = data[off:off + s]
426 426 off += s
427 427 e = struct.unpack(self.indexformat, cur)
428 428 self.index.append(e)
429 429 self.nodemap[e[-1]] = n
430 430 n += 1
431 431 if inline:
432 432 if e[1] < 0:
433 433 break
434 434 off += e[1]
435 435 if off > l:
436 436 # some things don't seek well, just read it
437 437 fp.read(off - l)
438 438 break
439 439 if not st:
440 440 break
441 441
442 442
443 443 def ngoffset(self, q):
444 444 if q & 0xFFFF:
445 445 raise RevlogError(_('%s: incompatible revision flag %x') %
446 446 (self.indexfile, q))
447 447 return long(q >> 16)
448 448
449 449 def ngtype(self, q):
450 450 return int(q & 0xFFFF)
451 451
452 452 def offset_type(self, offset, type):
453 453 return long(long(offset) << 16 | type)
454 454
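    # --- editor's note: a minimal sketch (illustration only, not part
    # of revlog.py). offset_type packs a 48-bit data-file offset into
    # the high bits and a 16-bit type into the low bits; ngoffset and
    # ngtype undo the packing (ngoffset additionally insists the type
    # bits are zero):
    #     q = long(4096) << 16 | 0       # == offset_type(4096, 0)
    #     assert q >> 16 == 4096         # == ngoffset(q)
    #     assert q & 0xFFFF == 0         # == ngtype(q)
    # --- end sketch ---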
455 455 def loadindex(self, start, end):
456 456 """load a block of indexes all at once from the lazy parser"""
457 457 if isinstance(self.index, lazyindex):
458 458 self.index.p.loadindex(start, end)
459 459
460 460 def loadindexmap(self):
461 461 """loads both the map and the index from the lazy parser"""
462 462 if isinstance(self.index, lazyindex):
463 463 p = self.index.p
464 464 p.loadindex()
465 465 self.nodemap = p.map
466 466
467 467 def loadmap(self):
468 468 """loads the map from the lazy parser"""
469 469 if isinstance(self.nodemap, lazymap):
470 470 self.nodemap.p.loadmap()
471 471 self.nodemap = self.nodemap.p.map
472 472
473 473 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
474 474 def tip(self): return self.node(len(self.index) - 1)
475 475 def count(self): return len(self.index)
476 476 def node(self, rev):
477 477 return rev == nullrev and nullid or self.index[rev][-1]
478 478 def rev(self, node):
479 479 try:
480 480 return self.nodemap[node]
481 481 except KeyError:
482 482 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
483 483 def linkrev(self, node):
484 484 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
485 485 def parents(self, node):
486 486 if node == nullid: return (nullid, nullid)
487 487 r = self.rev(node)
488 488 d = self.index[r][-3:-1]
489 489 if self.version == REVLOGV0:
490 490 return d
491 491 return (self.node(d[0]), self.node(d[1]))
492 492 def parentrevs(self, rev):
493 493 if rev == nullrev:
494 494 return (nullrev, nullrev)
495 495 d = self.index[rev][-3:-1]
496 496 if self.version == REVLOGV0:
497 497 return (self.rev(d[0]), self.rev(d[1]))
498 498 return d
499 499 def start(self, rev):
500 500 if rev == nullrev:
501 501 return 0
502 502 if self.version != REVLOGV0:
503 503 return self.ngoffset(self.index[rev][0])
504 504 return self.index[rev][0]
505 505
506 506 def end(self, rev): return self.start(rev) + self.length(rev)
507 507
508 508 def size(self, rev):
509 509 """return the length of the uncompressed text for a given revision"""
510 510 if rev == nullrev:
511 511 return 0
512 512 l = -1
513 513 if self.version != REVLOGV0:
514 514 l = self.index[rev][2]
515 515 if l >= 0:
516 516 return l
517 517
518 518 t = self.revision(self.node(rev))
519 519 return len(t)
520 520
521 521 # alternate implementation. The advantage of this code is that it
522 522 # will be faster for a single revision. But the results are not
523 523 # cached, so finding the size of every revision will be slower.
524 524 """
525 525 if self.cache and self.cache[1] == rev:
526 526 return len(self.cache[2])
527 527
528 528 base = self.base(rev)
529 529 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
530 530 base = self.cache[1]
531 531 text = self.cache[2]
532 532 else:
533 533 text = self.revision(self.node(base))
534 534
535 535 l = len(text)
536 536 for x in xrange(base + 1, rev + 1):
537 537 l = mdiff.patchedsize(l, self.chunk(x))
538 538 return l
539 539 """
540 540
541 541 def length(self, rev):
542 542 if rev == nullrev:
543 543 return 0
544 544 else:
545 545 return self.index[rev][1]
546 546 def base(self, rev):
547 547 if (rev == nullrev):
548 548 return nullrev
549 549 else:
550 550 return self.index[rev][-5]
551 551
552 552 def reachable(self, node, stop=None):
553 553 """return a hash of all nodes ancestral to a given node, including
554 554 the node itself, stopping when stop is matched"""
555 555 reachable = {}
556 556 visit = [node]
557 557 reachable[node] = 1
558 558 if stop:
559 559 stopn = self.rev(stop)
560 560 else:
561 561 stopn = 0
562 562 while visit:
563 563 n = visit.pop(0)
564 564 if n == stop:
565 565 continue
566 566 if n == nullid:
567 567 continue
568 568 for p in self.parents(n):
569 569 if self.rev(p) < stopn:
570 570 continue
571 571 if p not in reachable:
572 572 reachable[p] = 1
573 573 visit.append(p)
574 574 return reachable
575 575
576 576 def nodesbetween(self, roots=None, heads=None):
577 577 """Return a tuple containing three elements. Elements 1 and 2 contain
578 578 a final list bases and heads after all the unreachable ones have been
579 579 pruned. Element 0 contains a topologically sorted list of all
580 580
581 581 nodes that satisfy these constraints:
582 582 1. All nodes must be descended from a node in roots (the nodes on
583 583 roots are considered descended from themselves).
584 584 2. All nodes must also be ancestors of a node in heads (the nodes in
585 585 heads are considered to be their own ancestors).
586 586
587 587 If roots is unspecified, nullid is assumed as the only root.
588 588 If heads is unspecified, it is taken to be the output of the
589 589 heads method (i.e. a list of all nodes in the repository that
590 590 have no children)."""
591 591 nonodes = ([], [], [])
592 592 if roots is not None:
593 593 roots = list(roots)
594 594 if not roots:
595 595 return nonodes
596 596 lowestrev = min([self.rev(n) for n in roots])
597 597 else:
598 598 roots = [nullid] # Everybody's a descendent of nullid
599 599 lowestrev = nullrev
600 600 if (lowestrev == nullrev) and (heads is None):
601 601 # We want _all_ the nodes!
602 602 return ([self.node(r) for r in xrange(0, self.count())],
603 603 [nullid], list(self.heads()))
604 604 if heads is None:
605 605 # All nodes are ancestors, so the latest ancestor is the last
606 606 # node.
607 607 highestrev = self.count() - 1
608 608 # Set ancestors to None to signal that every node is an ancestor.
609 609 ancestors = None
610 610 # Set heads to an empty dictionary for later discovery of heads
611 611 heads = {}
612 612 else:
613 613 heads = list(heads)
614 614 if not heads:
615 615 return nonodes
616 616 ancestors = {}
617 617 # Turn heads into a dictionary so we can remove 'fake' heads.
618 618 # Also, later we will be using it to filter out the heads we can't
619 619 # find from roots.
620 620 heads = dict.fromkeys(heads, 0)
621 621 # Start at the top and keep marking parents until we're done.
622 622 nodestotag = heads.keys()
623 623 # Remember where the top was so we can use it as a limit later.
624 624 highestrev = max([self.rev(n) for n in nodestotag])
625 625 while nodestotag:
626 626 # grab a node to tag
627 627 n = nodestotag.pop()
628 628 # Never tag nullid
629 629 if n == nullid:
630 630 continue
631 631 # A node's revision number represents its place in a
632 632 # topologically sorted list of nodes.
633 633 r = self.rev(n)
634 634 if r >= lowestrev:
635 635 if n not in ancestors:
636 636 # If we are possibly a descendent of one of the roots
637 637 # and we haven't already been marked as an ancestor
638 638 ancestors[n] = 1 # Mark as ancestor
639 639 # Add non-nullid parents to list of nodes to tag.
640 640 nodestotag.extend([p for p in self.parents(n) if
641 641 p != nullid])
642 642 elif n in heads: # We've seen it before, is it a fake head?
643 643 # So it is, real heads should not be the ancestors of
644 644 # any other heads.
645 645 heads.pop(n)
646 646 if not ancestors:
647 647 return nonodes
648 648 # Now that we have our set of ancestors, we want to remove any
649 649 # roots that are not ancestors.
650 650
651 651 # If one of the roots was nullid, everything is included anyway.
652 652 if lowestrev > nullrev:
653 653 # But, since we weren't, let's recompute the lowest rev to not
654 654 # include roots that aren't ancestors.
655 655
656 656 # Filter out roots that aren't ancestors of heads
657 657 roots = [n for n in roots if n in ancestors]
658 658 # Recompute the lowest revision
659 659 if roots:
660 660 lowestrev = min([self.rev(n) for n in roots])
661 661 else:
662 662 # No more roots? Return empty list
663 663 return nonodes
664 664 else:
665 665 # We are descending from nullid, and don't need to care about
666 666 # any other roots.
667 667 lowestrev = nullrev
668 668 roots = [nullid]
669 669 # Transform our roots list into a 'set' (i.e. a dictionary where the
670 670 # values don't matter).
671 671 descendents = dict.fromkeys(roots, 1)
672 672 # Also, keep the original roots so we can filter out roots that aren't
673 673 # 'real' roots (i.e. are descended from other roots).
674 674 roots = descendents.copy()
675 675 # Our topologically sorted list of output nodes.
676 676 orderedout = []
677 677 # Don't start at nullid since we don't want nullid in our output list,
678 678 # and if nullid shows up in descendents, empty parents will look like
679 679 # they're descendents.
680 680 for r in xrange(max(lowestrev, 0), highestrev + 1):
681 681 n = self.node(r)
682 682 isdescendent = False
683 683 if lowestrev == nullrev: # Everybody is a descendent of nullid
684 684 isdescendent = True
685 685 elif n in descendents:
686 686 # n is already a descendent
687 687 isdescendent = True
688 688 # This check only needs to be done here because all the roots
689 689 # will start being marked as descendents before the loop.
690 690 if n in roots:
691 691 # If n was a root, check if it's a 'real' root.
692 692 p = tuple(self.parents(n))
693 693 # If any of its parents are descendents, it's not a root.
694 694 if (p[0] in descendents) or (p[1] in descendents):
695 695 roots.pop(n)
696 696 else:
697 697 p = tuple(self.parents(n))
698 698 # A node is a descendent if either of its parents are
699 699 # descendents. (We seeded the descendents dict with the roots
700 700 # up there, remember?)
701 701 if (p[0] in descendents) or (p[1] in descendents):
702 702 descendents[n] = 1
703 703 isdescendent = True
704 704 if isdescendent and ((ancestors is None) or (n in ancestors)):
705 705 # Only include nodes that are both descendents and ancestors.
706 706 orderedout.append(n)
707 707 if (ancestors is not None) and (n in heads):
708 708 # We're trying to figure out which heads are reachable
709 709 # from roots.
710 710 # Mark this head as having been reached
711 711 heads[n] = 1
712 712 elif ancestors is None:
713 713 # Otherwise, we're trying to discover the heads.
714 714 # Assume this is a head because if it isn't, the next step
715 715 # will eventually remove it.
716 716 heads[n] = 1
717 717 # But, obviously its parents aren't.
718 718 for p in self.parents(n):
719 719 heads.pop(p, None)
720 720 heads = [n for n in heads.iterkeys() if heads[n] != 0]
721 721 roots = roots.keys()
722 722 assert orderedout
723 723 assert roots
724 724 assert heads
725 725 return (orderedout, roots, heads)
726 726
727 727 def heads(self, start=None, stop=None):
728 728 """return the list of all nodes that have no children
729 729
730 730 if start is specified, only heads that are descendants of
731 731 start will be returned
732 732 if stop is specified, it will consider all the revs from stop
733 733 as if they had no children
734 734 """
735 735 if start is None:
736 736 start = nullid
737 737 if stop is None:
738 738 stop = []
739 739 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
740 740 startrev = self.rev(start)
741 741 reachable = {startrev: 1}
742 742 heads = {startrev: 1}
743 743
744 744 parentrevs = self.parentrevs
745 745 for r in xrange(startrev + 1, self.count()):
746 746 for p in parentrevs(r):
747 747 if p in reachable:
748 748 if r not in stoprevs:
749 749 reachable[r] = 1
750 750 heads[r] = 1
751 751 if p in heads and p not in stoprevs:
752 752 del heads[p]
753 753
754 754 return [self.node(r) for r in heads]
755 755
756 756 def children(self, node):
757 757 """find the children of a given node"""
758 758 c = []
759 759 p = self.rev(node)
760 760 for r in range(p + 1, self.count()):
761 761 for pr in self.parentrevs(r):
762 762 if pr == p:
763 763 c.append(self.node(r))
764 764 return c
765 765
766 766 def _match(self, id):
767 767 if isinstance(id, (long, int)):
768 768 # rev
769 769 return self.node(id)
770 770 if len(id) == 20:
771 771 # possibly a binary node
772 772 # odds of a binary node being all hex in ASCII are 1 in 10**25
773 773 try:
774 774 node = id
775 775 r = self.rev(node) # quick search the index
776 776 return node
777 777 except LookupError:
778 778 pass # may be partial hex id
779 779 try:
780 780 # str(rev)
781 781 rev = int(id)
782 782 if str(rev) != id: raise ValueError
783 783 if rev < 0: rev = self.count() + rev
784 784 if rev < 0 or rev >= self.count(): raise ValueError
785 785 return self.node(rev)
786 786 except (ValueError, OverflowError):
787 787 pass
788 788 if len(id) == 40:
789 789 try:
790 790 # a full hex nodeid?
791 791 node = bin(id)
792 792 r = self.rev(node)
793 793 return node
794 794 except TypeError:
795 795 pass
796 796
797 797 def _partialmatch(self, id):
798 798 if len(id) < 40:
799 799 try:
800 800 # hex(node)[:...]
801 801 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
802 802 node = None
803 803 for n in self.nodemap:
804 804 if n.startswith(bin_id) and hex(n).startswith(id):
805 805 if node is not None:
806 806 raise LookupError(_("Ambiguous identifier"))
807 807 node = n
808 808 if node is not None:
809 809 return node
810 810 except TypeError:
811 811 pass
812 812
813 813 def lookup(self, id):
814 814 """locate a node based on:
815 815 - revision number or str(revision number)
816 816 - nodeid or subset of hex nodeid
817 817 """
818 818
819 819 n = self._match(id)
820 820 if n is not None:
821 821 return n
822 822 n = self._partialmatch(id)
823 823 if n:
824 824 return n
825 825
826 826 raise LookupError(_("No match found"))
827 827
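    # --- editor's note: a usage sketch (illustration only, not part of
    # revlog.py). For a revlog r, each of these forms resolves to the
    # same 20-byte node (the hex prefix shown is hypothetical):
    #     r.lookup(0)           # revision number
    #     r.lookup("0")         # str(revision number)
    #     r.lookup("d1b4c1f3")  # unambiguous hex nodeid prefix
    #     r.lookup(r.node(0))   # the binary nodeid itself
    # --- end sketch ---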
828 828 def cmp(self, node, text):
829 829 """compare text with a given file revision"""
830 830 p1, p2 = self.parents(node)
831 831 return hash(text, p1, p2) != node
832 832
833 833 def makenode(self, node, text):
834 834 """calculate a file nodeid for text, descended or possibly
835 835 unchanged from node"""
836 836
837 837 if self.cmp(node, text):
838 838 return hash(text, node, nullid)
839 839 return node
840 840
841 841 def diff(self, a, b):
842 842 """return a delta between two revisions"""
843 843 return mdiff.textdiff(a, b)
844 844
845 845 def patches(self, t, pl):
846 846 """apply a list of patches to a string"""
847 847 return mdiff.patches(t, pl)
848 848
849 849 def chunk(self, rev, df=None, cachelen=4096):
850 850 start, length = self.start(rev), self.length(rev)
851 851 inline = self.inlinedata()
852 852 if inline:
853 853 start += (rev + 1) * struct.calcsize(self.indexformat)
854 854 end = start + length
855 855 def loadcache(df):
856 856 cache_length = max(cachelen, length) # 4k
857 857 if not df:
858 858 if inline:
859 859 df = self.opener(self.indexfile)
860 860 else:
861 861 df = self.opener(self.datafile)
862 862 df.seek(start)
863 863 self.chunkcache = (start, df.read(cache_length))
864 864
865 865 if not self.chunkcache:
866 866 loadcache(df)
867 867
868 868 cache_start = self.chunkcache[0]
869 869 cache_end = cache_start + len(self.chunkcache[1])
870 870 if start >= cache_start and end <= cache_end:
871 871 # it is cached
872 872 offset = start - cache_start
873 873 else:
874 874 loadcache(df)
875 875 offset = 0
876 876
877 877 #def checkchunk():
878 878 # df = self.opener(self.datafile)
879 879 # df.seek(start)
880 880 # return df.read(length)
881 881 #assert s == checkchunk()
882 882 return decompress(self.chunkcache[1][offset:offset + length])
883 883
884 884 def delta(self, node):
885 885 """return or calculate a delta between a node and its predecessor"""
886 886 r = self.rev(node)
887 887 return self.revdiff(r - 1, r)
888 888
889 889 def revdiff(self, rev1, rev2):
890 890 """return or calculate a delta between two revisions"""
891 891 b1 = self.base(rev1)
892 892 b2 = self.base(rev2)
893 893 if b1 == b2 and rev1 + 1 == rev2:
894 894 return self.chunk(rev2)
895 895 else:
896 896 return self.diff(self.revision(self.node(rev1)),
897 897 self.revision(self.node(rev2)))
898 898
899 899 def revision(self, node):
900 900 """return an uncompressed revision of a given"""
901 901 if node == nullid: return ""
902 902 if self.cache and self.cache[0] == node: return self.cache[2]
903 903
904 904 # look up what we need to read
905 905 text = None
906 906 rev = self.rev(node)
907 907 base = self.base(rev)
908 908
909 909 if self.inlinedata():
910 910 # we probably have the whole chunk cached
911 911 df = None
912 912 else:
913 913 df = self.opener(self.datafile)
914 914
915 915 # do we have useful data cached?
916 916 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
917 917 base = self.cache[1]
918 918 text = self.cache[2]
919 919 self.loadindex(base, rev + 1)
920 920 else:
921 921 self.loadindex(base, rev + 1)
922 922 text = self.chunk(base, df=df)
923 923
924 924 bins = []
925 925 for r in xrange(base + 1, rev + 1):
926 926 bins.append(self.chunk(r, df=df))
927 927
928 928 text = self.patches(text, bins)
929 929
930 930 p1, p2 = self.parents(node)
931 931 if node != hash(text, p1, p2):
932 932 raise RevlogError(_("integrity check failed on %s:%d")
933 933 % (self.datafile, rev))
934 934
935 935 self.cache = (node, rev, text)
936 936 return text
937 937
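    # --- editor's note: a conceptual sketch (illustration only, not
    # part of revlog.py) of the reconstruction done by revision() above:
    #     base = self.base(rev)
    #     text = self.chunk(base)                  # last full version
    #     text = mdiff.patches(text, [self.chunk(r)
    #                                 for r in xrange(base + 1, rev + 1)])
    # The delta-run limit applied at write time keeps this chain short,
    # which is what makes retrieval O(1) in the number of revisions.
    # --- end sketch ---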
938 938 def checkinlinesize(self, tr, fp=None):
939 939 if not self.inlinedata():
940 940 return
941 941 if not fp:
942 942 fp = self.opener(self.indexfile, 'r')
943 943 fp.seek(0, 2)
944 944 size = fp.tell()
945 945 if size < 131072:
946 946 return
947 947 trinfo = tr.find(self.indexfile)
948 948 if trinfo == None:
949 949 raise RevlogError(_("%s not found in the transaction")
950 950 % self.indexfile)
951 951
952 952 trindex = trinfo[2]
953 953 dataoff = self.start(trindex)
954 954
955 955 tr.add(self.datafile, dataoff)
956 956 df = self.opener(self.datafile, 'w')
957 957 calc = struct.calcsize(self.indexformat)
958 958 for r in xrange(self.count()):
959 959 start = self.start(r) + (r + 1) * calc
960 960 length = self.length(r)
961 961 fp.seek(start)
962 962 d = fp.read(length)
963 963 df.write(d)
964 964 fp.close()
965 965 df.close()
966 966 fp = self.opener(self.indexfile, 'w', atomictemp=True)
967 967 self.version &= ~(REVLOGNGINLINEDATA)
968 968 if self.count():
969 969 x = self.index[0]
970 970 e = struct.pack(self.indexformat, *x)[4:]
971 971 l = struct.pack(versionformat, self.version)
972 972 fp.write(l)
973 973 fp.write(e)
974 974
975 975 for i in xrange(1, self.count()):
976 976 x = self.index[i]
977 977 e = struct.pack(self.indexformat, *x)
978 978 fp.write(e)
979 979
980 980 # if we don't call rename, the temp file will never replace the
981 981 # real index
982 982 fp.rename()
983 983
984 984 tr.replace(self.indexfile, trindex * calc)
985 985 self.chunkcache = None
986 986
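    # --- editor's note (illustration only, not part of revlog.py): in
    # an inline revlog the data chunk of revision r follows its index
    # entry, so its absolute position in the .i file is the logical data
    # offset plus (r + 1) fixed-size entries (entries 0..r), which is
    # the "start = self.start(r) + (r + 1) * calc" used above when the
    # data is migrated out to a separate .d file past 128KB.
    # --- end sketch ---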
987 987 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
988 988 """add a revision to the log
989 989
990 990 text - the revision data to add
991 991 transaction - the transaction object used for rollback
992 992 link - the linkrev data to add
993 993 p1, p2 - the parent nodeids of the revision
994 994 d - an optional precomputed delta
995 995 """
996 996 if not self.inlinedata():
997 997 dfh = self.opener(self.datafile, "a")
998 998 else:
999 999 dfh = None
1000 1000 ifh = self.opener(self.indexfile, "a+")
1001 1001 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1002 1002
1003 1003 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1004 1004 if text is None: text = ""
1005 1005 if p1 is None: p1 = self.tip()
1006 1006 if p2 is None: p2 = nullid
1007 1007
1008 1008 node = hash(text, p1, p2)
1009 1009
1010 1010 if node in self.nodemap:
1011 1011 return node
1012 1012
1013 1013 n = self.count()
1014 1014 t = n - 1
1015 1015
1016 1016 if n:
1017 1017 base = self.base(t)
1018 1018 start = self.start(base)
1019 1019 end = self.end(t)
1020 1020 if not d:
1021 1021 prev = self.revision(self.tip())
1022 1022 d = self.diff(prev, text)
1023 1023 data = compress(d)
1024 1024 l = len(data[1]) + len(data[0])
1025 1025 dist = end - start + l
1026 1026
1027 1027 # full versions are inserted when the needed deltas
1028 1028 # become comparable to the uncompressed text
1029 1029 if not n or dist > len(text) * 2:
1030 1030 data = compress(text)
1031 1031 l = len(data[1]) + len(data[0])
1032 1032 base = n
1033 1033 else:
1034 1034 base = self.base(t)
1035 1035
1036 1036 offset = 0
1037 1037 if t >= 0:
1038 1038 offset = self.end(t)
1039 1039
1040 1040 if self.version == REVLOGV0:
1041 1041 e = (offset, l, base, link, p1, p2, node)
1042 1042 else:
1043 1043 e = (self.offset_type(offset, 0), l, len(text),
1044 1044 base, link, self.rev(p1), self.rev(p2), node)
1045 1045
1046 1046 self.index.append(e)
1047 1047 self.nodemap[node] = n
1048 1048 entry = struct.pack(self.indexformat, *e)
1049 1049
1050 1050 if not self.inlinedata():
1051 1051 transaction.add(self.datafile, offset)
1052 1052 transaction.add(self.indexfile, n * len(entry))
1053 1053 if data[0]:
1054 1054 dfh.write(data[0])
1055 1055 dfh.write(data[1])
1056 1056 dfh.flush()
1057 1057 else:
1058 1058 ifh.seek(0, 2)
1059 1059 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1060 1060
1061 1061 if len(self.index) == 1 and self.version != REVLOGV0:
1062 1062 l = struct.pack(versionformat, self.version)
1063 1063 ifh.write(l)
1064 1064 entry = entry[4:]
1065 1065
1066 1066 ifh.write(entry)
1067 1067
1068 1068 if self.inlinedata():
1069 1069 ifh.write(data[0])
1070 1070 ifh.write(data[1])
1071 1071 self.checkinlinesize(transaction, ifh)
1072 1072
1073 1073 self.cache = (node, n, text)
1074 1074 return node
1075 1075
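    # --- editor's note: a worked example (illustration only, not part
    # of revlog.py) of the snapshot heuristic in _addrevision. With a
    # 1000-byte text, a delta chain already spanning 1900 bytes on disk
    # and a 200-byte new delta:
    #     dist = 1900 + 200      # end - start + l
    #     dist > 2 * 1000        # True, so a full version is stored
    # and base is reset to the new revision; otherwise the delta is
    # appended to the existing chain.
    # --- end sketch ---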
1076 1076 def ancestor(self, a, b):
1077 1077 """calculate the least common ancestor of nodes a and b"""
1078 1078
1079 1079 def parents(rev):
1080 1080 return [p for p in self.parentrevs(rev) if p != nullrev]
1081 1081
1082 1082 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1083 1083 if c is None:
1084 1084 return nullid
1085 1085
1086 1086 return self.node(c)
1087 1087
1088 1088 def group(self, nodelist, lookup, infocollect=None):
1089 1089 """calculate a delta group
1090 1090
1091 1091 Given a list of changeset revs, return a set of deltas and
1092 1092 metadata corresponding to nodes. The first delta is
1093 1093 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1094 1094 have this parent as it has all history before these
1095 1095 changesets. parent is parent[0].
1096 1096 """
1097 1097 revs = [self.rev(n) for n in nodelist]
1098 1098
1099 1099 # if we don't have any revisions touched by these changesets, bail
1100 1100 if not revs:
1101 1101 yield changegroup.closechunk()
1102 1102 return
1103 1103
1104 1104 # add the parent of the first rev
1105 1105 p = self.parents(self.node(revs[0]))[0]
1106 1106 revs.insert(0, self.rev(p))
1107 1107
1108 1108 # build deltas
1109 1109 for d in xrange(0, len(revs) - 1):
1110 1110 a, b = revs[d], revs[d + 1]
1111 1111 nb = self.node(b)
1112 1112
1113 1113 if infocollect is not None:
1114 1114 infocollect(nb)
1115 1115
1116 1116 d = self.revdiff(a, b)
1117 1117 p = self.parents(nb)
1118 1118 meta = nb + p[0] + p[1] + lookup(nb)
1119 1119 yield changegroup.genchunk("%s%s" % (meta, d))
1120 1120
1121 1121 yield changegroup.closechunk()
1122 1122
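    # --- editor's note: a wire-format sketch (illustration only, not
    # part of revlog.py). Each chunk yielded by group() is an 80-byte
    # header of four 20-byte nodes followed by the delta payload:
    #     meta = node + p1 + p2 + linknode     # 4 x 20 bytes
    #     chunk = meta + delta
    # addgroup() below unpacks exactly this with "20s20s20s20s".
    # --- end sketch ---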
1123 1123 def addgroup(self, revs, linkmapper, transaction, unique=0):
1124 1124 """
1125 1125 add a delta group
1126 1126
1127 1127 given a set of deltas, add them to the revision log. the
1128 1128 first delta is against its parent, which should be in our
1129 1129 log, the rest are against the previous delta.
1130 1130 """
1131 1131
1132 1132 # track the base of the current delta log
1133 1133 r = self.count()
1134 1134 t = r - 1
1135 1135 node = None
1136 1136
1137 1137 base = prev = nullrev
1138 1138 start = end = textlen = 0
1139 1139 if r:
1140 1140 end = self.end(t)
1141 1141
1142 1142 ifh = self.opener(self.indexfile, "a+")
1143 1143 ifh.seek(0, 2)
1144 1144 transaction.add(self.indexfile, ifh.tell(), self.count())
1145 1145 if self.inlinedata():
1146 1146 dfh = None
1147 1147 else:
1148 1148 transaction.add(self.datafile, end)
1149 1149 dfh = self.opener(self.datafile, "a")
1150 1150
1151 1151 # loop through our set of deltas
1152 1152 chain = None
1153 1153 for chunk in revs:
1154 1154 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1155 1155 link = linkmapper(cs)
1156 1156 if node in self.nodemap:
1157 1157 # this can happen if two branches make the same change
1158 1158 # if unique:
1159 1159 # raise RevlogError(_("already have %s") % hex(node[:4]))
1160 1160 chain = node
1161 1161 continue
1162 1162 delta = chunk[80:]
1163 1163
1164 1164 for p in (p1, p2):
1165 1165 if not p in self.nodemap:
1166 1166 raise LookupError(_("unknown parent %s") % short(p))
1167 1167
1168 1168 if not chain:
1169 1169 # retrieve the parent revision of the delta chain
1170 1170 chain = p1
1171 1171 if not chain in self.nodemap:
1172 1172 raise LookupError(_("unknown base %s") % short(chain[:4]))
1173 1173
1174 1174 # full versions are inserted when the needed deltas become
1175 1175 # comparable to the uncompressed text or when the previous
1176 1176 # version is not the one we have a delta against. We use
1177 1177 # the size of the previous full rev as a proxy for the
1178 1178 # current size.
1179 1179
1180 1180 if chain == prev:
1181 1181 tempd = compress(delta)
1182 1182 cdelta = tempd[0] + tempd[1]
1183 1183 textlen = mdiff.patchedsize(textlen, delta)
1184 1184
1185 1185 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1186 1186 # flush our writes here so we can read it in revision
1187 1187 if dfh:
1188 1188 dfh.flush()
1189 1189 ifh.flush()
1190 1190 text = self.revision(chain)
1191 1191 text = self.patches(text, [delta])
1192 1192 chk = self._addrevision(text, transaction, link, p1, p2, None,
1193 1193 ifh, dfh)
1194 1194 if not dfh and not self.inlinedata():
1195 1195 # addrevision switched from inline to conventional
1196 1196 # reopen the index
1197 1197 dfh = self.opener(self.datafile, "a")
1198 1198 ifh = self.opener(self.indexfile, "a")
1199 1199 if chk != node:
1200 1200 raise RevlogError(_("consistency error adding group"))
1201 1201 textlen = len(text)
1202 1202 else:
1203 1203 if self.version == REVLOGV0:
1204 1204 e = (end, len(cdelta), base, link, p1, p2, node)
1205 1205 else:
1206 1206 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1207 1207 link, self.rev(p1), self.rev(p2), node)
1208 1208 self.index.append(e)
1209 1209 self.nodemap[node] = r
1210 1210 if self.inlinedata():
1211 1211 ifh.write(struct.pack(self.indexformat, *e))
1212 1212 ifh.write(cdelta)
1213 1213 self.checkinlinesize(transaction, ifh)
1214 1214 if not self.inlinedata():
1215 1215 dfh = self.opener(self.datafile, "a")
1216 1216 ifh = self.opener(self.indexfile, "a")
1217 1217 else:
1218 1218 dfh.write(cdelta)
1219 1219 ifh.write(struct.pack(self.indexformat, *e))
1220 1220
1221 1221 t, r, chain, prev = r, r + 1, node, node
1222 1222 base = self.base(t)
1223 1223 start = self.start(base)
1224 1224 end = self.end(t)
1225 1225
1226 1226 return node
1227 1227
1228 1228 def strip(self, rev, minlink):
1229 1229 if self.count() == 0 or rev >= self.count():
1230 1230 return
1231 1231
1232 1232 if isinstance(self.index, lazyindex):
1233 1233 self.loadindexmap()
1234 1234
1235 1235 # When stripping away a revision, we need to make sure it
1236 1236 # does not actually belong to an older changeset.
1237 1237 # The minlink parameter defines the oldest revision
1238 1238 # we're allowed to strip away.
1239 1239 while minlink > self.index[rev][-4]:
1240 1240 rev += 1
1241 1241 if rev >= self.count():
1242 1242 return
1243 1243
1244 1244 # first truncate the files on disk
1245 1245 end = self.start(rev)
1246 1246 if not self.inlinedata():
1247 1247 df = self.opener(self.datafile, "a")
1248 1248 df.truncate(end)
1249 1249 end = rev * struct.calcsize(self.indexformat)
1250 1250 else:
1251 1251 end += rev * struct.calcsize(self.indexformat)
1252 1252
1253 1253 indexf = self.opener(self.indexfile, "a")
1254 1254 indexf.truncate(end)
1255 1255
1256 1256 # then reset internal state in memory to forget those revisions
1257 1257 self.cache = None
1258 1258 self.chunkcache = None
1259 1259 for x in xrange(rev, self.count()):
1260 1260 del self.nodemap[self.node(x)]
1261 1261
1262 1262 del self.index[rev:]
1263 1263
1264 1264 def checksize(self):
1265 1265 expected = 0
1266 1266 if self.count():
1267 1267 expected = self.end(self.count() - 1)
1268 1268
1269 1269 try:
1270 1270 f = self.opener(self.datafile)
1271 1271 f.seek(0, 2)
1272 1272 actual = f.tell()
1273 1273 dd = actual - expected
1274 1274 except IOError, inst:
1275 1275 if inst.errno != errno.ENOENT:
1276 1276 raise
1277 1277 dd = 0
1278 1278
1279 1279 try:
1280 1280 f = self.opener(self.indexfile)
1281 1281 f.seek(0, 2)
1282 1282 actual = f.tell()
1283 1283 s = struct.calcsize(self.indexformat)
1284 1284 i = actual / s
1285 1285 di = actual - (i * s)
1286 1286 if self.inlinedata():
1287 1287 databytes = 0
1288 1288 for r in xrange(self.count()):
1289 1289 databytes += self.length(r)
1290 1290 dd = 0
1291 1291 di = actual - self.count() * s - databytes
1292 1292 except IOError, inst:
1293 1293 if inst.errno != errno.ENOENT:
1294 1294 raise
1295 1295 di = 0
1296 1296
1297 1297 return (dd, di)
1298 1298
1299 1299