Avoid negative block sizes in lazyparser....
Alexis S. L. Carvalho
r3089:e7fc04dc default
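This commit hardens lazyparser.loadblock against reads past the end of the
index: the revlog can grow while the lazy parser is running, so a requested
block may start at or beyond the datasize recorded when parsing began. In
that case self.datasize - blockstart goes negative, and Python's file.read()
treats a negative size as "read to end of file", returning far more data than
self.index has slots for. Clamping the size to zero turns the out-of-range
read into an empty one. A minimal standalone sketch of the clamp
(illustrative names, not code from the patch):

    def clamped_blocksize(datasize, blockstart, blocksize):
        # shrink a read that would cross the recorded end of the index,
        # and never let it go negative: file.read(-1) reads to EOF
        if blockstart + blocksize > datasize:
            blocksize = max(datasize - blockstart, 0)
        return blocksize

    assert clamped_blocksize(1000, 900, 256) == 100  # truncated at the end
    assert clamped_blocksize(1000, 1024, 256) == 0   # fully past it: no read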
@@ -1,1303 +1,1303 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s") % flag)
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
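# Editor's sketch (not part of the commit): a quick property check of
# hash() above. Because the parents are sorted before hashing, the
# nodeid is independent of parent order, so both merge directions
# produce the same node:
assert hash("text", "a" * 20, "b" * 20) == hash("text", "b" * 20, "a" * 20)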
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
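# Editor's sketch (not part of the commit): compress() and decompress()
# round-trip once the two-part result is joined; a 'u' prefix marks text
# that was stored uncompressed, and an empty prefix marks zlib data:
_text = "some revision text" * 10
_head, _body = compress(_text)
assert decompress(_head + _body) == _text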
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
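# Editor's note (not part of the commit): the struct formats above match
# the byte layouts described in the comments -- v0 entries are
# 4*4 + 3*20 = 76 bytes with the nodeid at offset 56, revlogng entries
# are 8 + 6*4 + 20 + 12 padding = 64 bytes with the nodeid at offset 32:
assert struct.calcsize(indexformatv0) == v0shaoffset + 20
assert struct.calcsize(indexformatng) == ngshaoffset + 20 + 12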
85 85
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions are not
92 92 # available. it keeps the file handle open, which makes it impossible
93 93 # to break hardlinks on local cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 start = 0
120 120 end = self.datasize
121 121 self.allmap = 1
122 122 cur = 0
123 123 count = 0
124 124 blocksize = self.s * 256
125 125 self.dataf.seek(0)
126 126 while cur < end:
127 127 data = self.dataf.read(blocksize)
128 128 off = 0
129 129 for x in xrange(256):
130 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
131 131 self.map[n] = count
132 132 count += 1
133 133 if count >= self.l:
134 134 break
135 135 off += self.s
136 136 cur += blocksize
137 137
138 138 def loadblock(self, blockstart, blocksize, data=None):
139 139 if self.all: return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 142 if blockstart + blocksize > self.datasize:
143 143 # the revlog may have grown since we've started running,
144 144 # but we don't have space in self.index for more entries.
145 145 # limit blocksize so that we don't get too much data.
146 blocksize = self.datasize - blockstart
146 blocksize = max(self.datasize - blockstart, 0)
147 147 data = self.dataf.read(blocksize)
148 148 lend = len(data) / self.s
149 149 i = blockstart / self.s
150 150 off = 0
151 151 for x in xrange(lend):
152 152 if self.index[i + x] == None:
153 153 b = data[off : off + self.s]
154 154 self.index[i + x] = b
155 155 n = b[self.shaoffset:self.shaoffset + 20]
156 156 self.map[n] = i + x
157 157 off += self.s
158 158
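# Editor's sketch (not part of the commit): why the clamp added to
# loadblock matters. Python file objects treat a negative read size as
# "read to EOF", so an unclamped "datasize - blockstart" that went
# negative would slurp the whole rest of the file instead of nothing:
from StringIO import StringIO
_f = StringIO("x" * 100)
_f.seek(90)
assert len(_f.read(-10)) == 10             # negative size: reads all that is left
_f.seek(90)
assert len(_f.read(max(-10, 0))) == 0      # clamped, as in the fix: reads nothing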
159 159 def findnode(self, node):
160 160 """search backwards through the index file for a specific node"""
161 161 if self.allmap: return None
162 162
163 163 # hg log will cause many many searches for the manifest
164 164 # nodes. After we get called a few times, just load the whole
165 165 # thing.
166 166 if self.mapfind_count > 8:
167 167 self.loadmap()
168 168 if node in self.map:
169 169 return node
170 170 return None
171 171 self.mapfind_count += 1
172 172 last = self.l - 1
173 173 while self.index[last] != None:
174 174 if last == 0:
175 175 self.all = 1
176 176 self.allmap = 1
177 177 return None
178 178 last -= 1
179 179 end = (last + 1) * self.s
180 180 blocksize = self.s * 256
181 181 while end >= 0:
182 182 start = max(end - blocksize, 0)
183 183 self.dataf.seek(start)
184 184 data = self.dataf.read(end - start)
185 185 findend = end - start
186 186 while True:
187 187 # we're searching backwards, so we have to make sure
188 188 # we don't find a changeset where this node is a parent
189 189 off = data.rfind(node, 0, findend)
190 190 findend = off
191 191 if off >= 0:
192 192 i = off / self.s
193 193 off = i * self.s
194 194 n = data[off + self.shaoffset:off + self.shaoffset + 20]
195 195 if n == node:
196 196 self.map[n] = i + start / self.s
197 197 return node
198 198 else:
199 199 break
200 200 end -= blocksize
201 201 return None
202 202
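# Editor's sketch (not part of the commit): the re-check against the sha
# slot in findnode above is needed because rfind() can land on the node
# where it is merely stored as a parent of a later record. Toy layout:
# 8-byte records with a 4-byte "sha" at offset 4 inside each record.
_s, _shaoff, _n = 8, 4, "node"
_data = "AAAA" + _n + _n + "BBBB"          # rec0's sha, then a stray copy in rec1
_off = _data.rfind(_n)                     # first hit at offset 8: inside rec1
assert _data[(_off / _s) * _s + _shaoff:][:4] != _n    # not a sha slot: rejected
_off = _data.rfind(_n, 0, _off)            # keep scanning backwards
assert (_off / _s) * _s + _shaoff == _off == 4         # rec0's sha slot: accepted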
203 203 def loadindex(self, i=None, end=None):
204 204 if self.all: return
205 205 all = False
206 206 if i == None:
207 207 blockstart = 0
208 208 blocksize = (512 / self.s) * self.s
209 209 end = self.datasize
210 210 all = True
211 211 else:
212 212 if end:
213 213 blockstart = i * self.s
214 214 end = end * self.s
215 215 blocksize = end - blockstart
216 216 else:
217 217 blockstart = (i & ~(32)) * self.s
218 218 blocksize = self.s * 64
219 219 end = blockstart + blocksize
220 220 while blockstart < end:
221 221 self.loadblock(blockstart, blocksize)
222 222 blockstart += blocksize
223 223 if all: self.all = True
224 224
225 225 class lazyindex(object):
226 226 """a lazy version of the index array"""
227 227 def __init__(self, parser):
228 228 self.p = parser
229 229 def __len__(self):
230 230 return len(self.p.index)
231 231 def load(self, pos):
232 232 if pos < 0:
233 233 pos += len(self.p.index)
234 234 self.p.loadindex(pos)
235 235 return self.p.index[pos]
236 236 def __getitem__(self, pos):
237 237 ret = self.p.index[pos] or self.load(pos)
238 238 if isinstance(ret, str):
239 239 ret = struct.unpack(self.p.indexformat, ret)
240 240 return ret
241 241 def __setitem__(self, pos, item):
242 242 self.p.index[pos] = item
243 243 def __delitem__(self, pos):
244 244 del self.p.index[pos]
245 245 def append(self, e):
246 246 self.p.index.append(e)
247 247
248 248 class lazymap(object):
249 249 """a lazy version of the node map"""
250 250 def __init__(self, parser):
251 251 self.p = parser
252 252 def load(self, key):
253 253 n = self.p.findnode(key)
254 254 if n == None:
255 255 raise KeyError(key)
256 256 def __contains__(self, key):
257 257 if key in self.p.map:
258 258 return True
259 259 self.p.loadmap()
260 260 return key in self.p.map
261 261 def __iter__(self):
262 262 yield nullid
263 263 for i in xrange(self.p.l):
264 264 ret = self.p.index[i]
265 265 if not ret:
266 266 self.p.loadindex(i)
267 267 ret = self.p.index[i]
268 268 if isinstance(ret, str):
269 269 ret = struct.unpack(self.p.indexformat, ret)
270 270 yield ret[-1]
271 271 def __getitem__(self, key):
272 272 try:
273 273 return self.p.map[key]
274 274 except KeyError:
275 275 try:
276 276 self.load(key)
277 277 return self.p.map[key]
278 278 except KeyError:
279 279 raise KeyError("node " + hex(key))
280 280 def __setitem__(self, key, val):
281 281 self.p.map[key] = val
282 282 def __delitem__(self, key):
283 283 del self.p.map[key]
284 284
285 285 class RevlogError(Exception): pass
286 286
287 287 class revlog(object):
288 288 """
289 289 the underlying revision storage object
290 290
291 291 A revlog consists of two parts, an index and the revision data.
292 292
293 293 The index is a file with a fixed record size containing
294 294 information on each revision, including its nodeid (hash), the
295 295 nodeids of its parents, the position and offset of its data within
296 296 the data file, and the revision it's based on. Finally, each entry
297 297 contains a linkrev entry that can serve as a pointer to external
298 298 data.
299 299
300 300 The revision data itself is a linear collection of data chunks.
301 301 Each chunk represents a revision and is usually represented as a
302 302 delta against the previous chunk. To bound lookup time, runs of
303 303 deltas are limited to about 2 times the length of the original
304 304 version data. This makes retrieval of a version proportional to
305 305 its size, or O(1) relative to the number of revisions.
306 306
307 307 Both pieces of the revlog are written to in an append-only
308 308 fashion, which means we never need to rewrite a file to insert or
309 309 remove data, and can use some simple techniques to avoid the need
310 310 for locking while reading.
311 311 """
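# Editor's sketch (not part of the commit): the "2 times the length"
# bound described above. addrevision() below stores a fresh full version
# once the delta chain's byte span exceeds twice the text size, which is
# what keeps retrieval proportional to the text size:
def _needs_snapshot(dist, textlen):
    return dist > textlen * 2              # same test addrevision applies
assert not _needs_snapshot(150, 100)       # short chain: keep appending deltas
assert _needs_snapshot(250, 100)           # chain too long: store full text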
312 312 def __init__(self, opener, indexfile, datafile,
313 313 defversion=REVLOG_DEFAULT_VERSION):
314 314 """
315 315 create a revlog object
316 316
317 317 opener is a function that abstracts the file opening operation
318 318 and can be used to implement COW semantics or the like.
319 319 """
320 320 self.indexfile = indexfile
321 321 self.datafile = datafile
322 322 self.opener = opener
323 323
324 324 self.indexstat = None
325 325 self.cache = None
326 326 self.chunkcache = None
327 327 self.defversion = defversion
328 328 self.load()
329 329
330 330 def load(self):
331 331 v = self.defversion
332 332 try:
333 333 f = self.opener(self.indexfile)
334 334 i = f.read(4)
335 335 f.seek(0)
336 336 except IOError, inst:
337 337 if inst.errno != errno.ENOENT:
338 338 raise
339 339 i = ""
340 340 else:
341 341 try:
342 342 st = util.fstat(f)
343 343 except AttributeError, inst:
344 344 st = None
345 345 else:
346 346 oldst = self.indexstat
347 347 if (oldst and st.st_dev == oldst.st_dev
348 348 and st.st_ino == oldst.st_ino
349 349 and st.st_mtime == oldst.st_mtime
350 350 and st.st_ctime == oldst.st_ctime):
351 351 return
352 352 self.indexstat = st
353 353 if len(i) > 0:
354 354 v = struct.unpack(versionformat, i)[0]
355 355 flags = v & ~0xFFFF
356 356 fmt = v & 0xFFFF
357 357 if fmt == REVLOGV0:
358 358 if flags:
359 359 raise RevlogError(_("index %s invalid flags %x for format v0") %
360 360 (self.indexfile, flags))
361 361 elif fmt == REVLOGNG:
362 362 if flags & ~REVLOGNGINLINEDATA:
363 363 raise RevlogError(_("index %s invalid flags %x for revlogng") %
364 364 (self.indexfile, flags))
365 365 else:
366 366 raise RevlogError(_("index %s invalid format %d") %
367 367 (self.indexfile, fmt))
368 368 self.version = v
369 369 if v == REVLOGV0:
370 370 self.indexformat = indexformatv0
371 371 shaoffset = v0shaoffset
372 372 else:
373 373 self.indexformat = indexformatng
374 374 shaoffset = ngshaoffset
375 375
376 376 if i:
377 377 if (lazyparser.safe_to_use and not self.inlinedata() and
378 378 st and st.st_size > 10000):
379 379 # big index, let's parse it on demand
380 380 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
381 381 self.index = lazyindex(parser)
382 382 self.nodemap = lazymap(parser)
383 383 else:
384 384 self.parseindex(f, st)
385 385 if self.version != REVLOGV0:
386 386 e = list(self.index[0])
387 387 type = self.ngtype(e[0])
388 388 e[0] = self.offset_type(0, type)
389 389 self.index[0] = e
390 390 else:
391 391 self.nodemap = { nullid: -1}
392 392 self.index = []
393 393
394 394
395 395 def parseindex(self, fp, st):
396 396 s = struct.calcsize(self.indexformat)
397 397 self.index = []
398 398 self.nodemap = {nullid: -1}
399 399 inline = self.inlinedata()
400 400 n = 0
401 401 leftover = None
402 402 while True:
403 403 if st:
404 404 data = fp.read(65536)
405 405 else:
406 406 # hack for httprangereader, it doesn't do partial reads well
407 407 data = fp.read()
408 408 if not data:
409 409 break
410 410 if n == 0 and self.inlinedata():
411 411 # cache the first chunk
412 412 self.chunkcache = (0, data)
413 413 if leftover:
414 414 data = leftover + data
415 415 leftover = None
416 416 off = 0
417 417 l = len(data)
418 418 while off < l:
419 419 if l - off < s:
420 420 leftover = data[off:]
421 421 break
422 422 cur = data[off:off + s]
423 423 off += s
424 424 e = struct.unpack(self.indexformat, cur)
425 425 self.index.append(e)
426 426 self.nodemap[e[-1]] = n
427 427 n += 1
428 428 if inline:
429 429 off += e[1]
430 430 if off > l:
431 431 # some things don't seek well, just read it
432 432 fp.read(off - l)
433 433 if not st:
434 434 break
435 435
436 436
437 437 def ngoffset(self, q):
438 438 if q & 0xFFFF:
439 439 raise RevlogError(_('%s: incompatible revision flag %x') %
440 440 (self.indexfile, q))
441 441 return long(q >> 16)
442 442
443 443 def ngtype(self, q):
444 444 return int(q & 0xFFFF)
445 445
446 446 def offset_type(self, offset, type):
447 447 return long(long(offset) << 16 | type)
448 448
449 449 def loadindex(self, start, end):
450 450 """load a block of indexes all at once from the lazy parser"""
451 451 if isinstance(self.index, lazyindex):
452 452 self.index.p.loadindex(start, end)
453 453
454 454 def loadindexmap(self):
455 455 """loads both the map and the index from the lazy parser"""
456 456 if isinstance(self.index, lazyindex):
457 457 p = self.index.p
458 458 p.loadindex()
459 459 self.nodemap = p.map
460 460
461 461 def loadmap(self):
462 462 """loads the map from the lazy parser"""
463 463 if isinstance(self.nodemap, lazymap):
464 464 self.nodemap.p.loadmap()
465 465 self.nodemap = self.nodemap.p.map
466 466
467 467 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
468 468 def tip(self): return self.node(len(self.index) - 1)
469 469 def count(self): return len(self.index)
470 470 def node(self, rev):
471 471 return (rev < 0) and nullid or self.index[rev][-1]
472 472 def rev(self, node):
473 473 try:
474 474 return self.nodemap[node]
475 475 except KeyError:
476 476 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
477 477 def linkrev(self, node):
478 478 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
479 479 def parents(self, node):
480 480 if node == nullid: return (nullid, nullid)
481 481 r = self.rev(node)
482 482 d = self.index[r][-3:-1]
483 483 if self.version == REVLOGV0:
484 484 return d
485 485 return [ self.node(x) for x in d ]
486 486 def parentrevs(self, rev):
487 487 if rev == -1:
488 488 return (-1, -1)
489 489 d = self.index[rev][-3:-1]
490 490 if self.version == REVLOGV0:
491 491 return [ self.rev(x) for x in d ]
492 492 return d
493 493 def start(self, rev):
494 494 if rev < 0:
495 495 return -1
496 496 if self.version != REVLOGV0:
497 497 return self.ngoffset(self.index[rev][0])
498 498 return self.index[rev][0]
499 499
500 500 def end(self, rev): return self.start(rev) + self.length(rev)
501 501
502 502 def size(self, rev):
503 503 """return the length of the uncompressed text for a given revision"""
504 504 l = -1
505 505 if self.version != REVLOGV0:
506 506 l = self.index[rev][2]
507 507 if l >= 0:
508 508 return l
509 509
510 510 t = self.revision(self.node(rev))
511 511 return len(t)
512 512
513 513 # alternate implementation. The advantage of this code is that it
514 514 # will be faster for a single revision. But the results are not
515 515 # cached, so finding the size of every revision will be slower.
516 516 """
517 517 if self.cache and self.cache[1] == rev:
518 518 return len(self.cache[2])
519 519
520 520 base = self.base(rev)
521 521 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
522 522 base = self.cache[1]
523 523 text = self.cache[2]
524 524 else:
525 525 text = self.revision(self.node(base))
526 526
527 527 l = len(text)
528 528 for x in xrange(base + 1, rev + 1):
529 529 l = mdiff.patchedsize(l, self.chunk(x))
530 530 return l
531 531 """
532 532
533 533 def length(self, rev):
534 534 if rev < 0:
535 535 return 0
536 536 else:
537 537 return self.index[rev][1]
538 538 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
539 539
540 540 def reachable(self, rev, stop=None):
541 541 reachable = {}
542 542 visit = [rev]
543 543 reachable[rev] = 1
544 544 if stop:
545 545 stopn = self.rev(stop)
546 546 else:
547 547 stopn = 0
548 548 while visit:
549 549 n = visit.pop(0)
550 550 if n == stop:
551 551 continue
552 552 if n == nullid:
553 553 continue
554 554 for p in self.parents(n):
555 555 if self.rev(p) < stopn:
556 556 continue
557 557 if p not in reachable:
558 558 reachable[p] = 1
559 559 visit.append(p)
560 560 return reachable
561 561
562 562 def nodesbetween(self, roots=None, heads=None):
563 563 """Return a tuple containing three elements. Elements 1 and 2 contain
564 564 a final list of bases and heads after all the unreachable ones
565 565 have been pruned. Element 0 contains a topologically sorted list
566 566 of all nodes that satisfy these constraints:
567 567
568 568 1. All nodes must be descended from a node in roots (the nodes on
569 569 roots are considered descended from themselves).
570 570 2. All nodes must also be ancestors of a node in heads (the nodes in
571 571 heads are considered to be their own ancestors).
572 572
573 573 If roots is unspecified, nullid is assumed as the only root.
574 574 If heads is unspecified, it is taken to be the output of the
575 575 heads method (i.e. a list of all nodes in the repository that
576 576 have no children)."""
577 577 nonodes = ([], [], [])
578 578 if roots is not None:
579 579 roots = list(roots)
580 580 if not roots:
581 581 return nonodes
582 582 lowestrev = min([self.rev(n) for n in roots])
583 583 else:
584 584 roots = [nullid] # Everybody's a descendent of nullid
585 585 lowestrev = -1
586 586 if (lowestrev == -1) and (heads is None):
587 587 # We want _all_ the nodes!
588 588 return ([self.node(r) for r in xrange(0, self.count())],
589 589 [nullid], list(self.heads()))
590 590 if heads is None:
591 591 # All nodes are ancestors, so the latest ancestor is the last
592 592 # node.
593 593 highestrev = self.count() - 1
594 594 # Set ancestors to None to signal that every node is an ancestor.
595 595 ancestors = None
596 596 # Set heads to an empty dictionary for later discovery of heads
597 597 heads = {}
598 598 else:
599 599 heads = list(heads)
600 600 if not heads:
601 601 return nonodes
602 602 ancestors = {}
603 603 # Start at the top and keep marking parents until we're done.
604 604 nodestotag = heads[:]
605 605 # Turn heads into a dictionary so we can remove 'fake' heads.
606 606 # Also, later we will be using it to filter out the heads we can't
607 607 # find from roots.
608 608 heads = dict.fromkeys(heads, 0)
609 609 # Remember where the top was so we can use it as a limit later.
610 610 highestrev = max([self.rev(n) for n in nodestotag])
611 611 while nodestotag:
612 612 # grab a node to tag
613 613 n = nodestotag.pop()
614 614 # Never tag nullid
615 615 if n == nullid:
616 616 continue
617 617 # A node's revision number represents its place in a
618 618 # topologically sorted list of nodes.
619 619 r = self.rev(n)
620 620 if r >= lowestrev:
621 621 if n not in ancestors:
622 622 # If we are possibly a descendent of one of the roots
623 623 # and we haven't already been marked as an ancestor
624 624 ancestors[n] = 1 # Mark as ancestor
625 625 # Add non-nullid parents to list of nodes to tag.
626 626 nodestotag.extend([p for p in self.parents(n) if
627 627 p != nullid])
628 628 elif n in heads: # We've seen it before, is it a fake head?
629 629 # So it is, real heads should not be the ancestors of
630 630 # any other heads.
631 631 heads.pop(n)
632 632 if not ancestors:
633 633 return nonodes
634 634 # Now that we have our set of ancestors, we want to remove any
635 635 # roots that are not ancestors.
636 636
637 637 # If one of the roots was nullid, everything is included anyway.
638 638 if lowestrev > -1:
639 639 # But, since we weren't, let's recompute the lowest rev to not
640 640 # include roots that aren't ancestors.
641 641
642 642 # Filter out roots that aren't ancestors of heads
643 643 roots = [n for n in roots if n in ancestors]
644 644 # Recompute the lowest revision
645 645 if roots:
646 646 lowestrev = min([self.rev(n) for n in roots])
647 647 else:
648 648 # No more roots? Return empty list
649 649 return nonodes
650 650 else:
651 651 # We are descending from nullid, and don't need to care about
652 652 # any other roots.
653 653 lowestrev = -1
654 654 roots = [nullid]
655 655 # Transform our roots list into a 'set' (i.e. a dictionary where the
656 656 # values don't matter).
657 657 descendents = dict.fromkeys(roots, 1)
658 658 # Also, keep the original roots so we can filter out roots that aren't
659 659 # 'real' roots (i.e. are descended from other roots).
660 660 roots = descendents.copy()
661 661 # Our topologically sorted list of output nodes.
662 662 orderedout = []
663 663 # Don't start at nullid since we don't want nullid in our output list,
664 664 # and if nullid shows up in descendents, empty parents will look like
665 665 # they're descendents.
666 666 for r in xrange(max(lowestrev, 0), highestrev + 1):
667 667 n = self.node(r)
668 668 isdescendent = False
669 669 if lowestrev == -1: # Everybody is a descendent of nullid
670 670 isdescendent = True
671 671 elif n in descendents:
672 672 # n is already a descendent
673 673 isdescendent = True
674 674 # This check only needs to be done here because all the roots
675 675 # will start being marked as descendents before the loop.
676 676 if n in roots:
677 677 # If n was a root, check if it's a 'real' root.
678 678 p = tuple(self.parents(n))
679 679 # If any of its parents are descendents, it's not a root.
680 680 if (p[0] in descendents) or (p[1] in descendents):
681 681 roots.pop(n)
682 682 else:
683 683 p = tuple(self.parents(n))
684 684 # A node is a descendent if either of its parents are
685 685 # descendents. (We seeded the descendents list with the roots
686 686 # up there, remember?)
687 687 if (p[0] in descendents) or (p[1] in descendents):
688 688 descendents[n] = 1
689 689 isdescendent = True
690 690 if isdescendent and ((ancestors is None) or (n in ancestors)):
691 691 # Only include nodes that are both descendents and ancestors.
692 692 orderedout.append(n)
693 693 if (ancestors is not None) and (n in heads):
694 694 # We're trying to figure out which heads are reachable
695 695 # from roots.
696 696 # Mark this head as having been reached
697 697 heads[n] = 1
698 698 elif ancestors is None:
699 699 # Otherwise, we're trying to discover the heads.
700 700 # Assume this is a head because if it isn't, the next step
701 701 # will eventually remove it.
702 702 heads[n] = 1
703 703 # But, obviously its parents aren't.
704 704 for p in self.parents(n):
705 705 heads.pop(p, None)
706 706 heads = [n for n in heads.iterkeys() if heads[n] != 0]
707 707 roots = roots.keys()
708 708 assert orderedout
709 709 assert roots
710 710 assert heads
711 711 return (orderedout, roots, heads)
712 712
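# Editor's note (not part of the commit): hedged usage sketch for
# nodesbetween() above, with illustrative names -- 'rlog' is an existing
# revlog instance, 'base' and 'head' are nodeids it already contains:
#
#     between, bases, heads = rlog.nodesbetween([base], [head])
#     # 'between' is topologically sorted; 'bases'/'heads' are the
#     # input lists with unreachable entries pruned.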
713 713 def heads(self, start=None):
714 714 """return the list of all nodes that have no children
715 715
716 716 if start is specified, only heads that are descendants of
717 717 start will be returned
718 718
719 719 """
720 720 if start is None:
721 721 start = nullid
722 722 startrev = self.rev(start)
723 723 reachable = {startrev: 1}
724 724 heads = {startrev: 1}
725 725
726 726 parentrevs = self.parentrevs
727 727 for r in xrange(startrev + 1, self.count()):
728 728 for p in parentrevs(r):
729 729 if p in reachable:
730 730 reachable[r] = 1
731 731 heads[r] = 1
732 732 if p in heads:
733 733 del heads[p]
734 734 return [self.node(r) for r in heads]
735 735
736 736 def children(self, node):
737 737 """find the children of a given node"""
738 738 c = []
739 739 p = self.rev(node)
740 740 for r in range(p + 1, self.count()):
741 741 n = self.node(r)
742 742 for pn in self.parents(n):
743 743 if pn == node:
744 744 c.append(n)
745 745 continue
746 746 elif pn == nullid:
747 747 continue
748 748 return c
749 749
750 750 def lookup(self, id):
751 751 """locate a node based on revision number or subset of hex nodeid"""
752 752 if type(id) == type(0):
753 753 return self.node(id)
754 754 try:
755 755 rev = int(id)
756 756 if str(rev) != id: raise ValueError
757 757 if rev < 0: rev = self.count() + rev
758 758 if rev < 0 or rev >= self.count(): raise ValueError
759 759 return self.node(rev)
760 760 except (ValueError, OverflowError):
761 761 c = []
762 762 for n in self.nodemap:
763 763 if hex(n).startswith(id):
764 764 c.append(n)
765 765 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
766 766 if len(c) == 1: return c[0]
767 767
768 768 # might need fixing if we change hash lengths
769 769 if len(id) == 20 and id in self.nodemap:
770 770 return id
771 771
772 772 raise RevlogError(_("No match found"))
773 773
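# Editor's note (not part of the commit): hedged usage sketch for
# lookup() above, assuming an existing revlog instance 'rlog' -- it
# accepts an int rev, a decimal string (negative counts from the tip),
# or a unique hex-nodeid prefix:
#
#     node = rlog.lookup(0)          # by revision number
#     node = rlog.lookup("-1")       # tip, via a negative rev string
#     node = rlog.lookup("e7fc04")   # by unambiguous hex prefix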
774 774 def cmp(self, node, text):
775 775 """compare text with a given file revision"""
776 776 p1, p2 = self.parents(node)
777 777 return hash(text, p1, p2) != node
778 778
779 779 def makenode(self, node, text):
780 780 """calculate a file nodeid for text, descended or possibly
781 781 unchanged from node"""
782 782
783 783 if self.cmp(node, text):
784 784 return hash(text, node, nullid)
785 785 return node
786 786
787 787 def diff(self, a, b):
788 788 """return a delta between two revisions"""
789 789 return mdiff.textdiff(a, b)
790 790
791 791 def patches(self, t, pl):
792 792 """apply a list of patches to a string"""
793 793 return mdiff.patches(t, pl)
794 794
795 795 def chunk(self, rev, df=None, cachelen=4096):
796 796 start, length = self.start(rev), self.length(rev)
797 797 inline = self.inlinedata()
798 798 if inline:
799 799 start += (rev + 1) * struct.calcsize(self.indexformat)
800 800 end = start + length
801 801 def loadcache(df):
802 802 cache_length = max(cachelen, length) # 4k
803 803 if not df:
804 804 if inline:
805 805 df = self.opener(self.indexfile)
806 806 else:
807 807 df = self.opener(self.datafile)
808 808 df.seek(start)
809 809 self.chunkcache = (start, df.read(cache_length))
810 810
811 811 if not self.chunkcache:
812 812 loadcache(df)
813 813
814 814 cache_start = self.chunkcache[0]
815 815 cache_end = cache_start + len(self.chunkcache[1])
816 816 if start >= cache_start and end <= cache_end:
817 817 # it is cached
818 818 offset = start - cache_start
819 819 else:
820 820 loadcache(df)
821 821 offset = 0
822 822
823 823 #def checkchunk():
824 824 # df = self.opener(self.datafile)
825 825 # df.seek(start)
826 826 # return df.read(length)
827 827 #assert s == checkchunk()
828 828 return decompress(self.chunkcache[1][offset:offset + length])
829 829
830 830 def delta(self, node):
831 831 """return or calculate a delta between a node and its predecessor"""
832 832 r = self.rev(node)
833 833 return self.revdiff(r - 1, r)
834 834
835 835 def revdiff(self, rev1, rev2):
836 836 """return or calculate a delta between two revisions"""
837 837 b1 = self.base(rev1)
838 838 b2 = self.base(rev2)
839 839 if b1 == b2 and rev1 + 1 == rev2:
840 840 return self.chunk(rev2)
841 841 else:
842 842 return self.diff(self.revision(self.node(rev1)),
843 843 self.revision(self.node(rev2)))
844 844
845 845 def revision(self, node):
846 846 """return an uncompressed revision of a given"""
847 847 if node == nullid: return ""
848 848 if self.cache and self.cache[0] == node: return self.cache[2]
849 849
850 850 # look up what we need to read
851 851 text = None
852 852 rev = self.rev(node)
853 853 base = self.base(rev)
854 854
855 855 if self.inlinedata():
856 856 # we probably have the whole chunk cached
857 857 df = None
858 858 else:
859 859 df = self.opener(self.datafile)
860 860
861 861 # do we have useful data cached?
862 862 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
863 863 base = self.cache[1]
864 864 text = self.cache[2]
865 865 self.loadindex(base, rev + 1)
866 866 else:
867 867 self.loadindex(base, rev + 1)
868 868 text = self.chunk(base, df=df)
869 869
870 870 bins = []
871 871 for r in xrange(base + 1, rev + 1):
872 872 bins.append(self.chunk(r, df=df))
873 873
874 874 text = self.patches(text, bins)
875 875
876 876 p1, p2 = self.parents(node)
877 877 if node != hash(text, p1, p2):
878 878 raise RevlogError(_("integrity check failed on %s:%d")
879 879 % (self.datafile, rev))
880 880
881 881 self.cache = (node, rev, text)
882 882 return text
883 883
884 884 def checkinlinesize(self, tr, fp=None):
885 885 if not self.inlinedata():
886 886 return
887 887 if not fp:
888 888 fp = self.opener(self.indexfile, 'r')
889 889 fp.seek(0, 2)
890 890 size = fp.tell()
891 891 if size < 131072:
892 892 return
893 893 trinfo = tr.find(self.indexfile)
894 894 if trinfo == None:
895 895 raise RevlogError(_("%s not found in the transaction") %
896 896 self.indexfile)
897 897
898 898 trindex = trinfo[2]
899 899 dataoff = self.start(trindex)
900 900
901 901 tr.add(self.datafile, dataoff)
902 902 df = self.opener(self.datafile, 'w')
903 903 calc = struct.calcsize(self.indexformat)
904 904 for r in xrange(self.count()):
905 905 start = self.start(r) + (r + 1) * calc
906 906 length = self.length(r)
907 907 fp.seek(start)
908 908 d = fp.read(length)
909 909 df.write(d)
910 910 fp.close()
911 911 df.close()
912 912 fp = self.opener(self.indexfile, 'w', atomictemp=True)
913 913 self.version &= ~(REVLOGNGINLINEDATA)
914 914 if self.count():
915 915 x = self.index[0]
916 916 e = struct.pack(self.indexformat, *x)[4:]
917 917 l = struct.pack(versionformat, self.version)
918 918 fp.write(l)
919 919 fp.write(e)
920 920
921 921 for i in xrange(1, self.count()):
922 922 x = self.index[i]
923 923 e = struct.pack(self.indexformat, *x)
924 924 fp.write(e)
925 925
926 926 # if we don't call rename, the temp file will never replace the
927 927 # real index
928 928 fp.rename()
929 929
930 930 tr.replace(self.indexfile, trindex * calc)
931 931 self.chunkcache = None
932 932
933 933 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
934 934 """add a revision to the log
935 935
936 936 text - the revision data to add
937 937 transaction - the transaction object used for rollback
938 938 link - the linkrev data to add
939 939 p1, p2 - the parent nodeids of the revision
940 940 d - an optional precomputed delta
941 941 """
942 942 if text is None: text = ""
943 943 if p1 is None: p1 = self.tip()
944 944 if p2 is None: p2 = nullid
945 945
946 946 node = hash(text, p1, p2)
947 947
948 948 if node in self.nodemap:
949 949 return node
950 950
951 951 n = self.count()
952 952 t = n - 1
953 953
954 954 if n:
955 955 base = self.base(t)
956 956 start = self.start(base)
957 957 end = self.end(t)
958 958 if not d:
959 959 prev = self.revision(self.tip())
960 960 d = self.diff(prev, str(text))
961 961 data = compress(d)
962 962 l = len(data[1]) + len(data[0])
963 963 dist = end - start + l
964 964
965 965 # full versions are inserted when the needed deltas
966 966 # become comparable to the uncompressed text
967 967 if not n or dist > len(text) * 2:
968 968 data = compress(text)
969 969 l = len(data[1]) + len(data[0])
970 970 base = n
971 971 else:
972 972 base = self.base(t)
973 973
974 974 offset = 0
975 975 if t >= 0:
976 976 offset = self.end(t)
977 977
978 978 if self.version == REVLOGV0:
979 979 e = (offset, l, base, link, p1, p2, node)
980 980 else:
981 981 e = (self.offset_type(offset, 0), l, len(text),
982 982 base, link, self.rev(p1), self.rev(p2), node)
983 983
984 984 self.index.append(e)
985 985 self.nodemap[node] = n
986 986 entry = struct.pack(self.indexformat, *e)
987 987
988 988 if not self.inlinedata():
989 989 transaction.add(self.datafile, offset)
990 990 transaction.add(self.indexfile, n * len(entry))
991 991 f = self.opener(self.datafile, "a")
992 992 if data[0]:
993 993 f.write(data[0])
994 994 f.write(data[1])
995 995 f.close()
996 996 f = self.opener(self.indexfile, "a")
997 997 else:
998 998 f = self.opener(self.indexfile, "a+")
999 999 f.seek(0, 2)
1000 1000 transaction.add(self.indexfile, f.tell(), self.count() - 1)
1001 1001
1002 1002 if len(self.index) == 1 and self.version != REVLOGV0:
1003 1003 l = struct.pack(versionformat, self.version)
1004 1004 f.write(l)
1005 1005 entry = entry[4:]
1006 1006
1007 1007 f.write(entry)
1008 1008
1009 1009 if self.inlinedata():
1010 1010 f.write(data[0])
1011 1011 f.write(data[1])
1012 1012 self.checkinlinesize(transaction, f)
1013 1013
1014 1014 self.cache = (node, n, text)
1015 1015 return node
1016 1016
1017 1017 def ancestor(self, a, b):
1018 1018 """calculate the least common ancestor of nodes a and b"""
1019 1019
1020 1020 # start with some short cuts for the linear cases
1021 1021 if a == b:
1022 1022 return a
1023 1023 ra = self.rev(a)
1024 1024 rb = self.rev(b)
1025 1025 if ra < rb:
1026 1026 last = b
1027 1027 first = a
1028 1028 else:
1029 1029 last = a
1030 1030 first = b
1031 1031
1032 1032 # reachable won't include stop in the list, so we have to use a parent
1033 1033 reachable = self.reachable(last, stop=self.parents(first)[0])
1034 1034 if first in reachable:
1035 1035 return first
1036 1036
1037 1037 # calculate the distance of every node from root
1038 1038 dist = {nullid: 0}
1039 1039 for i in xrange(self.count()):
1040 1040 n = self.node(i)
1041 1041 p1, p2 = self.parents(n)
1042 1042 dist[n] = max(dist[p1], dist[p2]) + 1
1043 1043
1044 1044 # traverse ancestors in order of decreasing distance from root
1045 1045 def ancestors(node):
1046 1046 # we store negative distances because heap returns smallest member
1047 1047 h = [(-dist[node], node)]
1048 1048 seen = {}
1049 1049 while h:
1050 1050 d, n = heapq.heappop(h)
1051 1051 if n not in seen:
1052 1052 seen[n] = 1
1053 1053 yield (-d, n)
1054 1054 for p in self.parents(n):
1055 1055 heapq.heappush(h, (-dist[p], p))
1056 1056
1057 1057 def generations(node):
1058 1058 sg, s = None, {}
1059 1059 for g,n in ancestors(node):
1060 1060 if g != sg:
1061 1061 if sg:
1062 1062 yield sg, s
1063 1063 sg, s = g, {n:1}
1064 1064 else:
1065 1065 s[n] = 1
1066 1066 yield sg, s
1067 1067
1068 1068 x = generations(a)
1069 1069 y = generations(b)
1070 1070 gx = x.next()
1071 1071 gy = y.next()
1072 1072
1073 1073 # increment each ancestor list until it is closer to root than
1074 1074 # the other, or they match
1075 1075 while 1:
1076 1076 #print "ancestor gen %s %s" % (gx[0], gy[0])
1077 1077 if gx[0] == gy[0]:
1078 1078 # find the intersection
1079 1079 i = [ n for n in gx[1] if n in gy[1] ]
1080 1080 if i:
1081 1081 return i[0]
1082 1082 else:
1083 1083 #print "next"
1084 1084 gy = y.next()
1085 1085 gx = x.next()
1086 1086 elif gx[0] < gy[0]:
1087 1087 #print "next y"
1088 1088 gy = y.next()
1089 1089 else:
1090 1090 #print "next x"
1091 1091 gx = x.next()
1092 1092
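# Editor's sketch (not part of the commit): the negation trick used in
# ancestors() above. heapq always pops the smallest item, so pushing
# negative distances makes the nodes farthest from the root come first:
import heapq as _heapq
_h = []
for _d in (3, 1, 2):
    _heapq.heappush(_h, -_d)
assert [-_heapq.heappop(_h) for _i in range(3)] == [3, 2, 1]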
1093 1093 def group(self, nodelist, lookup, infocollect=None):
1094 1094 """calculate a delta group
1095 1095
1096 1096 Given a list of changeset revs, return a set of deltas and
1097 1097 metadata corresponding to nodes. The first delta is
1098 1098 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1099 1099 have this parent as it has all history before these
1100 1100 changesets. The parent is always the first parent (parent[0]).
1101 1101 """
1102 1102 revs = [self.rev(n) for n in nodelist]
1103 1103
1104 1104 # if we don't have any revisions touched by these changesets, bail
1105 1105 if not revs:
1106 1106 yield changegroup.closechunk()
1107 1107 return
1108 1108
1109 1109 # add the parent of the first rev
1110 1110 p = self.parents(self.node(revs[0]))[0]
1111 1111 revs.insert(0, self.rev(p))
1112 1112
1113 1113 # build deltas
1114 1114 for d in xrange(0, len(revs) - 1):
1115 1115 a, b = revs[d], revs[d + 1]
1116 1116 nb = self.node(b)
1117 1117
1118 1118 if infocollect is not None:
1119 1119 infocollect(nb)
1120 1120
1121 1121 d = self.revdiff(a, b)
1122 1122 p = self.parents(nb)
1123 1123 meta = nb + p[0] + p[1] + lookup(nb)
1124 1124 yield changegroup.genchunk("%s%s" % (meta, d))
1125 1125
1126 1126 yield changegroup.closechunk()
1127 1127
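# Editor's sketch (not part of the commit): the chunk layout produced by
# group() above and unpacked by addgroup() below -- an 80-byte header of
# four 20-byte hashes (node, p1, p2, link changeset), then the delta:
import struct as _struct
_chunk = "N" * 20 + "P" * 20 + "Q" * 20 + "L" * 20 + "delta-bytes"
_node, _p1, _p2, _cs = _struct.unpack("20s20s20s20s", _chunk[:80])
assert (_node, _cs, _chunk[80:]) == ("N" * 20, "L" * 20, "delta-bytes")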
1128 1128 def addgroup(self, revs, linkmapper, transaction, unique=0):
1129 1129 """
1130 1130 add a delta group
1131 1131
1132 1132 given a set of deltas, add them to the revision log. the
1133 1133 first delta is against its parent, which should be in our
1134 1134 log, the rest are against the previous delta.
1135 1135 """
1136 1136
1137 1137 # track the base of the current delta log
1138 1138 r = self.count()
1139 1139 t = r - 1
1140 1140 node = None
1141 1141
1142 1142 base = prev = -1
1143 1143 start = end = textlen = 0
1144 1144 if r:
1145 1145 end = self.end(t)
1146 1146
1147 1147 ifh = self.opener(self.indexfile, "a+")
1148 1148 ifh.seek(0, 2)
1149 1149 transaction.add(self.indexfile, ifh.tell(), self.count())
1150 1150 if self.inlinedata():
1151 1151 dfh = None
1152 1152 else:
1153 1153 transaction.add(self.datafile, end)
1154 1154 dfh = self.opener(self.datafile, "a")
1155 1155
1156 1156 # loop through our set of deltas
1157 1157 chain = None
1158 1158 for chunk in revs:
1159 1159 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1160 1160 link = linkmapper(cs)
1161 1161 if node in self.nodemap:
1162 1162 # this can happen if two branches make the same change
1163 1163 # if unique:
1164 1164 # raise RevlogError(_("already have %s") % hex(node[:4]))
1165 1165 chain = node
1166 1166 continue
1167 1167 delta = chunk[80:]
1168 1168
1169 1169 for p in (p1, p2):
1170 1170 if not p in self.nodemap:
1171 1171 raise RevlogError(_("unknown parent %s") % short(p))
1172 1172
1173 1173 if not chain:
1174 1174 # retrieve the parent revision of the delta chain
1175 1175 chain = p1
1176 1176 if not chain in self.nodemap:
1177 1177 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1178 1178
1179 1179 # full versions are inserted when the needed deltas become
1180 1180 # comparable to the uncompressed text or when the previous
1181 1181 # version is not the one we have a delta against. We use
1182 1182 # the size of the previous full rev as a proxy for the
1183 1183 # current size.
1184 1184
1185 1185 if chain == prev:
1186 1186 tempd = compress(delta)
1187 1187 cdelta = tempd[0] + tempd[1]
1188 1188 textlen = mdiff.patchedsize(textlen, delta)
1189 1189
1190 1190 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1191 1191 # flush our writes here so we can read it in revision
1192 1192 if dfh:
1193 1193 dfh.flush()
1194 1194 ifh.flush()
1195 1195 text = self.revision(chain)
1196 1196 text = self.patches(text, [delta])
1197 1197 chk = self.addrevision(text, transaction, link, p1, p2)
1198 1198 if chk != node:
1199 1199 raise RevlogError(_("consistency error adding group"))
1200 1200 textlen = len(text)
1201 1201 else:
1202 1202 if self.version == REVLOGV0:
1203 1203 e = (end, len(cdelta), base, link, p1, p2, node)
1204 1204 else:
1205 1205 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1206 1206 link, self.rev(p1), self.rev(p2), node)
1207 1207 self.index.append(e)
1208 1208 self.nodemap[node] = r
1209 1209 if self.inlinedata():
1210 1210 ifh.write(struct.pack(self.indexformat, *e))
1211 1211 ifh.write(cdelta)
1212 1212 self.checkinlinesize(transaction, ifh)
1213 1213 if not self.inlinedata():
1214 1214 dfh = self.opener(self.datafile, "a")
1215 1215 ifh = self.opener(self.indexfile, "a")
1216 1216 else:
1217 1217 if not dfh:
1218 1218 # addrevision switched from inline to conventional
1219 1219 # reopen the index
1220 1220 dfh = self.opener(self.datafile, "a")
1221 1221 ifh = self.opener(self.indexfile, "a")
1222 1222 dfh.write(cdelta)
1223 1223 ifh.write(struct.pack(self.indexformat, *e))
1224 1224
1225 1225 t, r, chain, prev = r, r + 1, node, node
1226 1226 base = self.base(t)
1227 1227 start = self.start(base)
1228 1228 end = self.end(t)
1229 1229
1230 1230 return node
1231 1231
1232 1232 def strip(self, rev, minlink):
1233 1233 if self.count() == 0 or rev >= self.count():
1234 1234 return
1235 1235
1236 1236 if isinstance(self.index, lazyindex):
1237 1237 self.loadindexmap()
1238 1238
1239 1239 # When stripping away a revision, we need to make sure it
1240 1240 # does not actually belong to an older changeset.
1241 1241 # The minlink parameter defines the oldest revision
1242 1242 # we're allowed to strip away.
1243 1243 while minlink > self.index[rev][-4]:
1244 1244 rev += 1
1245 1245 if rev >= self.count():
1246 1246 return
1247 1247
1248 1248 # first truncate the files on disk
1249 1249 end = self.start(rev)
1250 1250 if not self.inlinedata():
1251 1251 df = self.opener(self.datafile, "a")
1252 1252 df.truncate(end)
1253 1253 end = rev * struct.calcsize(self.indexformat)
1254 1254 else:
1255 1255 end += rev * struct.calcsize(self.indexformat)
1256 1256
1257 1257 indexf = self.opener(self.indexfile, "a")
1258 1258 indexf.truncate(end)
1259 1259
1260 1260 # then reset internal state in memory to forget those revisions
1261 1261 self.cache = None
1262 1262 self.chunkcache = None
1263 1263 for x in xrange(rev, self.count()):
1264 1264 del self.nodemap[self.node(x)]
1265 1265
1266 1266 del self.index[rev:]
1267 1267
1268 1268 def checksize(self):
1269 1269 expected = 0
1270 1270 if self.count():
1271 1271 expected = self.end(self.count() - 1)
1272 1272
1273 1273 try:
1274 1274 f = self.opener(self.datafile)
1275 1275 f.seek(0, 2)
1276 1276 actual = f.tell()
1277 1277 dd = actual - expected
1278 1278 except IOError, inst:
1279 1279 if inst.errno != errno.ENOENT:
1280 1280 raise
1281 1281 dd = 0
1282 1282
1283 1283 try:
1284 1284 f = self.opener(self.indexfile)
1285 1285 f.seek(0, 2)
1286 1286 actual = f.tell()
1287 1287 s = struct.calcsize(self.indexformat)
1288 1288 i = actual / s
1289 1289 di = actual - (i * s)
1290 1290 if self.inlinedata():
1291 1291 databytes = 0
1292 1292 for r in xrange(self.count()):
1293 1293 databytes += self.length(r)
1294 1294 dd = 0
1295 1295 di = actual - self.count() * s - databytes
1296 1296 except IOError, inst:
1297 1297 if inst.errno != errno.ENOENT:
1298 1298 raise
1299 1299 di = 0
1300 1300
1301 1301 return (dd, di)
1302 1302
1303 1303