don't let lazyparser read more data than it can handle...
Alexis S. L. Carvalho
r3078:baa3873e default
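
The patch below touches lazyparser.loadblock. The parser allocates self.index for exactly datasize/s entries when the revlog is opened; if the index file grows on disk afterwards, an unclamped read could hand back entries there is no room for. A minimal sketch of the added guard, with hypothetical sizes:

# Sketch of the clamp this changeset adds to lazyparser.loadblock.
# 'datasize' was sampled when the revlog was opened; the file may have
# grown since, but self.index only has room for datasize/s entries.
def clamp_blocksize(blockstart, blocksize, datasize):
    if blockstart + blocksize > datasize:
        blocksize = datasize - blockstart
    return blocksize

assert clamp_blocksize(0, 16384, 65536) == 16384     # fully inside: unchanged
assert clamp_blocksize(61440, 16384, 65536) == 4096  # tail block: clamped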
@@ -1,1298 +1,1303 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct util zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
26 26
27 27 REVLOG_DEFAULT_FORMAT = REVLOGNG
28 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
29 29
30 30 def flagstr(flag):
31 31 if flag == "inline":
32 32 return REVLOGNGINLINEDATA
33 33 raise RevlogError(_("unknown revlog flag %s" % flag))
34 34
35 35 def hash(text, p1, p2):
36 36 """generate a hash from the given text and its parent hashes
37 37
38 38 This hash combines both the current file contents and its history
39 39 in a manner that makes it easy to distinguish nodes with the same
40 40 content in the revision graph.
41 41 """
42 42 l = [p1, p2]
43 43 l.sort()
44 44 s = sha.new(l[0])
45 45 s.update(l[1])
46 46 s.update(text)
47 47 return s.digest()
48 48
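Since the parent hashes are sorted before hashing, hash() is symmetric in p1 and p2: swapping the parents yields the same nodeid, while changing either parent or the text changes it. A quick standalone check (Python 2, mirroring the definition above):

import sha  # Python 2 module, as used by this file

def _hash(text, p1, p2):
    l = [p1, p2]
    l.sort()
    s = sha.new(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

p1, p2 = '\x11' * 20, '\x22' * 20
assert _hash('data', p1, p2) == _hash('data', p2, p1)   # parent order is irrelevant
assert _hash('data', p1, p2) != _hash('other', p1, p2)  # the text still matters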
49 49 def compress(text):
50 50 """ generate a possibly-compressed representation of text """
51 51 if not text: return ("", text)
52 52 if len(text) < 44:
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 bin = zlib.compress(text)
56 56 if len(bin) > len(text):
57 57 if text[0] == '\0': return ("", text)
58 58 return ('u', text)
59 59 return ("", bin)
60 60
61 61 def decompress(bin):
62 62 """ decompress the given input """
63 63 if not bin: return bin
64 64 t = bin[0]
65 65 if t == '\0': return bin
66 66 if t == 'x': return zlib.decompress(bin)
67 67 if t == 'u': return bin[1:]
68 68 raise RevlogError(_("unknown compression type %r") % t)
69 69
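A round trip through the two helpers above (assuming compress and decompress from this module are in scope): short or incompressible text is stored verbatim behind a 'u' marker, while zlib output is recognized by its leading 'x' byte:

small = "short"      # under 44 bytes: stored verbatim
big = "x" * 1000     # repetitive: zlib wins

assert compress(small) == ('u', small)
head, body = compress(big)
assert head == "" and body[0] == 'x'   # zlib streams begin with 'x' (0x78)
for t in (small, big):
    assert decompress("".join(compress(t))) == t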
70 70 indexformatv0 = ">4l20s20s20s"
71 71 v0shaoffset = 56
72 72 # index ng:
73 73 # 6 bytes offset
74 74 # 2 bytes flags
75 75 # 4 bytes compressed length
76 76 # 4 bytes uncompressed length
77 77 # 4 bytes: base rev
78 78 # 4 bytes link rev
79 79 # 4 bytes parent 1 rev
80 80 # 4 bytes parent 2 rev
81 81 # 32 bytes: nodeid
82 82 indexformatng = ">Qiiiiii20s12x"
83 83 ngshaoffset = 32
84 84 versionformat = ">i"
85 85
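The revlogng entry described above is a fixed 64-byte record whose first quad packs a 48-bit data-file offset together with 16 bits of per-revision flags, which is exactly what offset_type() builds later in this file. A sketch of packing and unpacking one entry (Python 2, hypothetical values):

import struct

indexformatng = ">Qiiiiii20s12x"
assert struct.calcsize(indexformatng) == 64

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

node = '\xaa' * 20
e = (offset_type(12345, 0), 100, 250, 3, 7, 2, -1, node)
record = struct.pack(indexformatng, *e)
assert struct.unpack(indexformatng, record) == e
assert struct.unpack(indexformatng, record)[0] >> 16 == 12345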
86 86 class lazyparser(object):
87 87 """
88 88 this class avoids the need to parse the entirety of large indices
89 89 """
90 90
91 91 # lazyparser is not safe to use on windows if win32 extensions are not
92 92 # available. it keeps the file handle open, which makes it impossible
93 93 # to break hardlinks on local cloned repos.
94 94 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 95 hasattr(util, 'win32api'))
96 96
97 97 def __init__(self, dataf, size, indexformat, shaoffset):
98 98 self.dataf = dataf
99 99 self.format = indexformat
100 100 self.s = struct.calcsize(indexformat)
101 101 self.indexformat = indexformat
102 102 self.datasize = size
103 103 self.l = size/self.s
104 104 self.index = [None] * self.l
105 105 self.map = {nullid: -1}
106 106 self.allmap = 0
107 107 self.all = 0
108 108 self.mapfind_count = 0
109 109 self.shaoffset = shaoffset
110 110
111 111 def loadmap(self):
112 112 """
113 113 during a commit, we need to make sure the rev being added is
114 114 not a duplicate. This requires loading the entire index,
115 115 which is fairly slow. loadmap can load up just the node map,
116 116 which takes much less time.
117 117 """
118 118 if self.allmap: return
119 119 start = 0
120 120 end = self.datasize
121 121 self.allmap = 1
122 122 cur = 0
123 123 count = 0
124 124 blocksize = self.s * 256
125 125 self.dataf.seek(0)
126 126 while cur < end:
127 127 data = self.dataf.read(blocksize)
128 128 off = 0
129 129 for x in xrange(256):
130 130 n = data[off + self.shaoffset:off + self.shaoffset + 20]
131 131 self.map[n] = count
132 132 count += 1
133 133 if count >= self.l:
134 134 break
135 135 off += self.s
136 136 cur += blocksize
137 137
138 138 def loadblock(self, blockstart, blocksize, data=None):
139 139 if self.all: return
140 140 if data is None:
141 141 self.dataf.seek(blockstart)
142 if blockstart + blocksize > self.datasize:
143 # the revlog may have grown since we started running,
144 # but we don't have space in self.index for more entries.
145 # limit blocksize so that we don't get too much data.
146 blocksize = self.datasize - blockstart
142 147 data = self.dataf.read(blocksize)
143 148 lend = len(data) / self.s
144 149 i = blockstart / self.s
145 150 off = 0
146 151 for x in xrange(lend):
147 152 if self.index[i + x] == None:
148 153 b = data[off : off + self.s]
149 154 self.index[i + x] = b
150 155 n = b[self.shaoffset:self.shaoffset + 20]
151 156 self.map[n] = i + x
152 157 off += self.s
153 158
154 159 def findnode(self, node):
155 160 """search backwards through the index file for a specific node"""
156 161 if self.allmap: return None
157 162
158 163 # hg log will cause many many searches for the manifest
159 164 # nodes. After we get called a few times, just load the whole
160 165 # thing.
161 166 if self.mapfind_count > 8:
162 167 self.loadmap()
163 168 if node in self.map:
164 169 return node
165 170 return None
166 171 self.mapfind_count += 1
167 172 last = self.l - 1
168 173 while self.index[last] != None:
169 174 if last == 0:
170 175 self.all = 1
171 176 self.allmap = 1
172 177 return None
173 178 last -= 1
174 179 end = (last + 1) * self.s
175 180 blocksize = self.s * 256
176 181 while end >= 0:
177 182 start = max(end - blocksize, 0)
178 183 self.dataf.seek(start)
179 184 data = self.dataf.read(end - start)
180 185 findend = end - start
181 186 while True:
182 187 # we're searching backwards, so we have to make sure
183 188 # we don't find a changeset where this node is a parent
184 189 off = data.rfind(node, 0, findend)
185 190 findend = off
186 191 if off >= 0:
187 192 i = off / self.s
188 193 off = i * self.s
189 194 n = data[off + self.shaoffset:off + self.shaoffset + 20]
190 195 if n == node:
191 196 self.map[n] = i + start / self.s
192 197 return node
193 198 else:
194 199 break
195 200 end -= blocksize
196 201 return None
197 202
198 203 def loadindex(self, i=None, end=None):
199 204 if self.all: return
200 205 all = False
201 206 if i == None:
202 207 blockstart = 0
203 208 blocksize = (512 / self.s) * self.s
204 209 end = self.datasize
205 210 all = True
206 211 else:
207 212 if end:
208 213 blockstart = i * self.s
209 214 end = end * self.s
210 215 blocksize = end - blockstart
211 216 else:
212 217 blockstart = (i & ~(32)) * self.s
213 218 blocksize = self.s * 64
214 219 end = blockstart + blocksize
215 220 while blockstart < end:
216 221 self.loadblock(blockstart, blocksize)
217 222 blockstart += blocksize
218 223 if all: self.all = True
219 224
220 225 class lazyindex(object):
221 226 """a lazy version of the index array"""
222 227 def __init__(self, parser):
223 228 self.p = parser
224 229 def __len__(self):
225 230 return len(self.p.index)
226 231 def load(self, pos):
227 232 if pos < 0:
228 233 pos += len(self.p.index)
229 234 self.p.loadindex(pos)
230 235 return self.p.index[pos]
231 236 def __getitem__(self, pos):
232 237 ret = self.p.index[pos] or self.load(pos)
233 238 if isinstance(ret, str):
234 239 ret = struct.unpack(self.p.indexformat, ret)
235 240 return ret
236 241 def __setitem__(self, pos, item):
237 242 self.p.index[pos] = item
238 243 def __delitem__(self, pos):
239 244 del self.p.index[pos]
240 245 def append(self, e):
241 246 self.p.index.append(e)
242 247
243 248 class lazymap(object):
244 249 """a lazy version of the node map"""
245 250 def __init__(self, parser):
246 251 self.p = parser
247 252 def load(self, key):
248 253 n = self.p.findnode(key)
249 254 if n == None:
250 255 raise KeyError(key)
251 256 def __contains__(self, key):
252 257 if key in self.p.map:
253 258 return True
254 259 self.p.loadmap()
255 260 return key in self.p.map
256 261 def __iter__(self):
257 262 yield nullid
258 263 for i in xrange(self.p.l):
259 264 ret = self.p.index[i]
260 265 if not ret:
261 266 self.p.loadindex(i)
262 267 ret = self.p.index[i]
263 268 if isinstance(ret, str):
264 269 ret = struct.unpack(self.p.indexformat, ret)
265 270 yield ret[-1]
266 271 def __getitem__(self, key):
267 272 try:
268 273 return self.p.map[key]
269 274 except KeyError:
270 275 try:
271 276 self.load(key)
272 277 return self.p.map[key]
273 278 except KeyError:
274 279 raise KeyError("node " + hex(key))
275 280 def __setitem__(self, key, val):
276 281 self.p.map[key] = val
277 282 def __delitem__(self, key):
278 283 del self.p.map[key]
279 284
280 285 class RevlogError(Exception): pass
281 286
282 287 class revlog(object):
283 288 """
284 289 the underlying revision storage object
285 290
286 291 A revlog consists of two parts, an index and the revision data.
287 292
288 293 The index is a file with a fixed record size containing
289 294 information on each revision, including its nodeid (hash), the
290 295 nodeids of its parents, the position and offset of its data within
291 296 the data file, and the revision it's based on. Finally, each entry
292 297 contains a linkrev entry that can serve as a pointer to external
293 298 data.
294 299
295 300 The revision data itself is a linear collection of data chunks.
296 301 Each chunk represents a revision and is usually represented as a
297 302 delta against the previous chunk. To bound lookup time, runs of
298 303 deltas are limited to about 2 times the length of the original
299 304 version data. This makes retrieval of a version proportional to
300 305 its size, or O(1) relative to the number of revisions.
301 306
302 307 Both pieces of the revlog are written to in an append-only
303 308 fashion, which means we never need to rewrite a file to insert or
304 309 remove data, and can use some simple techniques to avoid the need
305 310 for locking while reading.
306 311 """
307 312 def __init__(self, opener, indexfile, datafile,
308 313 defversion=REVLOG_DEFAULT_VERSION):
309 314 """
310 315 create a revlog object
311 316
312 317 opener is a function that abstracts the file opening operation
313 318 and can be used to implement COW semantics or the like.
314 319 """
315 320 self.indexfile = indexfile
316 321 self.datafile = datafile
317 322 self.opener = opener
318 323
319 324 self.indexstat = None
320 325 self.cache = None
321 326 self.chunkcache = None
322 327 self.defversion = defversion
323 328 self.load()
324 329
325 330 def load(self):
326 331 v = self.defversion
327 332 try:
328 333 f = self.opener(self.indexfile)
329 334 i = f.read(4)
330 335 f.seek(0)
331 336 except IOError, inst:
332 337 if inst.errno != errno.ENOENT:
333 338 raise
334 339 i = ""
335 340 else:
336 341 try:
337 342 st = util.fstat(f)
338 343 except AttributeError, inst:
339 344 st = None
340 345 else:
341 346 oldst = self.indexstat
342 347 if (oldst and st.st_dev == oldst.st_dev
343 348 and st.st_ino == oldst.st_ino
344 349 and st.st_mtime == oldst.st_mtime
345 350 and st.st_ctime == oldst.st_ctime):
346 351 return
347 352 self.indexstat = st
348 353 if len(i) > 0:
349 354 v = struct.unpack(versionformat, i)[0]
350 355 flags = v & ~0xFFFF
351 356 fmt = v & 0xFFFF
352 357 if fmt == REVLOGV0:
353 358 if flags:
354 359 raise RevlogError(_("index %s invalid flags %x for format v0" %
355 360 (self.indexfile, flags)))
356 361 elif fmt == REVLOGNG:
357 362 if flags & ~REVLOGNGINLINEDATA:
358 363 raise RevlogError(_("index %s invalid flags %x for revlogng" %
359 364 (self.indexfile, flags)))
360 365 else:
361 366 raise RevlogError(_("index %s invalid format %d" %
362 367 (self.indexfile, fmt)))
363 368 self.version = v
364 369 if v == REVLOGV0:
365 370 self.indexformat = indexformatv0
366 371 shaoffset = v0shaoffset
367 372 else:
368 373 self.indexformat = indexformatng
369 374 shaoffset = ngshaoffset
370 375
371 376 if i:
372 377 if (lazyparser.safe_to_use and not self.inlinedata() and
373 378 st and st.st_size > 10000):
374 379 # big index, let's parse it on demand
375 380 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
376 381 self.index = lazyindex(parser)
377 382 self.nodemap = lazymap(parser)
378 383 else:
379 384 self.parseindex(f, st)
380 385 if self.version != REVLOGV0:
381 386 e = list(self.index[0])
382 387 type = self.ngtype(e[0])
383 388 e[0] = self.offset_type(0, type)
384 389 self.index[0] = e
385 390 else:
386 391 self.nodemap = { nullid: -1}
387 392 self.index = []
388 393
389 394
390 395 def parseindex(self, fp, st):
391 396 s = struct.calcsize(self.indexformat)
392 397 self.index = []
393 398 self.nodemap = {nullid: -1}
394 399 inline = self.inlinedata()
395 400 n = 0
396 401 leftover = None
397 402 while True:
398 403 if st:
399 404 data = fp.read(65536)
400 405 else:
401 406 # hack for httprangereader, it doesn't do partial reads well
402 407 data = fp.read()
403 408 if not data:
404 409 break
405 410 if n == 0 and self.inlinedata():
406 411 # cache the first chunk
407 412 self.chunkcache = (0, data)
408 413 if leftover:
409 414 data = leftover + data
410 415 leftover = None
411 416 off = 0
412 417 l = len(data)
413 418 while off < l:
414 419 if l - off < s:
415 420 leftover = data[off:]
416 421 break
417 422 cur = data[off:off + s]
418 423 off += s
419 424 e = struct.unpack(self.indexformat, cur)
420 425 self.index.append(e)
421 426 self.nodemap[e[-1]] = n
422 427 n += 1
423 428 if inline:
424 429 off += e[1]
425 430 if off > l:
426 431 # some things don't seek well, just read it
427 432 fp.read(off - l)
428 433 if not st:
429 434 break
430 435
431 436
432 437 def ngoffset(self, q):
433 438 if q & 0xFFFF:
434 439 raise RevlogError(_('%s: incompatible revision flag %x') %
435 440 (self.indexfile, q))
436 441 return long(q >> 16)
437 442
438 443 def ngtype(self, q):
439 444 return int(q & 0xFFFF)
440 445
441 446 def offset_type(self, offset, type):
442 447 return long(long(offset) << 16 | type)
443 448
444 449 def loadindex(self, start, end):
445 450 """load a block of indexes all at once from the lazy parser"""
446 451 if isinstance(self.index, lazyindex):
447 452 self.index.p.loadindex(start, end)
448 453
449 454 def loadindexmap(self):
450 455 """loads both the map and the index from the lazy parser"""
451 456 if isinstance(self.index, lazyindex):
452 457 p = self.index.p
453 458 p.loadindex()
454 459 self.nodemap = p.map
455 460
456 461 def loadmap(self):
457 462 """loads the map from the lazy parser"""
458 463 if isinstance(self.nodemap, lazymap):
459 464 self.nodemap.p.loadmap()
460 465 self.nodemap = self.nodemap.p.map
461 466
462 467 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
463 468 def tip(self): return self.node(len(self.index) - 1)
464 469 def count(self): return len(self.index)
465 470 def node(self, rev):
466 471 return (rev < 0) and nullid or self.index[rev][-1]
467 472 def rev(self, node):
468 473 try:
469 474 return self.nodemap[node]
470 475 except KeyError:
471 476 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 477 def linkrev(self, node):
473 478 return (node == nullid) and -1 or self.index[self.rev(node)][-4]
474 479 def parents(self, node):
475 480 if node == nullid: return (nullid, nullid)
476 481 r = self.rev(node)
477 482 d = self.index[r][-3:-1]
478 483 if self.version == REVLOGV0:
479 484 return d
480 485 return [ self.node(x) for x in d ]
481 486 def parentrevs(self, rev):
482 487 if rev == -1:
483 488 return (-1, -1)
484 489 d = self.index[rev][-3:-1]
485 490 if self.version == REVLOGV0:
486 491 return [ self.rev(x) for x in d ]
487 492 return d
488 493 def start(self, rev):
489 494 if rev < 0:
490 495 return -1
491 496 if self.version != REVLOGV0:
492 497 return self.ngoffset(self.index[rev][0])
493 498 return self.index[rev][0]
494 499
495 500 def end(self, rev): return self.start(rev) + self.length(rev)
496 501
497 502 def size(self, rev):
498 503 """return the length of the uncompressed text for a given revision"""
499 504 l = -1
500 505 if self.version != REVLOGV0:
501 506 l = self.index[rev][2]
502 507 if l >= 0:
503 508 return l
504 509
505 510 t = self.revision(self.node(rev))
506 511 return len(t)
507 512
508 513 # alternate implementation. The advantage of this code is that it
509 514 # will be faster for a single revision. But, the results are not
510 515 # cached, so finding the size of every revision will be slower.
511 516 """
512 517 if self.cache and self.cache[1] == rev:
513 518 return len(self.cache[2])
514 519
515 520 base = self.base(rev)
516 521 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
517 522 base = self.cache[1]
518 523 text = self.cache[2]
519 524 else:
520 525 text = self.revision(self.node(base))
521 526
522 527 l = len(text)
523 528 for x in xrange(base + 1, rev + 1):
524 529 l = mdiff.patchedsize(l, self.chunk(x))
525 530 return l
526 531 """
527 532
528 533 def length(self, rev):
529 534 if rev < 0:
530 535 return 0
531 536 else:
532 537 return self.index[rev][1]
533 538 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
534 539
535 540 def reachable(self, rev, stop=None):
536 541 reachable = {}
537 542 visit = [rev]
538 543 reachable[rev] = 1
539 544 if stop:
540 545 stopn = self.rev(stop)
541 546 else:
542 547 stopn = 0
543 548 while visit:
544 549 n = visit.pop(0)
545 550 if n == stop:
546 551 continue
547 552 if n == nullid:
548 553 continue
549 554 for p in self.parents(n):
550 555 if self.rev(p) < stopn:
551 556 continue
552 557 if p not in reachable:
553 558 reachable[p] = 1
554 559 visit.append(p)
555 560 return reachable
556 561
557 562 def nodesbetween(self, roots=None, heads=None):
558 563 """Return a tuple containing three elements. Elements 1 and 2 contain
559 564 a final list bases and heads after all the unreachable ones have been
560 565 pruned. Element 0 contains a topologically sorted list of all
561 566
562 567 nodes that satisfy these constraints:
563 568 1. All nodes must be descended from a node in roots (the nodes on
564 569 roots are considered descended from themselves).
565 570 2. All nodes must also be ancestors of a node in heads (the nodes in
566 571 heads are considered to be their own ancestors).
567 572
568 573 If roots is unspecified, nullid is assumed as the only root.
569 574 If heads is unspecified, it is taken to be the output of the
570 575 heads method (i.e. a list of all nodes in the repository that
571 576 have no children)."""
572 577 nonodes = ([], [], [])
573 578 if roots is not None:
574 579 roots = list(roots)
575 580 if not roots:
576 581 return nonodes
577 582 lowestrev = min([self.rev(n) for n in roots])
578 583 else:
579 584 roots = [nullid] # Everybody's a descendent of nullid
580 585 lowestrev = -1
581 586 if (lowestrev == -1) and (heads is None):
582 587 # We want _all_ the nodes!
583 588 return ([self.node(r) for r in xrange(0, self.count())],
584 589 [nullid], list(self.heads()))
585 590 if heads is None:
586 591 # All nodes are ancestors, so the latest ancestor is the last
587 592 # node.
588 593 highestrev = self.count() - 1
589 594 # Set ancestors to None to signal that every node is an ancestor.
590 595 ancestors = None
591 596 # Set heads to an empty dictionary for later discovery of heads
592 597 heads = {}
593 598 else:
594 599 heads = list(heads)
595 600 if not heads:
596 601 return nonodes
597 602 ancestors = {}
598 603 # Start at the top and keep marking parents until we're done.
599 604 nodestotag = heads[:]
600 605 # Turn heads into a dictionary so we can remove 'fake' heads.
601 606 # Also, later we will be using it to filter out the heads we can't
602 607 # find from roots.
603 608 heads = dict.fromkeys(heads, 0)
604 609 # Remember where the top was so we can use it as a limit later.
605 610 highestrev = max([self.rev(n) for n in nodestotag])
606 611 while nodestotag:
607 612 # grab a node to tag
608 613 n = nodestotag.pop()
609 614 # Never tag nullid
610 615 if n == nullid:
611 616 continue
612 617 # A node's revision number represents its place in a
613 618 # topologically sorted list of nodes.
614 619 r = self.rev(n)
615 620 if r >= lowestrev:
616 621 if n not in ancestors:
617 622 # If we are possibly a descendent of one of the roots
618 623 # and we haven't already been marked as an ancestor
619 624 ancestors[n] = 1 # Mark as ancestor
620 625 # Add non-nullid parents to list of nodes to tag.
621 626 nodestotag.extend([p for p in self.parents(n) if
622 627 p != nullid])
623 628 elif n in heads: # We've seen it before, is it a fake head?
624 629 # So it is, real heads should not be the ancestors of
625 630 # any other heads.
626 631 heads.pop(n)
627 632 if not ancestors:
628 633 return nonodes
629 634 # Now that we have our set of ancestors, we want to remove any
630 635 # roots that are not ancestors.
631 636
632 637 # If one of the roots was nullid, everything is included anyway.
633 638 if lowestrev > -1:
634 639 # But, since we weren't, let's recompute the lowest rev to not
635 640 # include roots that aren't ancestors.
636 641
637 642 # Filter out roots that aren't ancestors of heads
638 643 roots = [n for n in roots if n in ancestors]
639 644 # Recompute the lowest revision
640 645 if roots:
641 646 lowestrev = min([self.rev(n) for n in roots])
642 647 else:
643 648 # No more roots? Return empty list
644 649 return nonodes
645 650 else:
646 651 # We are descending from nullid, and don't need to care about
647 652 # any other roots.
648 653 lowestrev = -1
649 654 roots = [nullid]
650 655 # Transform our roots list into a 'set' (i.e. a dictionary where the
652 657 # values don't matter).
652 657 descendents = dict.fromkeys(roots, 1)
653 658 # Also, keep the original roots so we can filter out roots that aren't
654 659 # 'real' roots (i.e. are descended from other roots).
655 660 roots = descendents.copy()
656 661 # Our topologically sorted list of output nodes.
657 662 orderedout = []
658 663 # Don't start at nullid since we don't want nullid in our output list,
659 664 # and if nullid shows up in descendents, empty parents will look like
660 665 # they're descendents.
661 666 for r in xrange(max(lowestrev, 0), highestrev + 1):
662 667 n = self.node(r)
663 668 isdescendent = False
664 669 if lowestrev == -1: # Everybody is a descendent of nullid
665 670 isdescendent = True
666 671 elif n in descendents:
667 672 # n is already a descendent
668 673 isdescendent = True
669 674 # This check only needs to be done here because all the roots
670 675 # will start being marked as descendents before the loop.
671 676 if n in roots:
672 677 # If n was a root, check if it's a 'real' root.
673 678 p = tuple(self.parents(n))
674 679 # If any of its parents are descendents, it's not a root.
675 680 if (p[0] in descendents) or (p[1] in descendents):
676 681 roots.pop(n)
677 682 else:
678 683 p = tuple(self.parents(n))
679 684 # A node is a descendent if either of its parents are
680 685 # descendents. (We seeded the descendents list with the roots
681 686 # up there, remember?)
682 687 if (p[0] in descendents) or (p[1] in descendents):
683 688 descendents[n] = 1
684 689 isdescendent = True
685 690 if isdescendent and ((ancestors is None) or (n in ancestors)):
686 691 # Only include nodes that are both descendents and ancestors.
687 692 orderedout.append(n)
688 693 if (ancestors is not None) and (n in heads):
689 694 # We're trying to figure out which heads are reachable
690 695 # from roots.
691 696 # Mark this head as having been reached
692 697 heads[n] = 1
693 698 elif ancestors is None:
694 699 # Otherwise, we're trying to discover the heads.
695 700 # Assume this is a head because if it isn't, the next step
696 701 # will eventually remove it.
697 702 heads[n] = 1
698 703 # But, obviously its parents aren't.
699 704 for p in self.parents(n):
700 705 heads.pop(p, None)
701 706 heads = [n for n in heads.iterkeys() if heads[n] != 0]
702 707 roots = roots.keys()
703 708 assert orderedout
704 709 assert roots
705 710 assert heads
706 711 return (orderedout, roots, heads)
707 712
708 713 def heads(self, start=None):
709 714 """return the list of all nodes that have no children
710 715
711 716 if start is specified, only heads that are descendants of
712 717 start will be returned
713 718
714 719 """
715 720 if start is None:
716 721 start = nullid
717 722 startrev = self.rev(start)
718 723 reachable = {startrev: 1}
719 724 heads = {startrev: 1}
720 725
721 726 parentrevs = self.parentrevs
722 727 for r in xrange(startrev + 1, self.count()):
723 728 for p in parentrevs(r):
724 729 if p in reachable:
725 730 reachable[r] = 1
726 731 heads[r] = 1
727 732 if p in heads:
728 733 del heads[p]
729 734 return [self.node(r) for r in heads]
730 735
731 736 def children(self, node):
732 737 """find the children of a given node"""
733 738 c = []
734 739 p = self.rev(node)
735 740 for r in range(p + 1, self.count()):
736 741 n = self.node(r)
737 742 for pn in self.parents(n):
738 743 if pn == node:
739 744 c.append(n)
740 745 continue
741 746 elif pn == nullid:
742 747 continue
743 748 return c
744 749
745 750 def lookup(self, id):
746 751 """locate a node based on revision number or subset of hex nodeid"""
747 752 if type(id) == type(0):
748 753 return self.node(id)
749 754 try:
750 755 rev = int(id)
751 756 if str(rev) != id: raise ValueError
752 757 if rev < 0: rev = self.count() + rev
753 758 if rev < 0 or rev >= self.count(): raise ValueError
754 759 return self.node(rev)
755 760 except (ValueError, OverflowError):
756 761 c = []
757 762 for n in self.nodemap:
758 763 if hex(n).startswith(id):
759 764 c.append(n)
760 765 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
761 766 if len(c) == 1: return c[0]
762 767
763 768 # might need fixing if we change hash lengths
764 769 if len(id) == 20 and id in self.nodemap:
765 770 return id
766 771
767 772 raise RevlogError(_("No match found"))
768 773
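lookup() thus resolves three kinds of identifiers: a bare integer rev, a decimal string (with negative values counting back from the tip), or a hex nodeid prefix that must match exactly one node. A standalone sketch of the string-normalization branch:

def parse_rev_string(id, count):
    # mirrors the int(id) branch of lookup(): strict decimal strings,
    # with negative revs counted from the end of the log
    rev = int(id)
    if str(rev) != id:
        raise ValueError
    if rev < 0:
        rev = count + rev
    if rev < 0 or rev >= count:
        raise ValueError
    return rev

assert parse_rev_string("0", 5) == 0
assert parse_rev_string("-1", 5) == 4   # tip of a 5-revision log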
769 774 def cmp(self, node, text):
770 775 """compare text with a given file revision"""
771 776 p1, p2 = self.parents(node)
772 777 return hash(text, p1, p2) != node
773 778
774 779 def makenode(self, node, text):
775 780 """calculate a file nodeid for text, descended or possibly
776 781 unchanged from node"""
777 782
778 783 if self.cmp(node, text):
779 784 return hash(text, node, nullid)
780 785 return node
781 786
782 787 def diff(self, a, b):
783 788 """return a delta between two revisions"""
784 789 return mdiff.textdiff(a, b)
785 790
786 791 def patches(self, t, pl):
787 792 """apply a list of patches to a string"""
788 793 return mdiff.patches(t, pl)
789 794
790 795 def chunk(self, rev, df=None, cachelen=4096):
791 796 start, length = self.start(rev), self.length(rev)
792 797 inline = self.inlinedata()
793 798 if inline:
794 799 start += (rev + 1) * struct.calcsize(self.indexformat)
795 800 end = start + length
796 801 def loadcache(df):
797 802 cache_length = max(cachelen, length) # 4k
798 803 if not df:
799 804 if inline:
800 805 df = self.opener(self.indexfile)
801 806 else:
802 807 df = self.opener(self.datafile)
803 808 df.seek(start)
804 809 self.chunkcache = (start, df.read(cache_length))
805 810
806 811 if not self.chunkcache:
807 812 loadcache(df)
808 813
809 814 cache_start = self.chunkcache[0]
810 815 cache_end = cache_start + len(self.chunkcache[1])
811 816 if start >= cache_start and end <= cache_end:
812 817 # it is cached
813 818 offset = start - cache_start
814 819 else:
815 820 loadcache(df)
816 821 offset = 0
817 822
818 823 #def checkchunk():
819 824 # df = self.opener(self.datafile)
820 825 # df.seek(start)
821 826 # return df.read(length)
822 827 #assert s == checkchunk()
823 828 return decompress(self.chunkcache[1][offset:offset + length])
824 829
825 830 def delta(self, node):
826 831 """return or calculate a delta between a node and its predecessor"""
827 832 r = self.rev(node)
828 833 return self.revdiff(r - 1, r)
829 834
830 835 def revdiff(self, rev1, rev2):
831 836 """return or calculate a delta between two revisions"""
832 837 b1 = self.base(rev1)
833 838 b2 = self.base(rev2)
834 839 if b1 == b2 and rev1 + 1 == rev2:
835 840 return self.chunk(rev2)
836 841 else:
837 842 return self.diff(self.revision(self.node(rev1)),
838 843 self.revision(self.node(rev2)))
839 844
840 845 def revision(self, node):
841 846 """return an uncompressed revision of a given"""
842 847 if node == nullid: return ""
843 848 if self.cache and self.cache[0] == node: return self.cache[2]
844 849
845 850 # look up what we need to read
846 851 text = None
847 852 rev = self.rev(node)
848 853 base = self.base(rev)
849 854
850 855 if self.inlinedata():
851 856 # we probably have the whole chunk cached
852 857 df = None
853 858 else:
854 859 df = self.opener(self.datafile)
855 860
856 861 # do we have useful data cached?
857 862 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
858 863 base = self.cache[1]
859 864 text = self.cache[2]
860 865 self.loadindex(base, rev + 1)
861 866 else:
862 867 self.loadindex(base, rev + 1)
863 868 text = self.chunk(base, df=df)
864 869
865 870 bins = []
866 871 for r in xrange(base + 1, rev + 1):
867 872 bins.append(self.chunk(r, df=df))
868 873
869 874 text = self.patches(text, bins)
870 875
871 876 p1, p2 = self.parents(node)
872 877 if node != hash(text, p1, p2):
873 878 raise RevlogError(_("integrity check failed on %s:%d")
874 879 % (self.datafile, rev))
875 880
876 881 self.cache = (node, rev, text)
877 882 return text
878 883
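In other words, revision() walks a delta chain: the chunk at base(rev) is a full text and every later chunk up to rev is a delta against its predecessor, applied in order via mdiff.patches. A toy version of that reconstruction loop, with a stand-in patcher instead of the real mdiff (both hypothetical):

def reconstruct(base_text, deltas, patch):
    # chunk(base) yields the full text; each following chunk is a
    # delta applied in revision order, as in revision() above
    text = base_text
    for d in deltas:
        text = patch(text, d)
    return text

append_patch = lambda text, d: text + d   # toy delta: append a suffix
assert reconstruct("v0", [".a", ".b"], append_patch) == "v0.a.b"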
879 884 def checkinlinesize(self, tr, fp=None):
880 885 if not self.inlinedata():
881 886 return
882 887 if not fp:
883 888 fp = self.opener(self.indexfile, 'r')
884 889 fp.seek(0, 2)
885 890 size = fp.tell()
886 891 if size < 131072:
887 892 return
888 893 trinfo = tr.find(self.indexfile)
889 894 if trinfo == None:
890 895 raise RevlogError(_("%s not found in the transaction" %
891 896 self.indexfile))
892 897
893 898 trindex = trinfo[2]
894 899 dataoff = self.start(trindex)
895 900
896 901 tr.add(self.datafile, dataoff)
897 902 df = self.opener(self.datafile, 'w')
898 903 calc = struct.calcsize(self.indexformat)
899 904 for r in xrange(self.count()):
900 905 start = self.start(r) + (r + 1) * calc
901 906 length = self.length(r)
902 907 fp.seek(start)
903 908 d = fp.read(length)
904 909 df.write(d)
905 910 fp.close()
906 911 df.close()
907 912 fp = self.opener(self.indexfile, 'w', atomictemp=True)
908 913 self.version &= ~(REVLOGNGINLINEDATA)
909 914 if self.count():
910 915 x = self.index[0]
911 916 e = struct.pack(self.indexformat, *x)[4:]
912 917 l = struct.pack(versionformat, self.version)
913 918 fp.write(l)
914 919 fp.write(e)
915 920
916 921 for i in xrange(1, self.count()):
917 922 x = self.index[i]
918 923 e = struct.pack(self.indexformat, *x)
919 924 fp.write(e)
920 925
921 926 # if we don't call rename, the temp file will never replace the
922 927 # real index
923 928 fp.rename()
924 929
925 930 tr.replace(self.indexfile, trindex * calc)
926 931 self.chunkcache = None
927 932
928 933 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
929 934 """add a revision to the log
930 935
931 936 text - the revision data to add
932 937 transaction - the transaction object used for rollback
933 938 link - the linkrev data to add
934 939 p1, p2 - the parent nodeids of the revision
935 940 d - an optional precomputed delta
936 941 """
937 942 if text is None: text = ""
938 943 if p1 is None: p1 = self.tip()
939 944 if p2 is None: p2 = nullid
940 945
941 946 node = hash(text, p1, p2)
942 947
943 948 if node in self.nodemap:
944 949 return node
945 950
946 951 n = self.count()
947 952 t = n - 1
948 953
949 954 if n:
950 955 base = self.base(t)
951 956 start = self.start(base)
952 957 end = self.end(t)
953 958 if not d:
954 959 prev = self.revision(self.tip())
955 960 d = self.diff(prev, str(text))
956 961 data = compress(d)
957 962 l = len(data[1]) + len(data[0])
958 963 dist = end - start + l
959 964
960 965 # full versions are inserted when the needed deltas
961 966 # become comparable to the uncompressed text
962 967 if not n or dist > len(text) * 2:
963 968 data = compress(text)
964 969 l = len(data[1]) + len(data[0])
965 970 base = n
966 971 else:
967 972 base = self.base(t)
968 973
969 974 offset = 0
970 975 if t >= 0:
971 976 offset = self.end(t)
972 977
973 978 if self.version == REVLOGV0:
974 979 e = (offset, l, base, link, p1, p2, node)
975 980 else:
976 981 e = (self.offset_type(offset, 0), l, len(text),
977 982 base, link, self.rev(p1), self.rev(p2), node)
978 983
979 984 self.index.append(e)
980 985 self.nodemap[node] = n
981 986 entry = struct.pack(self.indexformat, *e)
982 987
983 988 if not self.inlinedata():
984 989 transaction.add(self.datafile, offset)
985 990 transaction.add(self.indexfile, n * len(entry))
986 991 f = self.opener(self.datafile, "a")
987 992 if data[0]:
988 993 f.write(data[0])
989 994 f.write(data[1])
990 995 f.close()
991 996 f = self.opener(self.indexfile, "a")
992 997 else:
993 998 f = self.opener(self.indexfile, "a+")
994 999 f.seek(0, 2)
995 1000 transaction.add(self.indexfile, f.tell(), self.count() - 1)
996 1001
997 1002 if len(self.index) == 1 and self.version != REVLOGV0:
998 1003 l = struct.pack(versionformat, self.version)
999 1004 f.write(l)
1000 1005 entry = entry[4:]
1001 1006
1002 1007 f.write(entry)
1003 1008
1004 1009 if self.inlinedata():
1005 1010 f.write(data[0])
1006 1011 f.write(data[1])
1007 1012 self.checkinlinesize(transaction, f)
1008 1013
1009 1014 self.cache = (node, n, text)
1010 1015 return node
1011 1016
1012 1017 def ancestor(self, a, b):
1013 1018 """calculate the least common ancestor of nodes a and b"""
1014 1019
1015 1020 # start with some short cuts for the linear cases
1016 1021 if a == b:
1017 1022 return a
1018 1023 ra = self.rev(a)
1019 1024 rb = self.rev(b)
1020 1025 if ra < rb:
1021 1026 last = b
1022 1027 first = a
1023 1028 else:
1024 1029 last = a
1025 1030 first = b
1026 1031
1027 1032 # reachable won't include stop in the list, so we have to use a parent
1028 1033 reachable = self.reachable(last, stop=self.parents(first)[0])
1029 1034 if first in reachable:
1030 1035 return first
1031 1036
1032 1037 # calculate the distance of every node from root
1033 1038 dist = {nullid: 0}
1034 1039 for i in xrange(self.count()):
1035 1040 n = self.node(i)
1036 1041 p1, p2 = self.parents(n)
1037 1042 dist[n] = max(dist[p1], dist[p2]) + 1
1038 1043
1039 1044 # traverse ancestors in order of decreasing distance from root
1040 1045 def ancestors(node):
1041 1046 # we store negative distances because heap returns smallest member
1042 1047 h = [(-dist[node], node)]
1043 1048 seen = {}
1044 1049 while h:
1045 1050 d, n = heapq.heappop(h)
1046 1051 if n not in seen:
1047 1052 seen[n] = 1
1048 1053 yield (-d, n)
1049 1054 for p in self.parents(n):
1050 1055 heapq.heappush(h, (-dist[p], p))
1051 1056
1052 1057 def generations(node):
1053 1058 sg, s = None, {}
1054 1059 for g,n in ancestors(node):
1055 1060 if g != sg:
1056 1061 if sg:
1057 1062 yield sg, s
1058 1063 sg, s = g, {n:1}
1059 1064 else:
1060 1065 s[n] = 1
1061 1066 yield sg, s
1062 1067
1063 1068 x = generations(a)
1064 1069 y = generations(b)
1065 1070 gx = x.next()
1066 1071 gy = y.next()
1067 1072
1068 1073 # increment each ancestor list until it is closer to root than
1069 1074 # the other, or they match
1070 1075 while 1:
1071 1076 #print "ancestor gen %s %s" % (gx[0], gy[0])
1072 1077 if gx[0] == gy[0]:
1073 1078 # find the intersection
1074 1079 i = [ n for n in gx[1] if n in gy[1] ]
1075 1080 if i:
1076 1081 return i[0]
1077 1082 else:
1078 1083 #print "next"
1079 1084 gy = y.next()
1080 1085 gx = x.next()
1081 1086 elif gx[0] < gy[0]:
1082 1087 #print "next y"
1083 1088 gy = y.next()
1084 1089 else:
1085 1090 #print "next x"
1086 1091 gx = x.next()
1087 1092
1088 1093 def group(self, nodelist, lookup, infocollect=None):
1089 1094 """calculate a delta group
1090 1095
1091 1096 Given a list of changeset revs, return a set of deltas and
1092 1097 metadata corresponding to nodes. The first delta is
1093 1098 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1094 1099 have this parent as it has all history before these
1095 1100 changesets. The parent used is parents(nodes[0])[0].
1096 1101 """
1097 1102 revs = [self.rev(n) for n in nodelist]
1098 1103
1099 1104 # if we don't have any revisions touched by these changesets, bail
1100 1105 if not revs:
1101 1106 yield changegroup.closechunk()
1102 1107 return
1103 1108
1104 1109 # add the parent of the first rev
1105 1110 p = self.parents(self.node(revs[0]))[0]
1106 1111 revs.insert(0, self.rev(p))
1107 1112
1108 1113 # build deltas
1109 1114 for d in xrange(0, len(revs) - 1):
1110 1115 a, b = revs[d], revs[d + 1]
1111 1116 nb = self.node(b)
1112 1117
1113 1118 if infocollect is not None:
1114 1119 infocollect(nb)
1115 1120
1116 1121 d = self.revdiff(a, b)
1117 1122 p = self.parents(nb)
1118 1123 meta = nb + p[0] + p[1] + lookup(nb)
1119 1124 yield changegroup.genchunk("%s%s" % (meta, d))
1120 1125
1121 1126 yield changegroup.closechunk()
1122 1127
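Each chunk that group() yields is framed as an 80-byte header — the revision's node, its two parents, and the linked changeset node — followed by the raw delta; addgroup() splits it apart with the same struct format. A sketch of that framing (hypothetical node values):

import struct

def frame(node, p1, p2, cs, delta):
    # four 20-byte nodes, then the delta, as produced by group()
    return node + p1 + p2 + cs + delta

chunk = frame('\x01' * 20, '\x02' * 20, '\x03' * 20, '\x04' * 20, 'DELTA')
node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
assert (node, cs, chunk[80:]) == ('\x01' * 20, '\x04' * 20, 'DELTA')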
1123 1128 def addgroup(self, revs, linkmapper, transaction, unique=0):
1124 1129 """
1125 1130 add a delta group
1126 1131
1127 1132 given a set of deltas, add them to the revision log. the
1128 1133 first delta is against its parent, which should be in our
1129 1134 log; the rest are against the previous delta.
1130 1135 """
1131 1136
1132 1137 # track the base of the current delta log
1133 1138 r = self.count()
1134 1139 t = r - 1
1135 1140 node = None
1136 1141
1137 1142 base = prev = -1
1138 1143 start = end = textlen = 0
1139 1144 if r:
1140 1145 end = self.end(t)
1141 1146
1142 1147 ifh = self.opener(self.indexfile, "a+")
1143 1148 ifh.seek(0, 2)
1144 1149 transaction.add(self.indexfile, ifh.tell(), self.count())
1145 1150 if self.inlinedata():
1146 1151 dfh = None
1147 1152 else:
1148 1153 transaction.add(self.datafile, end)
1149 1154 dfh = self.opener(self.datafile, "a")
1150 1155
1151 1156 # loop through our set of deltas
1152 1157 chain = None
1153 1158 for chunk in revs:
1154 1159 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1155 1160 link = linkmapper(cs)
1156 1161 if node in self.nodemap:
1157 1162 # this can happen if two branches make the same change
1158 1163 # if unique:
1159 1164 # raise RevlogError(_("already have %s") % hex(node[:4]))
1160 1165 chain = node
1161 1166 continue
1162 1167 delta = chunk[80:]
1163 1168
1164 1169 for p in (p1, p2):
1165 1170 if not p in self.nodemap:
1166 1171 raise RevlogError(_("unknown parent %s") % short(p))
1167 1172
1168 1173 if not chain:
1169 1174 # retrieve the parent revision of the delta chain
1170 1175 chain = p1
1171 1176 if not chain in self.nodemap:
1172 1177 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1173 1178
1174 1179 # full versions are inserted when the needed deltas become
1175 1180 # comparable to the uncompressed text or when the previous
1176 1181 # version is not the one we have a delta against. We use
1177 1182 # the size of the previous full rev as a proxy for the
1178 1183 # current size.
1179 1184
1180 1185 if chain == prev:
1181 1186 tempd = compress(delta)
1182 1187 cdelta = tempd[0] + tempd[1]
1183 1188 textlen = mdiff.patchedsize(textlen, delta)
1184 1189
1185 1190 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1186 1191 # flush our writes here so we can read it in revision
1187 1192 if dfh:
1188 1193 dfh.flush()
1189 1194 ifh.flush()
1190 1195 text = self.revision(chain)
1191 1196 text = self.patches(text, [delta])
1192 1197 chk = self.addrevision(text, transaction, link, p1, p2)
1193 1198 if chk != node:
1194 1199 raise RevlogError(_("consistency error adding group"))
1195 1200 textlen = len(text)
1196 1201 else:
1197 1202 if self.version == REVLOGV0:
1198 1203 e = (end, len(cdelta), base, link, p1, p2, node)
1199 1204 else:
1200 1205 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1201 1206 link, self.rev(p1), self.rev(p2), node)
1202 1207 self.index.append(e)
1203 1208 self.nodemap[node] = r
1204 1209 if self.inlinedata():
1205 1210 ifh.write(struct.pack(self.indexformat, *e))
1206 1211 ifh.write(cdelta)
1207 1212 self.checkinlinesize(transaction, ifh)
1208 1213 if not self.inlinedata():
1209 1214 dfh = self.opener(self.datafile, "a")
1210 1215 ifh = self.opener(self.indexfile, "a")
1211 1216 else:
1212 1217 if not dfh:
1213 1218 # addrevision switched from inline to conventional
1214 1219 # reopen the index
1215 1220 dfh = self.opener(self.datafile, "a")
1216 1221 ifh = self.opener(self.indexfile, "a")
1217 1222 dfh.write(cdelta)
1218 1223 ifh.write(struct.pack(self.indexformat, *e))
1219 1224
1220 1225 t, r, chain, prev = r, r + 1, node, node
1221 1226 base = self.base(t)
1222 1227 start = self.start(base)
1223 1228 end = self.end(t)
1224 1229
1225 1230 return node
1226 1231
1227 1232 def strip(self, rev, minlink):
1228 1233 if self.count() == 0 or rev >= self.count():
1229 1234 return
1230 1235
1231 1236 if isinstance(self.index, lazyindex):
1232 1237 self.loadindexmap()
1233 1238
1234 1239 # When stripping away a revision, we need to make sure it
1235 1240 # does not actually belong to an older changeset.
1236 1241 # The minlink parameter defines the oldest revision
1237 1242 # we're allowed to strip away.
1238 1243 while minlink > self.index[rev][-4]:
1239 1244 rev += 1
1240 1245 if rev >= self.count():
1241 1246 return
1242 1247
1243 1248 # first truncate the files on disk
1244 1249 end = self.start(rev)
1245 1250 if not self.inlinedata():
1246 1251 df = self.opener(self.datafile, "a")
1247 1252 df.truncate(end)
1248 1253 end = rev * struct.calcsize(self.indexformat)
1249 1254 else:
1250 1255 end += rev * struct.calcsize(self.indexformat)
1251 1256
1252 1257 indexf = self.opener(self.indexfile, "a")
1253 1258 indexf.truncate(end)
1254 1259
1255 1260 # then reset internal state in memory to forget those revisions
1256 1261 self.cache = None
1257 1262 self.chunkcache = None
1258 1263 for x in xrange(rev, self.count()):
1259 1264 del self.nodemap[self.node(x)]
1260 1265
1261 1266 del self.index[rev:]
1262 1267
1263 1268 def checksize(self):
1264 1269 expected = 0
1265 1270 if self.count():
1266 1271 expected = self.end(self.count() - 1)
1267 1272
1268 1273 try:
1269 1274 f = self.opener(self.datafile)
1270 1275 f.seek(0, 2)
1271 1276 actual = f.tell()
1272 1277 dd = actual - expected
1273 1278 except IOError, inst:
1274 1279 if inst.errno != errno.ENOENT:
1275 1280 raise
1276 1281 dd = 0
1277 1282
1278 1283 try:
1279 1284 f = self.opener(self.indexfile)
1280 1285 f.seek(0, 2)
1281 1286 actual = f.tell()
1282 1287 s = struct.calcsize(self.indexformat)
1283 1288 i = actual / s
1284 1289 di = actual - (i * s)
1285 1290 if self.inlinedata():
1286 1291 databytes = 0
1287 1292 for r in xrange(self.count()):
1288 1293 databytes += self.length(r)
1289 1294 dd = 0
1290 1295 di = actual - self.count() * s - databytes
1291 1296 except IOError, inst:
1292 1297 if inst.errno != errno.ENOENT:
1293 1298 raise
1294 1299 di = 0
1295 1300
1296 1301 return (dd, di)
1297 1302
1298 1303
General Comments 0
You need to be logged in to leave comments. Login now