revlog: some codingstyle cleanups
Matt Mackall
r4980:fc44c8df default
@@ -1,1280 +1,1299 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog version strings
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21
22 22 # revlog flags
23 23 REVLOGNGINLINEDATA = (1 << 16)
24 24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25 25
26 26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 28
29 29 def hash(text, p1, p2):
30 30 """generate a hash from the given text and its parent hashes
31 31
32 32 This hash combines both the current file contents and its history
33 33 in a manner that makes it easy to distinguish nodes with the same
34 34 content in the revision graph.
35 35 """
36 36 l = [p1, p2]
37 37 l.sort()
38 38 s = sha.new(l[0])
39 39 s.update(l[1])
40 40 s.update(text)
41 41 return s.digest()
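# --- editorial example, not part of the original diff: a minimal
# sketch of how hash() above separates identical contents with
# different history. hashlib.sha1 stands in for the deprecated sha
# module imported at the top; the digests are equivalent. Python 2
# strings, matching the surrounding file.
import hashlib
def toy_hash(text, p1, p2):
    l = sorted([p1, p2])
    return hashlib.sha1(l[0] + l[1] + text).digest()
root = toy_hash("contents", "\0" * 20, "\0" * 20)
child = toy_hash("contents", root, "\0" * 20)
assert root != child   # same text, different parents, distinct nodeids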
42 42
43 43 def compress(text):
44 44 """ generate a possibly-compressed representation of text """
45 if not text: return ("", text)
45 if not text:
46 return ("", text)
46 47 if len(text) < 44:
47 if text[0] == '\0': return ("", text)
48 if text[0] == '\0':
49 return ("", text)
48 50 return ('u', text)
49 51 bin = zlib.compress(text)
50 52 if len(bin) > len(text):
51 if text[0] == '\0': return ("", text)
53 if text[0] == '\0':
54 return ("", text)
52 55 return ('u', text)
53 56 return ("", bin)
54 57
55 58 def decompress(bin):
56 59 """ decompress the given input """
57 if not bin: return bin
60 if not bin:
61 return bin
58 62 t = bin[0]
59 if t == '\0': return bin
60 if t == 'x': return zlib.decompress(bin)
61 if t == 'u': return bin[1:]
63 if t == '\0':
64 return bin
65 if t == 'x':
66 return zlib.decompress(bin)
67 if t == 'u':
68 return bin[1:]
62 69 raise RevlogError(_("unknown compression type %r") % t)
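# --- editorial example, not part of the original diff: a round-trip
# through compress()/decompress() above, assuming both functions are
# in scope. zlib output carries its own 'x' first byte; short or
# incompressible text is stored literally behind a 'u' marker.
big = "the quick brown fox jumps over the lazy dog " * 20
h, b = compress(big)
assert h == "" and b[0] == 'x' and decompress(b) == big
h, b = compress("tiny")                      # under the 44-byte floor
assert (h, b) == ('u', "tiny") and decompress(h + b) == "tiny"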
63 70
64 71 indexformatv0 = ">4l20s20s20s"
65 72 v0shaoffset = 56
66 73 # index ng:
67 74 # 6 bytes offset
68 75 # 2 bytes flags
69 76 # 4 bytes compressed length
70 77 # 4 bytes uncompressed length
71 78 # 4 bytes: base rev
72 79 # 4 bytes link rev
73 80 # 4 bytes parent 1 rev
74 81 # 4 bytes parent 2 rev
75 82 # 32 bytes: nodeid
76 83 indexformatng = ">Qiiiiii20s12x"
77 84 ngshaoffset = 32
78 85 versionformat = ">I"
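# --- editorial example, not part of the original diff: the two entry
# layouts above, checked with struct.calcsize. The v1 ("ng") record is
# 64 bytes: one 64-bit offset/flags word, six 32-bit ints, and a
# 20-byte nodeid padded to 32 bytes.
import struct
assert struct.calcsize(indexformatng) == 64   # ">Qiiiiii20s12x"
assert struct.calcsize(indexformatv0) == 76   # ">4l20s20s20s"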
79 86
80 87 class lazyparser(object):
81 88 """
82 89 this class avoids the need to parse the entirety of large indices
83 90 """
84 91
85 92 # lazyparser is not safe to use on windows if win32 extensions not
86 93 # available. it keeps a file handle open, which makes it impossible
87 94 # to break hardlinks on local cloned repos.
88 95 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
89 96 hasattr(util, 'win32api'))
90 97
91 98 def __init__(self, dataf, size):
92 99 self.dataf = dataf
93 100 self.s = struct.calcsize(indexformatng)
94 101 self.datasize = size
95 102 self.l = size/self.s
96 103 self.index = [None] * self.l
97 104 self.map = {nullid: nullrev}
98 105 self.allmap = 0
99 106 self.all = 0
100 107 self.mapfind_count = 0
101 108
102 109 def loadmap(self):
103 110 """
104 111 during a commit, we need to make sure the rev being added is
105 112 not a duplicate. This requires loading the entire index,
106 113 which is fairly slow. loadmap can load up just the node map,
107 114 which takes much less time.
108 115 """
109 if self.allmap: return
116 if self.allmap:
117 return
110 118 end = self.datasize
111 119 self.allmap = 1
112 120 cur = 0
113 121 count = 0
114 122 blocksize = self.s * 256
115 123 self.dataf.seek(0)
116 124 while cur < end:
117 125 data = self.dataf.read(blocksize)
118 126 off = 0
119 127 for x in xrange(256):
120 128 n = data[off + ngshaoffset:off + ngshaoffset + 20]
121 129 self.map[n] = count
122 130 count += 1
123 131 if count >= self.l:
124 132 break
125 133 off += self.s
126 134 cur += blocksize
127 135
128 136 def loadblock(self, blockstart, blocksize, data=None):
129 if self.all: return
137 if self.all:
138 return
130 139 if data is None:
131 140 self.dataf.seek(blockstart)
132 141 if blockstart + blocksize > self.datasize:
133 142 # the revlog may have grown since we've started running,
134 143 # but we don't have space in self.index for more entries.
135 144 # limit blocksize so that we don't get too much data.
136 145 blocksize = max(self.datasize - blockstart, 0)
137 146 data = self.dataf.read(blocksize)
138 147 lend = len(data) / self.s
139 148 i = blockstart / self.s
140 149 off = 0
141 150 # lazyindex supports __delitem__
142 151 if lend > len(self.index) - i:
143 152 lend = len(self.index) - i
144 153 for x in xrange(lend):
145 154 if self.index[i + x] == None:
146 155 b = data[off : off + self.s]
147 156 self.index[i + x] = b
148 157 n = b[ngshaoffset:ngshaoffset + 20]
149 158 self.map[n] = i + x
150 159 off += self.s
151 160
152 161 def findnode(self, node):
153 162 """search backwards through the index file for a specific node"""
154 if self.allmap: return None
163 if self.allmap:
164 return None
155 165
156 166 # hg log will cause many many searches for the manifest
157 167 # nodes. After we get called a few times, just load the whole
158 168 # thing.
159 169 if self.mapfind_count > 8:
160 170 self.loadmap()
161 171 if node in self.map:
162 172 return node
163 173 return None
164 174 self.mapfind_count += 1
165 175 last = self.l - 1
166 176 while self.index[last] != None:
167 177 if last == 0:
168 178 self.all = 1
169 179 self.allmap = 1
170 180 return None
171 181 last -= 1
172 182 end = (last + 1) * self.s
173 183 blocksize = self.s * 256
174 184 while end >= 0:
175 185 start = max(end - blocksize, 0)
176 186 self.dataf.seek(start)
177 187 data = self.dataf.read(end - start)
178 188 findend = end - start
179 189 while True:
180 190 # we're searching backwards, so we have to make sure
181 191 # we don't find a changeset where this node is a parent
182 192 off = data.rfind(node, 0, findend)
183 193 findend = off
184 194 if off >= 0:
185 195 i = off / self.s
186 196 off = i * self.s
187 197 n = data[off + ngshaoffset:off + ngshaoffset + 20]
188 198 if n == node:
189 199 self.map[n] = i + start / self.s
190 200 return node
191 201 else:
192 202 break
193 203 end -= blocksize
194 204 return None
195 205
196 206 def loadindex(self, i=None, end=None):
197 if self.all: return
207 if self.all:
208 return
198 209 all = False
199 210 if i == None:
200 211 blockstart = 0
201 212 blocksize = (512 / self.s) * self.s
202 213 end = self.datasize
203 214 all = True
204 215 else:
205 216 if end:
206 217 blockstart = i * self.s
207 218 end = end * self.s
208 219 blocksize = end - blockstart
209 220 else:
210 221 blockstart = (i & ~63) * self.s
211 222 blocksize = self.s * 64
212 223 end = blockstart + blocksize
213 224 while blockstart < end:
214 225 self.loadblock(blockstart, blocksize)
215 226 blockstart += blocksize
216 if all: self.all = True
227 if all:
228 self.all = True
217 229
218 230 class lazyindex(object):
219 231 """a lazy version of the index array"""
220 232 def __init__(self, parser):
221 233 self.p = parser
222 234 def __len__(self):
223 235 return len(self.p.index)
224 236 def load(self, pos):
225 237 if pos < 0:
226 238 pos += len(self.p.index)
227 239 self.p.loadindex(pos)
228 240 return self.p.index[pos]
229 241 def __getitem__(self, pos):
230 242 ret = self.p.index[pos] or self.load(pos)
231 243 if isinstance(ret, str):
232 244 ret = struct.unpack(indexformatng, ret)
233 245 return ret
234 246 def __setitem__(self, pos, item):
235 247 self.p.index[pos] = item
236 248 def __delitem__(self, pos):
237 249 del self.p.index[pos]
238 250 def insert(self, pos, e):
239 251 self.p.index.insert(pos, e)
240 252 def append(self, e):
241 253 self.p.index.append(e)
242 254
243 255 class lazymap(object):
244 256 """a lazy version of the node map"""
245 257 def __init__(self, parser):
246 258 self.p = parser
247 259 def load(self, key):
248 260 n = self.p.findnode(key)
249 261 if n == None:
250 262 raise KeyError(key)
251 263 def __contains__(self, key):
252 264 if key in self.p.map:
253 265 return True
254 266 self.p.loadmap()
255 267 return key in self.p.map
256 268 def __iter__(self):
257 269 yield nullid
258 270 for i in xrange(self.p.l):
259 271 ret = self.p.index[i]
260 272 if not ret:
261 273 self.p.loadindex(i)
262 274 ret = self.p.index[i]
263 275 if isinstance(ret, str):
264 276 ret = struct.unpack(indexformatng, ret)
265 277 yield ret[7]
266 278 def __getitem__(self, key):
267 279 try:
268 280 return self.p.map[key]
269 281 except KeyError:
270 282 try:
271 283 self.load(key)
272 284 return self.p.map[key]
273 285 except KeyError:
274 286 raise KeyError("node " + hex(key))
275 287 def __setitem__(self, key, val):
276 288 self.p.map[key] = val
277 289 def __delitem__(self, key):
278 290 del self.p.map[key]
279 291
280 class RevlogError(Exception): pass
281 class LookupError(RevlogError): pass
292 class RevlogError(Exception):
293 pass
294 class LookupError(RevlogError):
295 pass
282 296
283 297 def getoffset(q):
284 298 if q & 0xFFFF:
285 299 raise RevlogError(_('incompatible revision flag %x') % q)
286 300 return int(q >> 16)
287 301
288 302 def gettype(q):
289 303 return int(q & 0xFFFF)
290 304
291 305 def offset_type(offset, type):
292 306 return long(long(offset) << 16 | type)
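# --- editorial example, not part of the original diff: offset and
# type share one 64-bit index field, offset in the high 48 bits and
# type flags in the low 16. Note that getoffset() refuses entries
# whose flag bits are set, so the round-trip below uses type 0.
v = offset_type(12345, 0)
assert getoffset(v) == 12345
assert gettype(v) == 0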
293 307
294 308 class revlogoldio(object):
295 309 def __init__(self):
296 310 self.chunkcache = None
297 311 self.size = struct.calcsize(indexformatv0)
298 312
299 313 def parseindex(self, fp, st, inline):
300 314 s = self.size
301 315 index = []
302 316 nodemap = {nullid: nullrev}
303 317 n = off = 0
304 318 data = fp.read()
305 319 l = len(data)
306 320 while off + s <= l:
307 321 cur = data[off:off + s]
308 322 off += s
309 323 e = struct.unpack(indexformatv0, cur)
310 324 # transform to revlogv1 format
311 325 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
312 326 nodemap[e[4]], nodemap[e[5]], e[6])
313 327 index.append(e2)
314 328 nodemap[e[6]] = n
315 329 n += 1
316 330
317 331 return index, nodemap
318 332
319 333 class revlogio(object):
320 334 def __init__(self):
321 335 self.chunkcache = None
322 336 self.size = struct.calcsize(indexformatng)
323 337
324 338 def parseindex(self, fp, st, inline):
325 339 if (lazyparser.safe_to_use and not inline and
326 340 st and st.st_size > 1000000):
327 341 # big index, let's parse it on demand
328 342 parser = lazyparser(fp, st.st_size)
329 343 index = lazyindex(parser)
330 344 nodemap = lazymap(parser)
331 345 e = list(index[0])
332 346 type = gettype(e[0])
333 347 e[0] = offset_type(0, type)
334 348 index[0] = e
335 349 return index, nodemap
336 350
337 351 s = self.size
338 352 index = []
339 353 nodemap = {nullid: nullrev}
340 354 n = off = 0
341 355 # if we're not using lazymap, always read the whole index
342 356 data = fp.read()
343 357 l = len(data)
344 358 if inline:
345 359 self.chunkcache = (0, data)
346 360 while off + s <= l:
347 361 e = struct.unpack(indexformatng, data[off:off + s])
348 362 index.append(e)
349 363 nodemap[e[7]] = n
350 364 n += 1
351 365 off += s
352 366 if inline:
353 367 if e[1] < 0:
354 368 break
355 369 off += e[1]
356 370
357 371 e = list(index[0])
358 372 type = gettype(e[0])
359 373 e[0] = offset_type(0, type)
360 374 index[0] = e
361 375
362 376 return index, nodemap
363 377
364 378 class revlog(object):
365 379 """
366 380 the underlying revision storage object
367 381
368 382 A revlog consists of two parts, an index and the revision data.
369 383
370 384 The index is a file with a fixed record size containing
371 385 information on each revision, including its nodeid (hash), the
372 386 nodeids of its parents, the position and offset of its data within
373 387 the data file, and the revision it's based on. Finally, each entry
374 388 contains a linkrev entry that can serve as a pointer to external
375 389 data.
376 390
377 391 The revision data itself is a linear collection of data chunks.
378 392 Each chunk represents a revision and is usually represented as a
379 393 delta against the previous chunk. To bound lookup time, runs of
380 394 deltas are limited to about 2 times the length of the original
381 395 version data. This makes retrieval of a version proportional to
382 396 its size, or O(1) relative to the number of revisions.
383 397
384 398 Both pieces of the revlog are written to in an append-only
385 399 fashion, which means we never need to rewrite a file to insert or
386 400 remove data, and can use some simple techniques to avoid the need
387 401 for locking while reading.
388 402 """
389 403 def __init__(self, opener, indexfile):
390 404 """
391 405 create a revlog object
392 406
393 407 opener is a function that abstracts the file opening operation
394 408 and can be used to implement COW semantics or the like.
395 409 """
396 410 self.indexfile = indexfile
397 411 self.datafile = indexfile[:-2] + ".d"
398 412 self.opener = opener
399 413
400 414 self.indexstat = None
401 415 self.cache = None
402 416 self.defversion = REVLOG_DEFAULT_VERSION
403 417 if hasattr(opener, "defversion"):
404 418 self.defversion = opener.defversion
405 419 if self.defversion & REVLOGNG:
406 420 self.defversion |= REVLOGNGINLINEDATA
407 421 self._load()
408 422
409 423 def _load(self):
410 424 v = self.defversion
411 425 try:
412 426 f = self.opener(self.indexfile)
413 427 i = f.read(4)
414 428 f.seek(0)
415 429 if len(i) > 0:
416 430 v = struct.unpack(versionformat, i)[0]
417 431 except IOError, inst:
418 432 if inst.errno != errno.ENOENT:
419 433 raise
420 434 i = ""
421 435 else:
422 436 try:
423 437 st = util.fstat(f)
424 438 except AttributeError, inst:
425 439 st = None
426 440 else:
427 441 oldst = self.indexstat
428 442 if (oldst and st.st_dev == oldst.st_dev
429 443 and st.st_ino == oldst.st_ino
430 444 and st.st_mtime == oldst.st_mtime
431 445 and st.st_ctime == oldst.st_ctime
432 446 and st.st_size == oldst.st_size):
433 447 return
434 448 self.indexstat = st
435 449 flags = v & ~0xFFFF
436 450 fmt = v & 0xFFFF
437 451 if fmt == REVLOGV0:
438 452 if flags:
439 453 raise RevlogError(_("index %s unknown flags %#04x for format v0")
440 454 % (self.indexfile, flags >> 16))
441 455 elif fmt == REVLOGNG:
442 456 if flags & ~REVLOGNGINLINEDATA:
443 457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
444 458 % (self.indexfile, flags >> 16))
445 459 else:
446 460 raise RevlogError(_("index %s unknown format %d")
447 461 % (self.indexfile, fmt))
448 462 self.version = v
449 463 self.nodemap = {nullid: nullrev}
450 464 self.index = []
451 465 self._io = revlogio()
452 466 if self.version == REVLOGV0:
453 467 self._io = revlogoldio()
454 468 if i:
455 469 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
456 470 # add the magic null revision at -1
457 471 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
458 472
459 473 def _loadindex(self, start, end):
460 474 """load a block of indexes all at once from the lazy parser"""
461 475 if isinstance(self.index, lazyindex):
462 476 self.index.p.loadindex(start, end)
463 477
464 478 def _loadindexmap(self):
465 479 """loads both the map and the index from the lazy parser"""
466 480 if isinstance(self.index, lazyindex):
467 481 p = self.index.p
468 482 p.loadindex()
469 483 self.nodemap = p.map
470 484
471 485 def _loadmap(self):
472 486 """loads the map from the lazy parser"""
473 487 if isinstance(self.nodemap, lazymap):
474 488 self.nodemap.p.loadmap()
475 489 self.nodemap = self.nodemap.p.map
476 490
477 def _inline(self): return self.version & REVLOGNGINLINEDATA
491 def _inline(self):
492 return self.version & REVLOGNGINLINEDATA
493 def tip(self):
494 return self.node(len(self.index) - 2)
495 def count(self):
496 return len(self.index) - 1
478 497
479 def tip(self): return self.node(len(self.index) - 2)
480 def count(self): return len(self.index) - 1
481 def node(self, rev):
482 return self.index[rev][7]
483 498 def rev(self, node):
484 499 try:
485 500 return self.nodemap[node]
486 501 except KeyError:
487 502 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
503 def node(self, rev):
504 return self.index[rev][7]
488 505 def linkrev(self, node):
489 506 return self.index[self.rev(node)][4]
490 507 def parents(self, node):
491 508 d = self.index[self.rev(node)][5:7]
492 509 return (self.node(d[0]), self.node(d[1]))
493 510 def parentrevs(self, rev):
494 511 return self.index[rev][5:7]
495 512 def start(self, rev):
496 513 return getoffset(self.index[rev][0])
497 def end(self, rev): return self.start(rev) + self.length(rev)
514 def end(self, rev):
515 return self.start(rev) + self.length(rev)
516 def length(self, rev):
517 return self.index[rev][1]
518 def base(self, rev):
519 return self.index[rev][3]
498 520
499 521 def size(self, rev):
500 522 """return the length of the uncompressed text for a given revision"""
501 523 l = self.index[rev][2]
502 524 if l >= 0:
503 525 return l
504 526
505 527 t = self.revision(self.node(rev))
506 528 return len(t)
507 529
508 530 # alternate implementation, The advantage to this code is it
509 531 # will be faster for a single revision. But, the results are not
510 532 # cached, so finding the size of every revision will be slower.
511 533 """
512 534 if self.cache and self.cache[1] == rev:
513 535 return len(self.cache[2])
514 536
515 537 base = self.base(rev)
516 538 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
517 539 base = self.cache[1]
518 540 text = self.cache[2]
519 541 else:
520 542 text = self.revision(self.node(base))
521 543
522 544 l = len(text)
523 545 for x in xrange(base + 1, rev + 1):
524 546 l = mdiff.patchedsize(l, self.chunk(x))
525 547 return l
526 548 """
527 549
528 def length(self, rev):
529 return self.index[rev][1]
530 def base(self, rev):
531 return self.index[rev][3]
532
533 550 def reachable(self, node, stop=None):
534 551 """return a hash of all nodes ancestral to a given node, including
535 552 the node itself, stopping when stop is matched"""
536 553 reachable = {}
537 554 visit = [node]
538 555 reachable[node] = 1
539 556 if stop:
540 557 stopn = self.rev(stop)
541 558 else:
542 559 stopn = 0
543 560 while visit:
544 561 n = visit.pop(0)
545 562 if n == stop:
546 563 continue
547 564 if n == nullid:
548 565 continue
549 566 for p in self.parents(n):
550 567 if self.rev(p) < stopn:
551 568 continue
552 569 if p not in reachable:
553 570 reachable[p] = 1
554 571 visit.append(p)
555 572 return reachable
556 573
557 574 def nodesbetween(self, roots=None, heads=None):
558 575 """Return a tuple containing three elements. Elements 1 and 2 contain
559 576 a final list of bases and heads after all the unreachable ones have been
560 577 pruned. Element 0 contains a topologically sorted list of all
562 579 nodes that satisfy these constraints:
563 580 1. All nodes must be descended from a node in roots (the nodes on
564 581 roots are considered descended from themselves).
565 582 2. All nodes must also be ancestors of a node in heads (the nodes in
566 583 heads are considered to be their own ancestors).
567 584
568 585 If roots is unspecified, nullid is assumed as the only root.
569 586 If heads is unspecified, it is taken to be the output of the
570 587 heads method (i.e. a list of all nodes in the repository that
571 588 have no children)."""
572 589 nonodes = ([], [], [])
573 590 if roots is not None:
574 591 roots = list(roots)
575 592 if not roots:
576 593 return nonodes
577 594 lowestrev = min([self.rev(n) for n in roots])
578 595 else:
579 596 roots = [nullid] # Everybody's a descendent of nullid
580 597 lowestrev = nullrev
581 598 if (lowestrev == nullrev) and (heads is None):
582 599 # We want _all_ the nodes!
583 600 return ([self.node(r) for r in xrange(0, self.count())],
584 601 [nullid], list(self.heads()))
585 602 if heads is None:
586 603 # All nodes are ancestors, so the latest ancestor is the last
587 604 # node.
588 605 highestrev = self.count() - 1
589 606 # Set ancestors to None to signal that every node is an ancestor.
590 607 ancestors = None
591 608 # Set heads to an empty dictionary for later discovery of heads
592 609 heads = {}
593 610 else:
594 611 heads = list(heads)
595 612 if not heads:
596 613 return nonodes
597 614 ancestors = {}
598 615 # Turn heads into a dictionary so we can remove 'fake' heads.
599 616 # Also, later we will be using it to filter out the heads we can't
600 617 # find from roots.
601 618 heads = dict.fromkeys(heads, 0)
602 619 # Start at the top and keep marking parents until we're done.
603 620 nodestotag = heads.keys()
604 621 # Remember where the top was so we can use it as a limit later.
605 622 highestrev = max([self.rev(n) for n in nodestotag])
606 623 while nodestotag:
607 624 # grab a node to tag
608 625 n = nodestotag.pop()
609 626 # Never tag nullid
610 627 if n == nullid:
611 628 continue
612 629 # A node's revision number represents its place in a
613 630 # topologically sorted list of nodes.
614 631 r = self.rev(n)
615 632 if r >= lowestrev:
616 633 if n not in ancestors:
617 634 # If we are possibly a descendent of one of the roots
618 635 # and we haven't already been marked as an ancestor
619 636 ancestors[n] = 1 # Mark as ancestor
620 637 # Add non-nullid parents to list of nodes to tag.
621 638 nodestotag.extend([p for p in self.parents(n) if
622 639 p != nullid])
623 640 elif n in heads: # We've seen it before, is it a fake head?
624 641 # So it is, real heads should not be the ancestors of
625 642 # any other heads.
626 643 heads.pop(n)
627 644 if not ancestors:
628 645 return nonodes
629 646 # Now that we have our set of ancestors, we want to remove any
630 647 # roots that are not ancestors.
631 648
632 649 # If one of the roots was nullid, everything is included anyway.
633 650 if lowestrev > nullrev:
634 651 # But, since we weren't, let's recompute the lowest rev to not
635 652 # include roots that aren't ancestors.
636 653
637 654 # Filter out roots that aren't ancestors of heads
638 655 roots = [n for n in roots if n in ancestors]
639 656 # Recompute the lowest revision
640 657 if roots:
641 658 lowestrev = min([self.rev(n) for n in roots])
642 659 else:
643 660 # No more roots? Return empty list
644 661 return nonodes
645 662 else:
646 663 # We are descending from nullid, and don't need to care about
647 664 # any other roots.
648 665 lowestrev = nullrev
649 666 roots = [nullid]
650 667 # Transform our roots list into a 'set' (i.e. a dictionary where the
651 668 # values don't matter).
652 669 descendents = dict.fromkeys(roots, 1)
653 670 # Also, keep the original roots so we can filter out roots that aren't
654 671 # 'real' roots (i.e. are descended from other roots).
655 672 roots = descendents.copy()
656 673 # Our topologically sorted list of output nodes.
657 674 orderedout = []
658 675 # Don't start at nullid since we don't want nullid in our output list,
659 676 # and if nullid shows up in descendents, empty parents will look like
660 677 # they're descendents.
661 678 for r in xrange(max(lowestrev, 0), highestrev + 1):
662 679 n = self.node(r)
663 680 isdescendent = False
664 681 if lowestrev == nullrev: # Everybody is a descendent of nullid
665 682 isdescendent = True
666 683 elif n in descendents:
667 684 # n is already a descendent
668 685 isdescendent = True
669 686 # This check only needs to be done here because all the roots
670 687 # will start being marked as descendents before the loop.
671 688 if n in roots:
672 689 # If n was a root, check if it's a 'real' root.
673 690 p = tuple(self.parents(n))
674 691 # If any of its parents are descendents, it's not a root.
675 692 if (p[0] in descendents) or (p[1] in descendents):
676 693 roots.pop(n)
677 694 else:
678 695 p = tuple(self.parents(n))
679 696 # A node is a descendent if either of its parents are
680 697 # descendents. (We seeded the descendents list with the roots
681 698 # up there, remember?)
682 699 if (p[0] in descendents) or (p[1] in descendents):
683 700 descendents[n] = 1
684 701 isdescendent = True
685 702 if isdescendent and ((ancestors is None) or (n in ancestors)):
686 703 # Only include nodes that are both descendents and ancestors.
687 704 orderedout.append(n)
688 705 if (ancestors is not None) and (n in heads):
689 706 # We're trying to figure out which heads are reachable
690 707 # from roots.
691 708 # Mark this head as having been reached
692 709 heads[n] = 1
693 710 elif ancestors is None:
694 711 # Otherwise, we're trying to discover the heads.
695 712 # Assume this is a head because if it isn't, the next step
696 713 # will eventually remove it.
697 714 heads[n] = 1
698 715 # But, obviously its parents aren't.
699 716 for p in self.parents(n):
700 717 heads.pop(p, None)
701 718 heads = [n for n in heads.iterkeys() if heads[n] != 0]
702 719 roots = roots.keys()
703 720 assert orderedout
704 721 assert roots
705 722 assert heads
706 723 return (orderedout, roots, heads)
707 724
708 725 def heads(self, start=None, stop=None):
709 726 """return the list of all nodes that have no children
710 727
711 728 if start is specified, only heads that are descendants of
712 729 start will be returned
713 730 if stop is specified, it will consider all the revs from stop
714 731 as if they had no children
715 732 """
716 733 if start is None:
717 734 start = nullid
718 735 if stop is None:
719 736 stop = []
720 737 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
721 738 startrev = self.rev(start)
722 739 reachable = {startrev: 1}
723 740 heads = {startrev: 1}
724 741
725 742 parentrevs = self.parentrevs
726 743 for r in xrange(startrev + 1, self.count()):
727 744 for p in parentrevs(r):
728 745 if p in reachable:
729 746 if r not in stoprevs:
730 747 reachable[r] = 1
731 748 heads[r] = 1
732 749 if p in heads and p not in stoprevs:
733 750 del heads[p]
734 751
735 752 return [self.node(r) for r in heads]
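# --- editorial sketch, not part of the original diff: the scan above
# in miniature, ignoring start/stop. Every rev begins life as a head
# and is struck off as soon as a later rev names it as a parent.
toyparents = {0: (), 1: (0,), 2: (1,), 3: (1,)}   # rev -> parent revs
toyheads = {}
for r in sorted(toyparents):
    toyheads[r] = 1
    for p in toyparents[r]:
        toyheads.pop(p, None)
assert sorted(toyheads) == [2, 3]                 # two open branches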
736 753
737 754 def children(self, node):
738 755 """find the children of a given node"""
739 756 c = []
740 757 p = self.rev(node)
741 758 for r in range(p + 1, self.count()):
742 759 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
743 760 if prevs:
744 761 for pr in prevs:
745 762 if pr == p:
746 763 c.append(self.node(r))
747 764 elif p == nullrev:
748 765 c.append(self.node(r))
749 766 return c
750 767
751 768 def _match(self, id):
752 769 if isinstance(id, (long, int)):
753 770 # rev
754 771 return self.node(id)
755 772 if len(id) == 20:
756 773 # possibly a binary node
757 774 # odds of a binary node being all hex in ASCII are 1 in 10**25
758 775 try:
759 776 node = id
760 777 r = self.rev(node) # quick search the index
761 778 return node
762 779 except LookupError:
763 780 pass # may be partial hex id
764 781 try:
765 782 # str(rev)
766 783 rev = int(id)
767 if str(rev) != id: raise ValueError
768 if rev < 0: rev = self.count() + rev
769 if rev < 0 or rev >= self.count(): raise ValueError
784 if str(rev) != id:
785 raise ValueError
786 if rev < 0:
787 rev = self.count() + rev
788 if rev < 0 or rev >= self.count():
789 raise ValueError
770 790 return self.node(rev)
771 791 except (ValueError, OverflowError):
772 792 pass
773 793 if len(id) == 40:
774 794 try:
775 795 # a full hex nodeid?
776 796 node = bin(id)
777 797 r = self.rev(node)
778 798 return node
779 799 except TypeError:
780 800 pass
781 801
782 802 def _partialmatch(self, id):
783 803 if len(id) < 40:
784 804 try:
785 805 # hex(node)[:...]
786 806 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
787 807 node = None
788 808 for n in self.nodemap:
789 809 if n.startswith(bin_id) and hex(n).startswith(id):
790 810 if node is not None:
791 811 raise LookupError(_("Ambiguous identifier"))
792 812 node = n
793 813 if node is not None:
794 814 return node
795 815 except TypeError:
796 816 pass
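# --- editorial sketch, not part of the original diff: the prefix
# rules above on a toy node map, using hex strings in place of real
# binary nodeids. A prefix shared by several nodes is ambiguous; a
# unique prefix resolves.
toynodes = ['ab' * 20, 'ac' * 20]       # hypothetical 40-char hex ids
def toy_partialmatch(id):
    matches = [n for n in toynodes if n.startswith(id)]
    if len(matches) > 1:
        raise LookupError(_("Ambiguous identifier"))
    if matches:
        return matches[0]
    return None
assert toy_partialmatch('ac') == 'ac' * 20
try:
    toy_partialmatch('a')               # matches both toy nodes
except LookupError:
    pass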
797 817
798 818 def lookup(self, id):
799 819 """locate a node based on:
800 820 - revision number or str(revision number)
801 821 - nodeid or subset of hex nodeid
802 822 """
803
804 823 n = self._match(id)
805 824 if n is not None:
806 825 return n
807 826 n = self._partialmatch(id)
808 827 if n:
809 828 return n
810 829
811 830 raise LookupError(_("No match found"))
812 831
813 832 def cmp(self, node, text):
814 833 """compare text with a given file revision"""
815 834 p1, p2 = self.parents(node)
816 835 return hash(text, p1, p2) != node
817 836
818 837 def diff(self, a, b):
819 838 """return a delta between two revisions"""
820 839 return mdiff.textdiff(a, b)
821 840
822 841 def patches(self, t, pl):
823 842 """apply a list of patches to a string"""
824 843 return mdiff.patches(t, pl)
825 844
826 845 def chunk(self, rev, df=None, cachelen=4096):
827 846 start, length = self.start(rev), self.length(rev)
828 847 inline = self._inline()
829 848 if inline:
830 849 start += (rev + 1) * self._io.size
831 850 end = start + length
832 851 def loadcache(df):
833 852 cache_length = max(cachelen, length) # 4k
834 853 if not df:
835 854 if inline:
836 855 df = self.opener(self.indexfile)
837 856 else:
838 857 df = self.opener(self.datafile)
839 858 df.seek(start)
840 859 self._io.chunkcache = (start, df.read(cache_length))
841 860
842 861 if not self._io.chunkcache:
843 862 loadcache(df)
844 863
845 864 cache_start = self._io.chunkcache[0]
846 865 cache_end = cache_start + len(self._io.chunkcache[1])
847 866 if start >= cache_start and end <= cache_end:
848 867 # it is cached
849 868 offset = start - cache_start
850 869 else:
851 870 loadcache(df)
852 871 offset = 0
853 872
854 #def checkchunk():
855 # df = self.opener(self.datafile)
856 # df.seek(start)
857 # return df.read(length)
858 #assert s == checkchunk()
859 873 return decompress(self._io.chunkcache[1][offset:offset + length])
860 874
861 875 def delta(self, node):
862 876 """return or calculate a delta between a node and its predecessor"""
863 877 r = self.rev(node)
864 878 return self.revdiff(r - 1, r)
865 879
866 880 def revdiff(self, rev1, rev2):
867 881 """return or calculate a delta between two revisions"""
868 882 b1 = self.base(rev1)
869 883 b2 = self.base(rev2)
870 884 if b1 == b2 and rev1 + 1 == rev2:
871 885 return self.chunk(rev2)
872 886 else:
873 887 return self.diff(self.revision(self.node(rev1)),
874 888 self.revision(self.node(rev2)))
875 889
876 890 def revision(self, node):
877 891 """return an uncompressed revision of a given"""
878 if node == nullid: return ""
879 if self.cache and self.cache[0] == node: return self.cache[2]
892 if node == nullid:
893 return ""
894 if self.cache and self.cache[0] == node:
895 return self.cache[2]
880 896
881 897 # look up what we need to read
882 898 text = None
883 899 rev = self.rev(node)
884 900 base = self.base(rev)
885 901
886 902 if self._inline():
887 903 # we probably have the whole chunk cached
888 904 df = None
889 905 else:
890 906 df = self.opener(self.datafile)
891 907
892 908 # do we have useful data cached?
893 909 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
894 910 base = self.cache[1]
895 911 text = self.cache[2]
896 912 self._loadindex(base, rev + 1)
897 913 else:
898 914 self._loadindex(base, rev + 1)
899 915 text = self.chunk(base, df=df)
900 916
901 917 bins = []
902 918 for r in xrange(base + 1, rev + 1):
903 919 bins.append(self.chunk(r, df=df))
904 920
905 921 text = self.patches(text, bins)
906 922
907 923 p1, p2 = self.parents(node)
908 924 if node != hash(text, p1, p2):
909 925 raise RevlogError(_("integrity check failed on %s:%d")
910 926 % (self.datafile, rev))
911 927
912 928 self.cache = (node, rev, text)
913 929 return text
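# --- editorial sketch, not part of the original diff: reconstruction
# above in miniature. A revision is the full text at its chain base
# plus every stored delta up to the revision itself; real deltas are
# mdiff binary patches, so plain functions stand in for them here.
def toy_reconstruct(basetext, deltas):
    text = basetext
    for apply_delta in deltas:
        text = apply_delta(text)
    return text
assert toy_reconstruct("v1", [lambda t: t + "+d2",
                              lambda t: t + "+d3"]) == "v1+d2+d3"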
914 930
915 931 def checkinlinesize(self, tr, fp=None):
916 932 if not self._inline():
917 933 return
918 934 if not fp:
919 935 fp = self.opener(self.indexfile, 'r')
920 936 fp.seek(0, 2)
921 937 size = fp.tell()
922 938 if size < 131072:
923 939 return
924 940 trinfo = tr.find(self.indexfile)
925 941 if trinfo == None:
926 942 raise RevlogError(_("%s not found in the transaction")
927 943 % self.indexfile)
928 944
929 945 trindex = trinfo[2]
930 946 dataoff = self.start(trindex)
931 947
932 948 tr.add(self.datafile, dataoff)
933 949 df = self.opener(self.datafile, 'w')
934 950 calc = self._io.size
935 951 for r in xrange(self.count()):
936 952 start = self.start(r) + (r + 1) * calc
937 953 length = self.length(r)
938 954 fp.seek(start)
939 955 d = fp.read(length)
940 956 df.write(d)
941 957 fp.close()
942 958 df.close()
943 959 fp = self.opener(self.indexfile, 'w', atomictemp=True)
944 960 self.version &= ~(REVLOGNGINLINEDATA)
945 961 if self.count():
946 962 x = self.index[0]
947 963 e = struct.pack(indexformatng, *x)[4:]
948 964 l = struct.pack(versionformat, self.version)
949 965 fp.write(l)
950 966 fp.write(e)
951 967
952 968 for i in xrange(1, self.count()):
953 969 x = self.index[i]
954 970 e = struct.pack(indexformatng, *x)
955 971 fp.write(e)
956 972
957 973 # if we don't call rename, the temp file will never replace the
958 974 # real index
959 975 fp.rename()
960 976
961 977 tr.replace(self.indexfile, trindex * calc)
962 978 self._io.chunkcache = None
963 979
964 980 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
965 981 """add a revision to the log
966 982
967 983 text - the revision data to add
968 984 transaction - the transaction object used for rollback
969 985 link - the linkrev data to add
970 986 p1, p2 - the parent nodeids of the revision
971 987 d - an optional precomputed delta
972 988 """
973 989 if not self._inline():
974 990 dfh = self.opener(self.datafile, "a")
975 991 else:
976 992 dfh = None
977 993 ifh = self.opener(self.indexfile, "a+")
978 994 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
979 995
980 996 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
981 if text is None: text = ""
982 if p1 is None: p1 = self.tip()
983 if p2 is None: p2 = nullid
997 if text is None:
998 text = ""
999 if p1 is None:
1000 p1 = self.tip()
1001 if p2 is None:
1002 p2 = nullid
984 1003
985 1004 node = hash(text, p1, p2)
986 1005
987 1006 if node in self.nodemap:
988 1007 return node
989 1008
990 1009 n = self.count()
991 1010 t = n - 1
992 1011
993 1012 if n:
994 1013 base = self.base(t)
995 1014 start = self.start(base)
996 1015 end = self.end(t)
997 1016 if not d:
998 1017 prev = self.revision(self.tip())
999 1018 d = self.diff(prev, text)
1000 1019 data = compress(d)
1001 1020 l = len(data[1]) + len(data[0])
1002 1021 dist = end - start + l
1003 1022
1004 1023 # full versions are inserted when the needed deltas
1005 1024 # become comparable to the uncompressed text
1006 1025 if not n or dist > len(text) * 2:
1007 1026 data = compress(text)
1008 1027 l = len(data[1]) + len(data[0])
1009 1028 base = n
1010 1029 else:
1011 1030 base = self.base(t)
1012 1031
1013 1032 offset = 0
1014 1033 if t >= 0:
1015 1034 offset = self.end(t)
1016 1035
1017 1036 e = (offset_type(offset, 0), l, len(text),
1018 1037 base, link, self.rev(p1), self.rev(p2), node)
1019 1038
1020 1039 self.index.insert(-1, e)
1021 1040 self.nodemap[node] = n
1022 1041
1023 1042 if self.version == REVLOGV0:
1024 1043 e = (offset, l, base, link, p1, p2, node)
1025 1044 entry = struct.pack(indexformatv0, *e)
1026 1045 else:
1027 1046 entry = struct.pack(indexformatng, *e)
1028 1047
1029 1048 if not self._inline():
1030 1049 transaction.add(self.datafile, offset)
1031 1050 transaction.add(self.indexfile, n * len(entry))
1032 1051 if data[0]:
1033 1052 dfh.write(data[0])
1034 1053 dfh.write(data[1])
1035 1054 dfh.flush()
1036 1055 else:
1037 1056 ifh.seek(0, 2)
1038 1057 transaction.add(self.indexfile, ifh.tell(), self.count() - 1)
1039 1058
1040 1059 if self.count() == 1 and self.version != REVLOGV0:
1041 1060 l = struct.pack(versionformat, self.version)
1042 1061 ifh.write(l)
1043 1062 entry = entry[4:]
1044 1063
1045 1064 ifh.write(entry)
1046 1065
1047 1066 if self._inline():
1048 1067 ifh.write(data[0])
1049 1068 ifh.write(data[1])
1050 1069 self.checkinlinesize(transaction, ifh)
1051 1070
1052 1071 self.cache = (node, n, text)
1053 1072 return node
1054 1073
1055 1074 def ancestor(self, a, b):
1056 1075 """calculate the least common ancestor of nodes a and b"""
1057 1076
1058 1077 def parents(rev):
1059 1078 return [p for p in self.parentrevs(rev) if p != nullrev]
1060 1079
1061 1080 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1062 1081 if c is None:
1063 1082 return nullid
1064 1083
1065 1084 return self.node(c)
1066 1085
1067 1086 def group(self, nodelist, lookup, infocollect=None):
1068 1087 """calculate a delta group
1069 1088
1070 1089 Given a list of changeset revs, return a set of deltas and
1071 1090 metadata corresponding to nodes. the first delta is
1072 1091 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1073 1092 have this parent as it has all history before these
1074 1093 changesets. the parent used is parents(nodes[0])[0].
1075 1094 """
1076 1095 revs = [self.rev(n) for n in nodelist]
1077 1096
1078 1097 # if we don't have any revisions touched by these changesets, bail
1079 1098 if not revs:
1080 1099 yield changegroup.closechunk()
1081 1100 return
1082 1101
1083 1102 # add the parent of the first rev
1084 1103 p = self.parents(self.node(revs[0]))[0]
1085 1104 revs.insert(0, self.rev(p))
1086 1105
1087 1106 # build deltas
1088 1107 for d in xrange(0, len(revs) - 1):
1089 1108 a, b = revs[d], revs[d + 1]
1090 1109 nb = self.node(b)
1091 1110
1092 1111 if infocollect is not None:
1093 1112 infocollect(nb)
1094 1113
1095 1114 d = self.revdiff(a, b)
1096 1115 p = self.parents(nb)
1097 1116 meta = nb + p[0] + p[1] + lookup(nb)
1098 1117 yield changegroup.genchunk("%s%s" % (meta, d))
1099 1118
1100 1119 yield changegroup.closechunk()
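# --- editorial example, not part of the original diff: the 80-byte
# header assembled in group() above (node, p1, p2, linked changeset)
# is exactly what addgroup() below unpacks with "20s20s20s20s".
node, p1, p2, cs = "n" * 20, "p" * 20, "q" * 20, "c" * 20
meta = node + p1 + p2 + cs
chunk = "%s%s" % (meta, "example delta payload")
assert struct.unpack("20s20s20s20s", chunk[:80]) == (node, p1, p2, cs)
assert chunk[80:] == "example delta payload"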
1101 1120
1102 1121 def addgroup(self, revs, linkmapper, transaction, unique=0):
1103 1122 """
1104 1123 add a delta group
1105 1124
1106 1125 given a set of deltas, add them to the revision log. the
1107 1126 first delta is against its parent, which should be in our
1108 1127 log, the rest are against the previous delta.
1109 1128 """
1110 1129
1111 1130 # track the base of the current delta log
1112 1131 r = self.count()
1113 1132 t = r - 1
1114 1133 node = None
1115 1134
1116 1135 base = prev = nullrev
1117 1136 start = end = textlen = 0
1118 1137 if r:
1119 1138 end = self.end(t)
1120 1139
1121 1140 ifh = self.opener(self.indexfile, "a+")
1122 1141 ifh.seek(0, 2)
1123 1142 transaction.add(self.indexfile, ifh.tell(), self.count())
1124 1143 if self._inline():
1125 1144 dfh = None
1126 1145 else:
1127 1146 transaction.add(self.datafile, end)
1128 1147 dfh = self.opener(self.datafile, "a")
1129 1148
1130 1149 # loop through our set of deltas
1131 1150 chain = None
1132 1151 for chunk in revs:
1133 1152 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1134 1153 link = linkmapper(cs)
1135 1154 if node in self.nodemap:
1136 1155 # this can happen if two branches make the same change
1137 1156 # if unique:
1138 1157 # raise RevlogError(_("already have %s") % hex(node[:4]))
1139 1158 chain = node
1140 1159 continue
1141 1160 delta = chunk[80:]
1142 1161
1143 1162 for p in (p1, p2):
1144 1163 if not p in self.nodemap:
1145 1164 raise LookupError(_("unknown parent %s") % short(p))
1146 1165
1147 1166 if not chain:
1148 1167 # retrieve the parent revision of the delta chain
1149 1168 chain = p1
1150 1169 if not chain in self.nodemap:
1151 1170 raise LookupError(_("unknown base %s") % short(chain[:4]))
1152 1171
1153 1172 # full versions are inserted when the needed deltas become
1154 1173 # comparable to the uncompressed text or when the previous
1155 1174 # version is not the one we have a delta against. We use
1156 1175 # the size of the previous full rev as a proxy for the
1157 1176 # current size.
1158 1177
1159 1178 if chain == prev:
1160 1179 tempd = compress(delta)
1161 1180 cdelta = tempd[0] + tempd[1]
1162 1181 textlen = mdiff.patchedsize(textlen, delta)
1163 1182
1164 1183 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1165 1184 # flush our writes here so we can read it in revision
1166 1185 if dfh:
1167 1186 dfh.flush()
1168 1187 ifh.flush()
1169 1188 text = self.revision(chain)
1170 1189 text = self.patches(text, [delta])
1171 1190 chk = self._addrevision(text, transaction, link, p1, p2, None,
1172 1191 ifh, dfh)
1173 1192 if not dfh and not self._inline():
1174 1193 # addrevision switched from inline to conventional
1175 1194 # reopen the index
1176 1195 dfh = self.opener(self.datafile, "a")
1177 1196 ifh = self.opener(self.indexfile, "a")
1178 1197 if chk != node:
1179 1198 raise RevlogError(_("consistency error adding group"))
1180 1199 textlen = len(text)
1181 1200 else:
1182 1201 e = (offset_type(end, 0), len(cdelta), textlen, base,
1183 1202 link, self.rev(p1), self.rev(p2), node)
1184 1203 self.index.insert(-1, e)
1185 1204 self.nodemap[node] = r
1186 1205 if self._inline():
1187 1206 ifh.write(struct.pack(indexformatng, *e))
1188 1207 ifh.write(cdelta)
1189 1208 self.checkinlinesize(transaction, ifh)
1190 1209 if not self._inline():
1191 1210 dfh = self.opener(self.datafile, "a")
1192 1211 ifh = self.opener(self.indexfile, "a")
1193 1212 else:
1194 1213 if self.version == REVLOGV0:
1195 1214 e = (end, len(cdelta), base, link, p1, p2, node)
1196 1215 entry = struct.pack(indexformatv0, *e)
1197 1216 else:
1198 1217 entry = struct.pack(indexformatng, *e)
1199 1218 dfh.write(cdelta)
1200 1219 ifh.write(entry)
1201 1220
1202 1221 t, r, chain, prev = r, r + 1, node, node
1203 1222 base = self.base(t)
1204 1223 start = self.start(base)
1205 1224 end = self.end(t)
1206 1225
1207 1226 return node
1208 1227
1209 1228 def strip(self, rev, minlink):
1210 1229 if self.count() == 0 or rev >= self.count():
1211 1230 return
1212 1231
1213 1232 if isinstance(self.index, lazyindex):
1214 1233 self._loadindexmap()
1215 1234
1216 1235 # When stripping away a revision, we need to make sure it
1217 1236 # does not actually belong to an older changeset.
1218 1237 # The minlink parameter defines the oldest revision
1219 1238 # we're allowed to strip away.
1220 1239 while minlink > self.index[rev][4]:
1221 1240 rev += 1
1222 1241 if rev >= self.count():
1223 1242 return
1224 1243
1225 1244 # first truncate the files on disk
1226 1245 end = self.start(rev)
1227 1246 if not self._inline():
1228 1247 df = self.opener(self.datafile, "a")
1229 1248 df.truncate(end)
1230 1249 end = rev * self._io.size
1231 1250 else:
1232 1251 end += rev * self._io.size
1233 1252
1234 1253 indexf = self.opener(self.indexfile, "a")
1235 1254 indexf.truncate(end)
1236 1255
1237 1256 # then reset internal state in memory to forget those revisions
1238 1257 self.cache = None
1239 1258 self._io.chunkcache = None
1240 1259 for x in xrange(rev, self.count()):
1241 1260 del self.nodemap[self.node(x)]
1242 1261
1243 1262 del self.index[rev:-1]
1244 1263
1245 1264 def checksize(self):
1246 1265 expected = 0
1247 1266 if self.count():
1248 1267 expected = self.end(self.count() - 1)
1249 1268
1250 1269 try:
1251 1270 f = self.opener(self.datafile)
1252 1271 f.seek(0, 2)
1253 1272 actual = f.tell()
1254 1273 dd = actual - expected
1255 1274 except IOError, inst:
1256 1275 if inst.errno != errno.ENOENT:
1257 1276 raise
1258 1277 dd = 0
1259 1278
1260 1279 try:
1261 1280 f = self.opener(self.indexfile)
1262 1281 f.seek(0, 2)
1263 1282 actual = f.tell()
1264 1283 s = self._io.size
1265 1284 i = actual / s
1266 1285 di = actual - (i * s)
1267 1286 if self._inline():
1268 1287 databytes = 0
1269 1288 for r in xrange(self.count()):
1270 1289 databytes += self.length(r)
1271 1290 dd = 0
1272 1291 di = actual - self.count() * s - databytes
1273 1292 except IOError, inst:
1274 1293 if inst.errno != errno.ENOENT:
1275 1294 raise
1276 1295 di = 0
1277 1296
1278 1297 return (dd, di)
1279 1298
1280 1299