##// END OF EJS Templates
Fix revlog-ng interaction with old-http....
Alexis S. L. Carvalho -
r2138:f5046cab default
parent child Browse files
Show More
@@ -1,24 +1,25
1 1 # httprangereader.py - just what it says
2 2 #
3 3 # Copyright 2005 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 import byterange, urllib2
9 9
10 10 class httprangereader(object):
11 11 def __init__(self, url):
12 12 self.url = url
13 13 self.pos = 0
14 14 def seek(self, pos):
15 15 self.pos = pos
16 16 def read(self, bytes=None):
17 17 opener = urllib2.build_opener(byterange.HTTPRangeHandler())
18 18 urllib2.install_opener(opener)
19 19 req = urllib2.Request(self.url)
20 20 end = ''
21 if bytes: end = self.pos + bytes
21 if bytes:
22 end = self.pos + bytes - 1
22 23 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
23 24 f = urllib2.urlopen(req)
24 return f.read()
25 return f.read(bytes)
@@ -1,1241 +1,1241
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import gettext as _
15 15 from demandload import demandload
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct zlib")
18 18
19 19 # revlog version strings
20 20 REVLOGV0 = 0
21 21 REVLOGNG = 1
22 22
23 23 # revlog flags
24 24 REVLOGNGINLINEDATA = (1 << 16)
25 25
26 26 def flagstr(flag):
27 27 if flag == "inline":
28 28 return REVLOGNGINLINEDATA
29 29 raise RevlogError(_("unknown revlog flag %s" % flag))
30 30
31 31 def hash(text, p1, p2):
32 32 """generate a hash from the given text and its parent hashes
33 33
34 34 This hash combines both the current file contents and its history
35 35 in a manner that makes it easy to distinguish nodes with the same
36 36 content in the revision graph.
37 37 """
38 38 l = [p1, p2]
39 39 l.sort()
40 40 s = sha.new(l[0])
41 41 s.update(l[1])
42 42 s.update(text)
43 43 return s.digest()
44 44
45 45 def compress(text):
46 46 """ generate a possibly-compressed representation of text """
47 47 if not text: return ("", text)
48 48 if len(text) < 44:
49 49 if text[0] == '\0': return ("", text)
50 50 return ('u', text)
51 51 bin = zlib.compress(text)
52 52 if len(bin) > len(text):
53 53 if text[0] == '\0': return ("", text)
54 54 return ('u', text)
55 55 return ("", bin)
56 56
57 57 def decompress(bin):
58 58 """ decompress the given input """
59 59 if not bin: return bin
60 60 t = bin[0]
61 61 if t == '\0': return bin
62 62 if t == 'x': return zlib.decompress(bin)
63 63 if t == 'u': return bin[1:]
64 64 raise RevlogError(_("unknown compression type %r") % t)
65 65
66 66 indexformatv0 = ">4l20s20s20s"
67 67 v0shaoffset = 56
68 68 # index ng:
69 69 # 6 bytes offset
70 70 # 2 bytes flags
71 71 # 4 bytes compressed length
72 72 # 4 bytes uncompressed length
73 73 # 4 bytes: base rev
74 74 # 4 bytes link rev
75 75 # 4 bytes parent 1 rev
76 76 # 4 bytes parent 2 rev
77 77 # 32 bytes: nodeid
78 78 indexformatng = ">Qiiiiii20s12x"
79 79 ngshaoffset = 32
80 80 versionformat = ">i"
81 81
82 82 class lazyparser(object):
83 83 """
84 84 this class avoids the need to parse the entirety of large indices
85 85 """
86 86 def __init__(self, dataf, size, indexformat, shaoffset):
87 87 self.dataf = dataf
88 88 self.format = indexformat
89 89 self.s = struct.calcsize(indexformat)
90 90 self.indexformat = indexformat
91 91 self.datasize = size
92 92 self.l = size/self.s
93 93 self.index = [None] * self.l
94 94 self.map = {nullid: -1}
95 95 self.allmap = 0
96 96 self.all = 0
97 97 self.mapfind_count = 0
98 98 self.shaoffset = shaoffset
99 99
100 100 def loadmap(self):
101 101 """
102 102 during a commit, we need to make sure the rev being added is
103 103 not a duplicate. This requires loading the entire index,
104 104 which is fairly slow. loadmap can load up just the node map,
105 105 which takes much less time.
106 106 """
107 107 if self.allmap: return
108 108 start = 0
109 109 end = self.datasize
110 110 self.allmap = 1
111 111 cur = 0
112 112 count = 0
113 113 blocksize = self.s * 256
114 114 self.dataf.seek(0)
115 115 while cur < end:
116 116 data = self.dataf.read(blocksize)
117 117 off = 0
118 118 for x in xrange(256):
119 119 n = data[off + self.shaoffset:off + self.shaoffset + 20]
120 120 self.map[n] = count
121 121 count += 1
122 122 if count >= self.l:
123 123 break
124 124 off += self.s
125 125 cur += blocksize
126 126
127 127 def loadblock(self, blockstart, blocksize, data=None):
128 128 if self.all: return
129 129 if data is None:
130 130 self.dataf.seek(blockstart)
131 131 data = self.dataf.read(blocksize)
132 132 lend = len(data) / self.s
133 133 i = blockstart / self.s
134 134 off = 0
135 135 for x in xrange(lend):
136 136 if self.index[i + x] == None:
137 137 b = data[off : off + self.s]
138 138 self.index[i + x] = b
139 139 n = b[self.shaoffset:self.shaoffset + 20]
140 140 self.map[n] = i + x
141 141 off += self.s
142 142
143 143 def findnode(self, node):
144 144 """search backwards through the index file for a specific node"""
145 145 if self.allmap: return None
146 146
147 147 # hg log will cause many many searches for the manifest
148 148 # nodes. After we get called a few times, just load the whole
149 149 # thing.
150 150 if self.mapfind_count > 8:
151 151 self.loadmap()
152 152 if node in self.map:
153 153 return node
154 154 return None
155 155 self.mapfind_count += 1
156 156 last = self.l - 1
157 157 while self.index[last] != None:
158 158 if last == 0:
159 159 self.all = 1
160 160 self.allmap = 1
161 161 return None
162 162 last -= 1
163 163 end = (last + 1) * self.s
164 164 blocksize = self.s * 256
165 165 while end >= 0:
166 166 start = max(end - blocksize, 0)
167 167 self.dataf.seek(start)
168 168 data = self.dataf.read(end - start)
169 169 findend = end - start
170 170 while True:
171 171 # we're searching backwards, so weh have to make sure
172 172 # we don't find a changeset where this node is a parent
173 173 off = data.rfind(node, 0, findend)
174 174 findend = off
175 175 if off >= 0:
176 176 i = off / self.s
177 177 off = i * self.s
178 178 n = data[off + self.shaoffset:off + self.shaoffset + 20]
179 179 if n == node:
180 180 self.map[n] = i + start / self.s
181 181 return node
182 182 else:
183 183 break
184 184 end -= blocksize
185 185 return None
186 186
187 187 def loadindex(self, i=None, end=None):
188 188 if self.all: return
189 189 all = False
190 190 if i == None:
191 191 blockstart = 0
192 192 blocksize = (512 / self.s) * self.s
193 193 end = self.datasize
194 194 all = True
195 195 else:
196 196 if end:
197 197 blockstart = i * self.s
198 198 end = end * self.s
199 199 blocksize = end - blockstart
200 200 else:
201 201 blockstart = (i & ~(32)) * self.s
202 202 blocksize = self.s * 64
203 203 end = blockstart + blocksize
204 204 while blockstart < end:
205 205 self.loadblock(blockstart, blocksize)
206 206 blockstart += blocksize
207 207 if all: self.all = True
208 208
209 209 class lazyindex(object):
210 210 """a lazy version of the index array"""
211 211 def __init__(self, parser):
212 212 self.p = parser
213 213 def __len__(self):
214 214 return len(self.p.index)
215 215 def load(self, pos):
216 216 if pos < 0:
217 217 pos += len(self.p.index)
218 218 self.p.loadindex(pos)
219 219 return self.p.index[pos]
220 220 def __getitem__(self, pos):
221 221 ret = self.p.index[pos] or self.load(pos)
222 222 if isinstance(ret, str):
223 223 ret = struct.unpack(self.p.indexformat, ret)
224 224 return ret
225 225 def __setitem__(self, pos, item):
226 226 self.p.index[pos] = item
227 227 def __delitem__(self, pos):
228 228 del self.p.index[pos]
229 229 def append(self, e):
230 230 self.p.index.append(e)
231 231
232 232 class lazymap(object):
233 233 """a lazy version of the node map"""
234 234 def __init__(self, parser):
235 235 self.p = parser
236 236 def load(self, key):
237 237 n = self.p.findnode(key)
238 238 if n == None:
239 239 raise KeyError(key)
240 240 def __contains__(self, key):
241 241 if key in self.p.map:
242 242 return True
243 243 self.p.loadmap()
244 244 return key in self.p.map
245 245 def __iter__(self):
246 246 yield nullid
247 247 for i in xrange(self.p.l):
248 248 ret = self.p.index[i]
249 249 if not ret:
250 250 self.p.loadindex(i)
251 251 ret = self.p.index[i]
252 252 if isinstance(ret, str):
253 253 ret = struct.unpack(self.p.indexformat, ret)
254 254 yield ret[-1]
255 255 def __getitem__(self, key):
256 256 try:
257 257 return self.p.map[key]
258 258 except KeyError:
259 259 try:
260 260 self.load(key)
261 261 return self.p.map[key]
262 262 except KeyError:
263 263 raise KeyError("node " + hex(key))
264 264 def __setitem__(self, key, val):
265 265 self.p.map[key] = val
266 266 def __delitem__(self, key):
267 267 del self.p.map[key]
268 268
269 269 class RevlogError(Exception): pass
270 270
271 271 class revlog(object):
272 272 """
273 273 the underlying revision storage object
274 274
275 275 A revlog consists of two parts, an index and the revision data.
276 276
277 277 The index is a file with a fixed record size containing
278 278 information on each revision, includings its nodeid (hash), the
279 279 nodeids of its parents, the position and offset of its data within
280 280 the data file, and the revision it's based on. Finally, each entry
281 281 contains a linkrev entry that can serve as a pointer to external
282 282 data.
283 283
284 284 The revision data itself is a linear collection of data chunks.
285 285 Each chunk represents a revision and is usually represented as a
286 286 delta against the previous chunk. To bound lookup time, runs of
287 287 deltas are limited to about 2 times the length of the original
288 288 version data. This makes retrieval of a version proportional to
289 289 its size, or O(1) relative to the number of revisions.
290 290
291 291 Both pieces of the revlog are written to in an append-only
292 292 fashion, which means we never need to rewrite a file to insert or
293 293 remove data, and can use some simple techniques to avoid the need
294 294 for locking while reading.
295 295 """
296 296 def __init__(self, opener, indexfile, datafile, defversion=0):
297 297 """
298 298 create a revlog object
299 299
300 300 opener is a function that abstracts the file opening operation
301 301 and can be used to implement COW semantics or the like.
302 302 """
303 303 self.indexfile = indexfile
304 304 self.datafile = datafile
305 305 self.opener = opener
306 306
307 307 self.indexstat = None
308 308 self.cache = None
309 309 self.chunkcache = None
310 310 self.defversion = defversion
311 311 self.load()
312 312
313 313 def load(self):
314 314 v = self.defversion
315 315 try:
316 316 f = self.opener(self.indexfile)
317 317 i = f.read(4)
318 318 f.seek(0)
319 319 except IOError, inst:
320 320 if inst.errno != errno.ENOENT:
321 321 raise
322 322 i = ""
323 323 else:
324 324 try:
325 325 st = os.fstat(f.fileno())
326 326 except AttributeError, inst:
327 327 st = None
328 328 else:
329 329 oldst = self.indexstat
330 330 if (oldst and st.st_dev == oldst.st_dev
331 331 and st.st_ino == oldst.st_ino
332 332 and st.st_mtime == oldst.st_mtime
333 333 and st.st_ctime == oldst.st_ctime):
334 334 return
335 335 self.indexstat = st
336 if len(i) > 0:
337 v = struct.unpack(versionformat, i)[0]
336 if len(i) > 0:
337 v = struct.unpack(versionformat, i)[0]
338 338 flags = v & ~0xFFFF
339 339 fmt = v & 0xFFFF
340 340 if fmt == 0:
341 341 if flags:
342 342 raise RevlogError(_("index %s invalid flags %x for format v0" %
343 343 (self.indexfile, flags)))
344 344 elif fmt == REVLOGNG:
345 345 if flags & ~REVLOGNGINLINEDATA:
346 346 raise RevlogError(_("index %s invalid flags %x for revlogng" %
347 347 (self.indexfile, flags)))
348 348 else:
349 349 raise RevlogError(_("index %s invalid format %d" %
350 350 (self.indexfile, fmt)))
351 351 self.version = v
352 352 if v == 0:
353 353 self.indexformat = indexformatv0
354 354 shaoffset = v0shaoffset
355 355 else:
356 356 self.indexformat = indexformatng
357 357 shaoffset = ngshaoffset
358 358
359 359 if i:
360 360 if not self.inlinedata() and st and st.st_size > 10000:
361 361 # big index, let's parse it on demand
362 362 parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
363 363 self.index = lazyindex(parser)
364 364 self.nodemap = lazymap(parser)
365 365 else:
366 366 i = f.read()
367 367 self.parseindex(i)
368 368 if self.inlinedata():
369 369 # we've already got the entire data file read in, save it
370 370 # in the chunk data
371 371 self.chunkcache = (0, i)
372 372 if self.version != 0:
373 373 e = list(self.index[0])
374 374 type = self.ngtype(e[0])
375 375 e[0] = self.offset_type(0, type)
376 376 self.index[0] = e
377 377 else:
378 378 self.nodemap = { nullid: -1}
379 379 self.index = []
380 380
381 381
382 382 def parseindex(self, data):
383 383 s = struct.calcsize(self.indexformat)
384 384 l = len(data)
385 385 self.index = []
386 386 self.nodemap = {nullid: -1}
387 387 inline = self.inlinedata()
388 388 off = 0
389 389 n = 0
390 390 while off < l:
391 391 e = struct.unpack(self.indexformat, data[off:off + s])
392 392 self.index.append(e)
393 393 self.nodemap[e[-1]] = n
394 394 n += 1
395 395 off += s
396 396 if inline:
397 397 off += e[1]
398 398
399 399 def ngoffset(self, q):
400 400 if q & 0xFFFF:
401 401 raise RevlogError(_('%s: incompatible revision flag %x') %
402 402 (self.indexfile, type))
403 403 return long(q >> 16)
404 404
405 405 def ngtype(self, q):
406 406 return int(q & 0xFFFF)
407 407
408 408 def offset_type(self, offset, type):
409 409 return long(long(offset) << 16 | type)
410 410
411 411 def loadindex(self, start, end):
412 412 """load a block of indexes all at once from the lazy parser"""
413 413 if isinstance(self.index, lazyindex):
414 414 self.index.p.loadindex(start, end)
415 415
416 416 def loadindexmap(self):
417 417 """loads both the map and the index from the lazy parser"""
418 418 if isinstance(self.index, lazyindex):
419 419 p = self.index.p
420 420 p.loadindex()
421 421 self.nodemap = p.map
422 422
423 423 def loadmap(self):
424 424 """loads the map from the lazy parser"""
425 425 if isinstance(self.nodemap, lazymap):
426 426 self.nodemap.p.loadmap()
427 427 self.nodemap = self.nodemap.p.map
428 428
429 429 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
430 430 def tip(self): return self.node(len(self.index) - 1)
431 431 def count(self): return len(self.index)
432 432 def node(self, rev):
433 433 return (rev < 0) and nullid or self.index[rev][-1]
434 434 def rev(self, node):
435 435 try:
436 436 return self.nodemap[node]
437 437 except KeyError:
438 438 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
439 439 def linkrev(self, node): return self.index[self.rev(node)][-4]
440 440 def parents(self, node):
441 441 if node == nullid: return (nullid, nullid)
442 442 r = self.rev(node)
443 443 d = self.index[r][-3:-1]
444 444 if self.version == 0:
445 445 return d
446 446 return [ self.node(x) for x in d ]
447 447 def start(self, rev):
448 448 if rev < 0:
449 449 return -1
450 450 if self.version != 0:
451 451 return self.ngoffset(self.index[rev][0])
452 452 return self.index[rev][0]
453 453
454 454 def end(self, rev): return self.start(rev) + self.length(rev)
455 455
456 456 def size(self, rev):
457 457 """return the length of the uncompressed text for a given revision"""
458 458 l = -1
459 459 if self.version != 0:
460 460 l = self.index[rev][2]
461 461 if l >= 0:
462 462 return l
463 463
464 464 t = self.revision(self.node(rev))
465 465 return len(t)
466 466
467 467 # alternate implementation, The advantage to this code is it
468 468 # will be faster for a single revision. But, the results are not
469 469 # cached, so finding the size of every revision will be slower.
470 470 """
471 471 if self.cache and self.cache[1] == rev:
472 472 return len(self.cache[2])
473 473
474 474 base = self.base(rev)
475 475 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
476 476 base = self.cache[1]
477 477 text = self.cache[2]
478 478 else:
479 479 text = self.revision(self.node(base))
480 480
481 481 l = len(text)
482 482 for x in xrange(base + 1, rev + 1):
483 483 l = mdiff.patchedsize(l, self.chunk(x))
484 484 return l
485 485 """
486 486
487 487 def length(self, rev):
488 488 if rev < 0:
489 489 return 0
490 490 else:
491 491 return self.index[rev][1]
492 492 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
493 493
494 494 def reachable(self, rev, stop=None):
495 495 reachable = {}
496 496 visit = [rev]
497 497 reachable[rev] = 1
498 498 if stop:
499 499 stopn = self.rev(stop)
500 500 else:
501 501 stopn = 0
502 502 while visit:
503 503 n = visit.pop(0)
504 504 if n == stop:
505 505 continue
506 506 if n == nullid:
507 507 continue
508 508 for p in self.parents(n):
509 509 if self.rev(p) < stopn:
510 510 continue
511 511 if p not in reachable:
512 512 reachable[p] = 1
513 513 visit.append(p)
514 514 return reachable
515 515
516 516 def nodesbetween(self, roots=None, heads=None):
517 517 """Return a tuple containing three elements. Elements 1 and 2 contain
518 518 a final list bases and heads after all the unreachable ones have been
519 519 pruned. Element 0 contains a topologically sorted list of all
520 520
521 521 nodes that satisfy these constraints:
522 522 1. All nodes must be descended from a node in roots (the nodes on
523 523 roots are considered descended from themselves).
524 524 2. All nodes must also be ancestors of a node in heads (the nodes in
525 525 heads are considered to be their own ancestors).
526 526
527 527 If roots is unspecified, nullid is assumed as the only root.
528 528 If heads is unspecified, it is taken to be the output of the
529 529 heads method (i.e. a list of all nodes in the repository that
530 530 have no children)."""
531 531 nonodes = ([], [], [])
532 532 if roots is not None:
533 533 roots = list(roots)
534 534 if not roots:
535 535 return nonodes
536 536 lowestrev = min([self.rev(n) for n in roots])
537 537 else:
538 538 roots = [nullid] # Everybody's a descendent of nullid
539 539 lowestrev = -1
540 540 if (lowestrev == -1) and (heads is None):
541 541 # We want _all_ the nodes!
542 542 return ([self.node(r) for r in xrange(0, self.count())],
543 543 [nullid], list(self.heads()))
544 544 if heads is None:
545 545 # All nodes are ancestors, so the latest ancestor is the last
546 546 # node.
547 547 highestrev = self.count() - 1
548 548 # Set ancestors to None to signal that every node is an ancestor.
549 549 ancestors = None
550 550 # Set heads to an empty dictionary for later discovery of heads
551 551 heads = {}
552 552 else:
553 553 heads = list(heads)
554 554 if not heads:
555 555 return nonodes
556 556 ancestors = {}
557 557 # Start at the top and keep marking parents until we're done.
558 558 nodestotag = heads[:]
559 559 # Turn heads into a dictionary so we can remove 'fake' heads.
560 560 # Also, later we will be using it to filter out the heads we can't
561 561 # find from roots.
562 562 heads = dict.fromkeys(heads, 0)
563 563 # Remember where the top was so we can use it as a limit later.
564 564 highestrev = max([self.rev(n) for n in nodestotag])
565 565 while nodestotag:
566 566 # grab a node to tag
567 567 n = nodestotag.pop()
568 568 # Never tag nullid
569 569 if n == nullid:
570 570 continue
571 571 # A node's revision number represents its place in a
572 572 # topologically sorted list of nodes.
573 573 r = self.rev(n)
574 574 if r >= lowestrev:
575 575 if n not in ancestors:
576 576 # If we are possibly a descendent of one of the roots
577 577 # and we haven't already been marked as an ancestor
578 578 ancestors[n] = 1 # Mark as ancestor
579 579 # Add non-nullid parents to list of nodes to tag.
580 580 nodestotag.extend([p for p in self.parents(n) if
581 581 p != nullid])
582 582 elif n in heads: # We've seen it before, is it a fake head?
583 583 # So it is, real heads should not be the ancestors of
584 584 # any other heads.
585 585 heads.pop(n)
586 586 if not ancestors:
587 587 return nonodes
588 588 # Now that we have our set of ancestors, we want to remove any
589 589 # roots that are not ancestors.
590 590
591 591 # If one of the roots was nullid, everything is included anyway.
592 592 if lowestrev > -1:
593 593 # But, since we weren't, let's recompute the lowest rev to not
594 594 # include roots that aren't ancestors.
595 595
596 596 # Filter out roots that aren't ancestors of heads
597 597 roots = [n for n in roots if n in ancestors]
598 598 # Recompute the lowest revision
599 599 if roots:
600 600 lowestrev = min([self.rev(n) for n in roots])
601 601 else:
602 602 # No more roots? Return empty list
603 603 return nonodes
604 604 else:
605 605 # We are descending from nullid, and don't need to care about
606 606 # any other roots.
607 607 lowestrev = -1
608 608 roots = [nullid]
609 609 # Transform our roots list into a 'set' (i.e. a dictionary where the
610 610 # values don't matter.
611 611 descendents = dict.fromkeys(roots, 1)
612 612 # Also, keep the original roots so we can filter out roots that aren't
613 613 # 'real' roots (i.e. are descended from other roots).
614 614 roots = descendents.copy()
615 615 # Our topologically sorted list of output nodes.
616 616 orderedout = []
617 617 # Don't start at nullid since we don't want nullid in our output list,
618 618 # and if nullid shows up in descedents, empty parents will look like
619 619 # they're descendents.
620 620 for r in xrange(max(lowestrev, 0), highestrev + 1):
621 621 n = self.node(r)
622 622 isdescendent = False
623 623 if lowestrev == -1: # Everybody is a descendent of nullid
624 624 isdescendent = True
625 625 elif n in descendents:
626 626 # n is already a descendent
627 627 isdescendent = True
628 628 # This check only needs to be done here because all the roots
629 629 # will start being marked is descendents before the loop.
630 630 if n in roots:
631 631 # If n was a root, check if it's a 'real' root.
632 632 p = tuple(self.parents(n))
633 633 # If any of its parents are descendents, it's not a root.
634 634 if (p[0] in descendents) or (p[1] in descendents):
635 635 roots.pop(n)
636 636 else:
637 637 p = tuple(self.parents(n))
638 638 # A node is a descendent if either of its parents are
639 639 # descendents. (We seeded the dependents list with the roots
640 640 # up there, remember?)
641 641 if (p[0] in descendents) or (p[1] in descendents):
642 642 descendents[n] = 1
643 643 isdescendent = True
644 644 if isdescendent and ((ancestors is None) or (n in ancestors)):
645 645 # Only include nodes that are both descendents and ancestors.
646 646 orderedout.append(n)
647 647 if (ancestors is not None) and (n in heads):
648 648 # We're trying to figure out which heads are reachable
649 649 # from roots.
650 650 # Mark this head as having been reached
651 651 heads[n] = 1
652 652 elif ancestors is None:
653 653 # Otherwise, we're trying to discover the heads.
654 654 # Assume this is a head because if it isn't, the next step
655 655 # will eventually remove it.
656 656 heads[n] = 1
657 657 # But, obviously its parents aren't.
658 658 for p in self.parents(n):
659 659 heads.pop(p, None)
660 660 heads = [n for n in heads.iterkeys() if heads[n] != 0]
661 661 roots = roots.keys()
662 662 assert orderedout
663 663 assert roots
664 664 assert heads
665 665 return (orderedout, roots, heads)
666 666
667 667 def heads(self, start=None):
668 668 """return the list of all nodes that have no children
669 669
670 670 if start is specified, only heads that are descendants of
671 671 start will be returned
672 672
673 673 """
674 674 if start is None:
675 675 start = nullid
676 676 reachable = {start: 1}
677 677 heads = {start: 1}
678 678 startrev = self.rev(start)
679 679
680 680 for r in xrange(startrev + 1, self.count()):
681 681 n = self.node(r)
682 682 for pn in self.parents(n):
683 683 if pn in reachable:
684 684 reachable[n] = 1
685 685 heads[n] = 1
686 686 if pn in heads:
687 687 del heads[pn]
688 688 return heads.keys()
689 689
690 690 def children(self, node):
691 691 """find the children of a given node"""
692 692 c = []
693 693 p = self.rev(node)
694 694 for r in range(p + 1, self.count()):
695 695 n = self.node(r)
696 696 for pn in self.parents(n):
697 697 if pn == node:
698 698 c.append(n)
699 699 continue
700 700 elif pn == nullid:
701 701 continue
702 702 return c
703 703
704 704 def lookup(self, id):
705 705 """locate a node based on revision number or subset of hex nodeid"""
706 706 try:
707 707 rev = int(id)
708 708 if str(rev) != id: raise ValueError
709 709 if rev < 0: rev = self.count() + rev
710 710 if rev < 0 or rev >= self.count(): raise ValueError
711 711 return self.node(rev)
712 712 except (ValueError, OverflowError):
713 713 c = []
714 714 for n in self.nodemap:
715 715 if hex(n).startswith(id):
716 716 c.append(n)
717 717 if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
718 718 if len(c) < 1: raise RevlogError(_("No match found"))
719 719 return c[0]
720 720
721 721 return None
722 722
723 723 def diff(self, a, b):
724 724 """return a delta between two revisions"""
725 725 return mdiff.textdiff(a, b)
726 726
727 727 def patches(self, t, pl):
728 728 """apply a list of patches to a string"""
729 729 return mdiff.patches(t, pl)
730 730
731 731 def chunk(self, rev, df=None, cachelen=4096):
732 732 start, length = self.start(rev), self.length(rev)
733 733 inline = self.inlinedata()
734 734 if inline:
735 735 start += (rev + 1) * struct.calcsize(self.indexformat)
736 736 end = start + length
737 737 def loadcache(df):
738 738 cache_length = max(cachelen, length) # 4k
739 739 if not df:
740 740 if inline:
741 741 df = self.opener(self.indexfile)
742 742 else:
743 743 df = self.opener(self.datafile)
744 744 df.seek(start)
745 745 self.chunkcache = (start, df.read(cache_length))
746 746
747 747 if not self.chunkcache:
748 748 loadcache(df)
749 749
750 750 cache_start = self.chunkcache[0]
751 751 cache_end = cache_start + len(self.chunkcache[1])
752 752 if start >= cache_start and end <= cache_end:
753 753 # it is cached
754 754 offset = start - cache_start
755 755 else:
756 756 loadcache(df)
757 757 offset = 0
758 758
759 759 #def checkchunk():
760 760 # df = self.opener(self.datafile)
761 761 # df.seek(start)
762 762 # return df.read(length)
763 763 #assert s == checkchunk()
764 764 return decompress(self.chunkcache[1][offset:offset + length])
765 765
766 766 def delta(self, node):
767 767 """return or calculate a delta between a node and its predecessor"""
768 768 r = self.rev(node)
769 769 return self.revdiff(r - 1, r)
770 770
771 771 def revdiff(self, rev1, rev2):
772 772 """return or calculate a delta between two revisions"""
773 773 b1 = self.base(rev1)
774 774 b2 = self.base(rev2)
775 775 if b1 == b2 and rev1 + 1 == rev2:
776 776 return self.chunk(rev2)
777 777 else:
778 778 return self.diff(self.revision(self.node(rev1)),
779 779 self.revision(self.node(rev2)))
780 780
781 781 def revision(self, node):
782 782 """return an uncompressed revision of a given"""
783 783 if node == nullid: return ""
784 784 if self.cache and self.cache[0] == node: return self.cache[2]
785 785
786 786 # look up what we need to read
787 787 text = None
788 788 rev = self.rev(node)
789 789 base = self.base(rev)
790 790
791 791 if self.inlinedata():
792 792 # we probably have the whole chunk cached
793 793 df = None
794 794 else:
795 795 df = self.opener(self.datafile)
796 796
797 797 # do we have useful data cached?
798 798 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
799 799 base = self.cache[1]
800 800 text = self.cache[2]
801 801 self.loadindex(base, rev + 1)
802 802 else:
803 803 self.loadindex(base, rev + 1)
804 804 text = self.chunk(base, df=df)
805 805
806 806 bins = []
807 807 for r in xrange(base + 1, rev + 1):
808 808 bins.append(self.chunk(r, df=df))
809 809
810 810 text = self.patches(text, bins)
811 811
812 812 p1, p2 = self.parents(node)
813 813 if node != hash(text, p1, p2):
814 814 raise RevlogError(_("integrity check failed on %s:%d")
815 815 % (self.datafile, rev))
816 816
817 817 self.cache = (node, rev, text)
818 818 return text
819 819
820 820 def checkinlinesize(self, tr, fp=None):
821 821 if not self.inlinedata():
822 822 return
823 823 if not fp:
824 824 fp = self.opener(self.indexfile, 'r')
825 825 fp.seek(0, 2)
826 826 size = fp.tell()
827 827 if size < 131072:
828 828 return
829 829 trinfo = tr.find(self.indexfile)
830 830 if trinfo == None:
831 831 raise RevlogError(_("%s not found in the transaction" %
832 832 self.indexfile))
833 833
834 834 trindex = trinfo[2]
835 835 dataoff = self.start(trindex)
836 836
837 837 tr.add(self.datafile, dataoff)
838 838 df = self.opener(self.datafile, 'w')
839 839 calc = struct.calcsize(self.indexformat)
840 840 for r in xrange(self.count()):
841 841 start = self.start(r) + (r + 1) * calc
842 842 length = self.length(r)
843 843 fp.seek(start)
844 844 d = fp.read(length)
845 845 df.write(d)
846 846 fp.close()
847 847 df.close()
848 848 fp = self.opener(self.indexfile, 'w', atomictemp=True)
849 849 self.version &= ~(REVLOGNGINLINEDATA)
850 850 if self.count():
851 851 x = self.index[0]
852 852 e = struct.pack(self.indexformat, *x)[4:]
853 853 l = struct.pack(versionformat, self.version)
854 854 fp.write(l)
855 855 fp.write(e)
856 856
857 857 for i in xrange(1, self.count()):
858 858 x = self.index[i]
859 859 e = struct.pack(self.indexformat, *x)
860 860 fp.write(e)
861 861
862 862 # if we don't call rename, the temp file will never replace the
863 863 # real index
864 864 fp.rename()
865 865
866 866 tr.replace(self.indexfile, trindex * calc)
867 867 self.chunkcache = None
868 868
869 869 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
870 870 """add a revision to the log
871 871
872 872 text - the revision data to add
873 873 transaction - the transaction object used for rollback
874 874 link - the linkrev data to add
875 875 p1, p2 - the parent nodeids of the revision
876 876 d - an optional precomputed delta
877 877 """
878 878 if text is None: text = ""
879 879 if p1 is None: p1 = self.tip()
880 880 if p2 is None: p2 = nullid
881 881
882 882 node = hash(text, p1, p2)
883 883
884 884 if node in self.nodemap:
885 885 return node
886 886
887 887 n = self.count()
888 888 t = n - 1
889 889
890 890 if n:
891 891 base = self.base(t)
892 892 start = self.start(base)
893 893 end = self.end(t)
894 894 if not d:
895 895 prev = self.revision(self.tip())
896 896 d = self.diff(prev, str(text))
897 897 data = compress(d)
898 898 l = len(data[1]) + len(data[0])
899 899 dist = end - start + l
900 900
901 901 # full versions are inserted when the needed deltas
902 902 # become comparable to the uncompressed text
903 903 if not n or dist > len(text) * 2:
904 904 data = compress(text)
905 905 l = len(data[1]) + len(data[0])
906 906 base = n
907 907 else:
908 908 base = self.base(t)
909 909
910 910 offset = 0
911 911 if t >= 0:
912 912 offset = self.end(t)
913 913
914 914 if self.version == 0:
915 915 e = (offset, l, base, link, p1, p2, node)
916 916 else:
917 917 e = (self.offset_type(offset, 0), l, len(text),
918 918 base, link, self.rev(p1), self.rev(p2), node)
919 919
920 920 self.index.append(e)
921 921 self.nodemap[node] = n
922 922 entry = struct.pack(self.indexformat, *e)
923 923
924 924 if not self.inlinedata():
925 925 transaction.add(self.datafile, offset)
926 926 transaction.add(self.indexfile, n * len(entry))
927 927 f = self.opener(self.datafile, "a")
928 928 if data[0]:
929 929 f.write(data[0])
930 930 f.write(data[1])
931 931 f.close()
932 932 f = self.opener(self.indexfile, "a")
933 933 else:
934 934 f = self.opener(self.indexfile, "a+")
935 935 f.seek(0, 2)
936 936 transaction.add(self.indexfile, f.tell(), self.count() - 1)
937 937
938 938 if len(self.index) == 1 and self.version != 0:
939 939 l = struct.pack(versionformat, self.version)
940 940 f.write(l)
941 941 entry = entry[4:]
942 942
943 943 f.write(entry)
944 944
945 945 if self.inlinedata():
946 946 f.write(data[0])
947 947 f.write(data[1])
948 948 self.checkinlinesize(transaction, f)
949 949
950 950 self.cache = (node, n, text)
951 951 return node
952 952
953 953 def ancestor(self, a, b):
954 954 """calculate the least common ancestor of nodes a and b"""
955 955
956 956 # start with some short cuts for the linear cases
957 957 if a == b:
958 958 return a
959 959 ra = self.rev(a)
960 960 rb = self.rev(b)
961 961 if ra < rb:
962 962 last = b
963 963 first = a
964 964 else:
965 965 last = a
966 966 first = b
967 967
968 968 # reachable won't include stop in the list, so we have to use a parent
969 969 reachable = self.reachable(last, stop=self.parents(first)[0])
970 970 if first in reachable:
971 971 return first
972 972
973 973 # calculate the distance of every node from root
974 974 dist = {nullid: 0}
975 975 for i in xrange(self.count()):
976 976 n = self.node(i)
977 977 p1, p2 = self.parents(n)
978 978 dist[n] = max(dist[p1], dist[p2]) + 1
979 979
980 980 # traverse ancestors in order of decreasing distance from root
981 981 def ancestors(node):
982 982 # we store negative distances because heap returns smallest member
983 983 h = [(-dist[node], node)]
984 984 seen = {}
985 985 while h:
986 986 d, n = heapq.heappop(h)
987 987 if n not in seen:
988 988 seen[n] = 1
989 989 yield (-d, n)
990 990 for p in self.parents(n):
991 991 heapq.heappush(h, (-dist[p], p))
992 992
993 993 def generations(node):
994 994 sg, s = None, {}
995 995 for g,n in ancestors(node):
996 996 if g != sg:
997 997 if sg:
998 998 yield sg, s
999 999 sg, s = g, {n:1}
1000 1000 else:
1001 1001 s[n] = 1
1002 1002 yield sg, s
1003 1003
1004 1004 x = generations(a)
1005 1005 y = generations(b)
1006 1006 gx = x.next()
1007 1007 gy = y.next()
1008 1008
1009 1009 # increment each ancestor list until it is closer to root than
1010 1010 # the other, or they match
1011 1011 while 1:
1012 1012 #print "ancestor gen %s %s" % (gx[0], gy[0])
1013 1013 if gx[0] == gy[0]:
1014 1014 # find the intersection
1015 1015 i = [ n for n in gx[1] if n in gy[1] ]
1016 1016 if i:
1017 1017 return i[0]
1018 1018 else:
1019 1019 #print "next"
1020 1020 gy = y.next()
1021 1021 gx = x.next()
1022 1022 elif gx[0] < gy[0]:
1023 1023 #print "next y"
1024 1024 gy = y.next()
1025 1025 else:
1026 1026 #print "next x"
1027 1027 gx = x.next()
1028 1028
1029 1029 def group(self, nodelist, lookup, infocollect=None):
1030 1030 """calculate a delta group
1031 1031
1032 1032 Given a list of changeset revs, return a set of deltas and
1033 1033 metadata corresponding to nodes. the first delta is
1034 1034 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1035 1035 have this parent as it has all history before these
1036 1036 changesets. parent is parent[0]
1037 1037 """
1038 1038 revs = [self.rev(n) for n in nodelist]
1039 1039
1040 1040 # if we don't have any revisions touched by these changesets, bail
1041 1041 if not revs:
1042 1042 yield changegroup.closechunk()
1043 1043 return
1044 1044
1045 1045 # add the parent of the first rev
1046 1046 p = self.parents(self.node(revs[0]))[0]
1047 1047 revs.insert(0, self.rev(p))
1048 1048
1049 1049 # build deltas
1050 1050 for d in xrange(0, len(revs) - 1):
1051 1051 a, b = revs[d], revs[d + 1]
1052 1052 nb = self.node(b)
1053 1053
1054 1054 if infocollect is not None:
1055 1055 infocollect(nb)
1056 1056
1057 1057 d = self.revdiff(a, b)
1058 1058 p = self.parents(nb)
1059 1059 meta = nb + p[0] + p[1] + lookup(nb)
1060 1060 yield changegroup.genchunk("%s%s" % (meta, d))
1061 1061
1062 1062 yield changegroup.closechunk()
1063 1063
1064 1064 def addgroup(self, revs, linkmapper, transaction, unique=0):
1065 1065 """
1066 1066 add a delta group
1067 1067
1068 1068 given a set of deltas, add them to the revision log. the
1069 1069 first delta is against its parent, which should be in our
1070 1070 log, the rest are against the previous delta.
1071 1071 """
1072 1072
1073 1073 #track the base of the current delta log
1074 1074 r = self.count()
1075 1075 t = r - 1
1076 1076 node = None
1077 1077
1078 1078 base = prev = -1
1079 1079 start = end = textlen = 0
1080 1080 if r:
1081 1081 end = self.end(t)
1082 1082
1083 1083 ifh = self.opener(self.indexfile, "a+")
1084 1084 ifh.seek(0, 2)
1085 1085 transaction.add(self.indexfile, ifh.tell(), self.count())
1086 1086 if self.inlinedata():
1087 1087 dfh = None
1088 1088 else:
1089 1089 transaction.add(self.datafile, end)
1090 1090 dfh = self.opener(self.datafile, "a")
1091 1091
1092 1092 # loop through our set of deltas
1093 1093 chain = None
1094 1094 for chunk in revs:
1095 1095 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1096 1096 link = linkmapper(cs)
1097 1097 if node in self.nodemap:
1098 1098 # this can happen if two branches make the same change
1099 1099 # if unique:
1100 1100 # raise RevlogError(_("already have %s") % hex(node[:4]))
1101 1101 chain = node
1102 1102 continue
1103 1103 delta = chunk[80:]
1104 1104
1105 1105 for p in (p1, p2):
1106 1106 if not p in self.nodemap:
1107 1107 raise RevlogError(_("unknown parent %s") % short(p1))
1108 1108
1109 1109 if not chain:
1110 1110 # retrieve the parent revision of the delta chain
1111 1111 chain = p1
1112 1112 if not chain in self.nodemap:
1113 1113 raise RevlogError(_("unknown base %s") % short(chain[:4]))
1114 1114
1115 1115 # full versions are inserted when the needed deltas become
1116 1116 # comparable to the uncompressed text or when the previous
1117 1117 # version is not the one we have a delta against. We use
1118 1118 # the size of the previous full rev as a proxy for the
1119 1119 # current size.
1120 1120
1121 1121 if chain == prev:
1122 1122 tempd = compress(delta)
1123 1123 cdelta = tempd[0] + tempd[1]
1124 1124 textlen = mdiff.patchedsize(textlen, delta)
1125 1125
1126 1126 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1127 1127 # flush our writes here so we can read it in revision
1128 1128 if dfh:
1129 1129 dfh.flush()
1130 1130 ifh.flush()
1131 1131 text = self.revision(chain)
1132 1132 text = self.patches(text, [delta])
1133 1133 chk = self.addrevision(text, transaction, link, p1, p2)
1134 1134 if chk != node:
1135 1135 raise RevlogError(_("consistency error adding group"))
1136 1136 textlen = len(text)
1137 1137 else:
1138 1138 if self.version == 0:
1139 1139 e = (end, len(cdelta), base, link, p1, p2, node)
1140 1140 else:
1141 1141 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
1142 1142 link, self.rev(p1), self.rev(p2), node)
1143 1143 self.index.append(e)
1144 1144 self.nodemap[node] = r
1145 1145 if self.inlinedata():
1146 1146 ifh.write(struct.pack(self.indexformat, *e))
1147 1147 ifh.write(cdelta)
1148 1148 self.checkinlinesize(transaction, ifh)
1149 1149 if not self.inlinedata():
1150 1150 dfh = self.opener(self.datafile, "a")
1151 1151 ifh = self.opener(self.indexfile, "a")
1152 1152 else:
1153 1153 if not dfh:
1154 1154 # addrevision switched from inline to conventional
1155 1155 # reopen the index
1156 1156 dfh = self.opener(self.datafile, "a")
1157 1157 ifh = self.opener(self.indexfile, "a")
1158 1158 dfh.write(cdelta)
1159 1159 ifh.write(struct.pack(self.indexformat, *e))
1160 1160
1161 1161 t, r, chain, prev = r, r + 1, node, node
1162 1162 base = self.base(t)
1163 1163 start = self.start(base)
1164 1164 end = self.end(t)
1165 1165
1166 1166 if node is None:
1167 1167 raise RevlogError(_("group to be added is empty"))
1168 1168 return node
1169 1169
1170 1170 def strip(self, rev, minlink):
1171 1171 if self.count() == 0 or rev >= self.count():
1172 1172 return
1173 1173
1174 1174 if isinstance(self.index, lazyindex):
1175 1175 self.loadindexmap()
1176 1176
1177 1177 # When stripping away a revision, we need to make sure it
1178 1178 # does not actually belong to an older changeset.
1179 1179 # The minlink parameter defines the oldest revision
1180 1180 # we're allowed to strip away.
1181 1181 while minlink > self.index[rev][-4]:
1182 1182 rev += 1
1183 1183 if rev >= self.count():
1184 1184 return
1185 1185
1186 1186 # first truncate the files on disk
1187 1187 end = self.start(rev)
1188 1188 if not self.inlinedata():
1189 1189 df = self.opener(self.datafile, "a")
1190 1190 df.truncate(end)
1191 1191 end = rev * struct.calcsize(self.indexformat)
1192 1192 else:
1193 1193 end += rev * struct.calcsize(self.indexformat)
1194 1194
1195 1195 indexf = self.opener(self.indexfile, "a")
1196 1196 indexf.truncate(end)
1197 1197
1198 1198 # then reset internal state in memory to forget those revisions
1199 1199 self.cache = None
1200 1200 self.chunkcache = None
1201 1201 for x in xrange(rev, self.count()):
1202 1202 del self.nodemap[self.node(x)]
1203 1203
1204 1204 del self.index[rev:]
1205 1205
1206 1206 def checksize(self):
1207 1207 expected = 0
1208 1208 if self.count():
1209 1209 expected = self.end(self.count() - 1)
1210 1210
1211 1211 try:
1212 1212 f = self.opener(self.datafile)
1213 1213 f.seek(0, 2)
1214 1214 actual = f.tell()
1215 1215 dd = actual - expected
1216 1216 except IOError, inst:
1217 1217 if inst.errno != errno.ENOENT:
1218 1218 raise
1219 1219 dd = 0
1220 1220
1221 1221 try:
1222 1222 f = self.opener(self.indexfile)
1223 1223 f.seek(0, 2)
1224 1224 actual = f.tell()
1225 1225 s = struct.calcsize(self.indexformat)
1226 1226 i = actual / s
1227 1227 di = actual - (i * s)
1228 1228 if self.inlinedata():
1229 1229 databytes = 0
1230 1230 for r in xrange(self.count()):
1231 1231 databytes += self.length(r)
1232 1232 dd = 0
1233 1233 di = actual - self.count() * s - databytes
1234 1234 except IOError, inst:
1235 1235 if inst.errno != errno.ENOENT:
1236 1236 raise
1237 1237 di = 0
1238 1238
1239 1239 return (dd, di)
1240 1240
1241 1241
General Comments 0
You need to be logged in to leave comments. Login now