static-http: disable lazy parsing...
Matt Mackall
r11155:245a67fe default
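This changeset makes revlog skip lazy index parsing when the opener asks for it: revlog.__init__ now looks for a "nonlazy" key in getattr(self.opener, 'options', {}) and, if found, reads the whole index file instead of only the first _prereadsize bytes that the lazy-parsing path relies on. The static-http opener (second file below) is what sets that flag. Below is a minimal sketch of the new read decision, with opener standing in for any vfs-style callable that may carry an options dict; read_index and its arguments are illustrative, not part of the changeset.

    _prereadsize = 1048576  # same preread threshold revlog.py uses

    def read_index(opener, indexfile):
        # Sketch only: mirrors the option check added to revlog.__init__ below.
        f = opener(indexfile)
        if "nonlazy" in getattr(opener, 'options', {}):
            return f.read()              # read the entire index in one go
        return f.read(_prereadsize)      # default: preread only; lazy parsing may follow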
@@ -1,1399 +1,1402
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 33
34 34 # amount of data read unconditionally, should be >= 4
35 35 # when not inline: threshold for using lazy index
36 36 _prereadsize = 1048576
37 37 # max size of revlog with inline data
38 38 _maxinline = 131072
39 39
40 40 RevlogError = error.RevlogError
41 41 LookupError = error.LookupError
42 42
43 43 def getoffset(q):
44 44 return int(q >> 16)
45 45
46 46 def gettype(q):
47 47 return int(q & 0xFFFF)
48 48
49 49 def offset_type(offset, type):
50 50 return long(long(offset) << 16 | type)
51 51
52 52 nullhash = _sha(nullid)
53 53
54 54 def hash(text, p1, p2):
55 55 """generate a hash from the given text and its parent hashes
56 56
57 57 This hash combines both the current file contents and its history
58 58 in a manner that makes it easy to distinguish nodes with the same
59 59 content in the revision graph.
60 60 """
61 61 # As of now, if one of the parent nodes is null, p2 is null
62 62 if p2 == nullid:
63 63 # deep copy of a hash is faster than creating one
64 64 s = nullhash.copy()
65 65 s.update(p1)
66 66 else:
67 67 # none of the parent nodes are nullid
68 68 l = [p1, p2]
69 69 l.sort()
70 70 s = _sha(l[0])
71 71 s.update(l[1])
72 72 s.update(text)
73 73 return s.digest()
74 74
75 75 def compress(text):
76 76 """ generate a possibly-compressed representation of text """
77 77 if not text:
78 78 return ("", text)
79 79 l = len(text)
80 80 bin = None
81 81 if l < 44:
82 82 pass
83 83 elif l > 1000000:
84 84 # zlib makes an internal copy, thus doubling memory usage for
85 85 # large files, so let's do this in pieces
86 86 z = zlib.compressobj()
87 87 p = []
88 88 pos = 0
89 89 while pos < l:
90 90 pos2 = pos + 2**20
91 91 p.append(z.compress(text[pos:pos2]))
92 92 pos = pos2
93 93 p.append(z.flush())
94 94 if sum(map(len, p)) < l:
95 95 bin = "".join(p)
96 96 else:
97 97 bin = _compress(text)
98 98 if bin is None or len(bin) > l:
99 99 if text[0] == '\0':
100 100 return ("", text)
101 101 return ('u', text)
102 102 return ("", bin)
103 103
104 104 def decompress(bin):
105 105 """ decompress the given input """
106 106 if not bin:
107 107 return bin
108 108 t = bin[0]
109 109 if t == '\0':
110 110 return bin
111 111 if t == 'x':
112 112 return _decompress(bin)
113 113 if t == 'u':
114 114 return bin[1:]
115 115 raise RevlogError(_("unknown compression type %r") % t)
116 116
117 117 class lazyparser(object):
118 118 """
119 119 this class avoids the need to parse the entirety of large indices
120 120 """
121 121
122 122 # lazyparser is not safe to use on windows if win32 extensions not
123 123 # available. It keeps the file handle open, which makes it impossible
124 124 # to break hardlinks on local cloned repos.
125 125
126 126 def __init__(self, dataf):
127 127 try:
128 128 size = util.fstat(dataf).st_size
129 129 except AttributeError:
130 130 size = 0
131 131 self.dataf = dataf
132 132 self.s = struct.calcsize(indexformatng)
133 133 self.datasize = size
134 134 self.l = size / self.s
135 135 self.index = [None] * self.l
136 136 self.map = {nullid: nullrev}
137 137 self.allmap = 0
138 138 self.all = 0
139 139 self.mapfind_count = 0
140 140
141 141 def loadmap(self):
142 142 """
143 143 during a commit, we need to make sure the rev being added is
144 144 not a duplicate. This requires loading the entire index,
145 145 which is fairly slow. loadmap can load up just the node map,
146 146 which takes much less time.
147 147 """
148 148 if self.allmap:
149 149 return
150 150 end = self.datasize
151 151 self.allmap = 1
152 152 cur = 0
153 153 count = 0
154 154 blocksize = self.s * 256
155 155 self.dataf.seek(0)
156 156 while cur < end:
157 157 data = self.dataf.read(blocksize)
158 158 off = 0
159 159 for x in xrange(256):
160 160 n = data[off + ngshaoffset:off + ngshaoffset + 20]
161 161 self.map[n] = count
162 162 count += 1
163 163 if count >= self.l:
164 164 break
165 165 off += self.s
166 166 cur += blocksize
167 167
168 168 def loadblock(self, blockstart, blocksize, data=None):
169 169 if self.all:
170 170 return
171 171 if data is None:
172 172 self.dataf.seek(blockstart)
173 173 if blockstart + blocksize > self.datasize:
174 174 # the revlog may have grown since we've started running,
175 175 # but we don't have space in self.index for more entries.
176 176 # limit blocksize so that we don't get too much data.
177 177 blocksize = max(self.datasize - blockstart, 0)
178 178 data = self.dataf.read(blocksize)
179 179 lend = len(data) / self.s
180 180 i = blockstart / self.s
181 181 off = 0
182 182 # lazyindex supports __delitem__
183 183 if lend > len(self.index) - i:
184 184 lend = len(self.index) - i
185 185 for x in xrange(lend):
186 186 if self.index[i + x] is None:
187 187 b = data[off : off + self.s]
188 188 self.index[i + x] = b
189 189 n = b[ngshaoffset:ngshaoffset + 20]
190 190 self.map[n] = i + x
191 191 off += self.s
192 192
193 193 def findnode(self, node):
194 194 """search backwards through the index file for a specific node"""
195 195 if self.allmap:
196 196 return None
197 197
198 198 # hg log will cause many many searches for the manifest
199 199 # nodes. After we get called a few times, just load the whole
200 200 # thing.
201 201 if self.mapfind_count > 8:
202 202 self.loadmap()
203 203 if node in self.map:
204 204 return node
205 205 return None
206 206 self.mapfind_count += 1
207 207 last = self.l - 1
208 208 while self.index[last] != None:
209 209 if last == 0:
210 210 self.all = 1
211 211 self.allmap = 1
212 212 return None
213 213 last -= 1
214 214 end = (last + 1) * self.s
215 215 blocksize = self.s * 256
216 216 while end >= 0:
217 217 start = max(end - blocksize, 0)
218 218 self.dataf.seek(start)
219 219 data = self.dataf.read(end - start)
220 220 findend = end - start
221 221 while True:
222 222 # we're searching backwards, so we have to make sure
223 223 # we don't find a changeset where this node is a parent
224 224 off = data.find(node, 0, findend)
225 225 findend = off
226 226 if off >= 0:
227 227 i = off / self.s
228 228 off = i * self.s
229 229 n = data[off + ngshaoffset:off + ngshaoffset + 20]
230 230 if n == node:
231 231 self.map[n] = i + start / self.s
232 232 return node
233 233 else:
234 234 break
235 235 end -= blocksize
236 236 return None
237 237
238 238 def loadindex(self, i=None, end=None):
239 239 if self.all:
240 240 return
241 241 all = False
242 242 if i is None:
243 243 blockstart = 0
244 244 blocksize = (65536 / self.s) * self.s
245 245 end = self.datasize
246 246 all = True
247 247 else:
248 248 if end:
249 249 blockstart = i * self.s
250 250 end = end * self.s
251 251 blocksize = end - blockstart
252 252 else:
253 253 blockstart = (i & ~1023) * self.s
254 254 blocksize = self.s * 1024
255 255 end = blockstart + blocksize
256 256 while blockstart < end:
257 257 self.loadblock(blockstart, blocksize)
258 258 blockstart += blocksize
259 259 if all:
260 260 self.all = True
261 261
262 262 class lazyindex(object):
263 263 """a lazy version of the index array"""
264 264 def __init__(self, parser):
265 265 self.p = parser
266 266 def __len__(self):
267 267 return len(self.p.index)
268 268 def load(self, pos):
269 269 if pos < 0:
270 270 pos += len(self.p.index)
271 271 self.p.loadindex(pos)
272 272 return self.p.index[pos]
273 273 def __getitem__(self, pos):
274 274 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
275 275 def __setitem__(self, pos, item):
276 276 self.p.index[pos] = _pack(indexformatng, *item)
277 277 def __delitem__(self, pos):
278 278 del self.p.index[pos]
279 279 def insert(self, pos, e):
280 280 self.p.index.insert(pos, _pack(indexformatng, *e))
281 281 def append(self, e):
282 282 self.p.index.append(_pack(indexformatng, *e))
283 283
284 284 class lazymap(object):
285 285 """a lazy version of the node map"""
286 286 def __init__(self, parser):
287 287 self.p = parser
288 288 def load(self, key):
289 289 n = self.p.findnode(key)
290 290 if n is None:
291 291 raise KeyError(key)
292 292 def __contains__(self, key):
293 293 if key in self.p.map:
294 294 return True
295 295 self.p.loadmap()
296 296 return key in self.p.map
297 297 def __iter__(self):
298 298 yield nullid
299 299 for i, ret in enumerate(self.p.index):
300 300 if not ret:
301 301 self.p.loadindex(i)
302 302 ret = self.p.index[i]
303 303 if isinstance(ret, str):
304 304 ret = _unpack(indexformatng, ret)
305 305 yield ret[7]
306 306 def __getitem__(self, key):
307 307 try:
308 308 return self.p.map[key]
309 309 except KeyError:
310 310 try:
311 311 self.load(key)
312 312 return self.p.map[key]
313 313 except KeyError:
314 314 raise KeyError("node " + hex(key))
315 315 def __setitem__(self, key, val):
316 316 self.p.map[key] = val
317 317 def __delitem__(self, key):
318 318 del self.p.map[key]
319 319
320 320 indexformatv0 = ">4l20s20s20s"
321 321 v0shaoffset = 56
322 322
323 323 class revlogoldio(object):
324 324 def __init__(self):
325 325 self.size = struct.calcsize(indexformatv0)
326 326
327 327 def parseindex(self, fp, data, inline):
328 328 s = self.size
329 329 index = []
330 330 nodemap = {nullid: nullrev}
331 331 n = off = 0
332 332 if len(data) == _prereadsize:
333 333 data += fp.read() # read the rest
334 334 l = len(data)
335 335 while off + s <= l:
336 336 cur = data[off:off + s]
337 337 off += s
338 338 e = _unpack(indexformatv0, cur)
339 339 # transform to revlogv1 format
340 340 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
341 341 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
342 342 index.append(e2)
343 343 nodemap[e[6]] = n
344 344 n += 1
345 345
346 346 return index, nodemap, None
347 347
348 348 def packentry(self, entry, node, version, rev):
349 349 if gettype(entry[0]):
350 350 raise RevlogError(_("index entry flags need RevlogNG"))
351 351 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
352 352 node(entry[5]), node(entry[6]), entry[7])
353 353 return _pack(indexformatv0, *e2)
354 354
355 355 # index ng:
356 356 # 6 bytes offset
357 357 # 2 bytes flags
358 358 # 4 bytes compressed length
359 359 # 4 bytes uncompressed length
360 360 # 4 bytes: base rev
361 361 # 4 bytes link rev
362 362 # 4 bytes parent 1 rev
363 363 # 4 bytes parent 2 rev
364 364 # 32 bytes: nodeid
365 365 indexformatng = ">Qiiiiii20s12x"
366 366 ngshaoffset = 32
367 367 versionformat = ">I"
368 368
369 369 class revlogio(object):
370 370 def __init__(self):
371 371 self.size = struct.calcsize(indexformatng)
372 372
373 373 def parseindex(self, fp, data, inline):
374 374 if len(data) == _prereadsize:
375 375 if util.openhardlinks() and not inline:
376 376 # big index, let's parse it on demand
377 377 parser = lazyparser(fp)
378 378 index = lazyindex(parser)
379 379 nodemap = lazymap(parser)
380 380 e = list(index[0])
381 381 type = gettype(e[0])
382 382 e[0] = offset_type(0, type)
383 383 index[0] = e
384 384 return index, nodemap, None
385 385 else:
386 386 data += fp.read()
387 387
388 388 # call the C implementation to parse the index data
389 389 index, nodemap, cache = parsers.parse_index(data, inline)
390 390 return index, nodemap, cache
391 391
392 392 def packentry(self, entry, node, version, rev):
393 393 p = _pack(indexformatng, *entry)
394 394 if rev == 0:
395 395 p = _pack(versionformat, version) + p[4:]
396 396 return p
397 397
398 398 class revlog(object):
399 399 """
400 400 the underlying revision storage object
401 401
402 402 A revlog consists of two parts, an index and the revision data.
403 403
404 404 The index is a file with a fixed record size containing
405 405 information on each revision, including its nodeid (hash), the
406 406 nodeids of its parents, the position and offset of its data within
407 407 the data file, and the revision it's based on. Finally, each entry
408 408 contains a linkrev entry that can serve as a pointer to external
409 409 data.
410 410
411 411 The revision data itself is a linear collection of data chunks.
412 412 Each chunk represents a revision and is usually represented as a
413 413 delta against the previous chunk. To bound lookup time, runs of
414 414 deltas are limited to about 2 times the length of the original
415 415 version data. This makes retrieval of a version proportional to
416 416 its size, or O(1) relative to the number of revisions.
417 417
418 418 Both pieces of the revlog are written to in an append-only
419 419 fashion, which means we never need to rewrite a file to insert or
420 420 remove data, and can use some simple techniques to avoid the need
421 421 for locking while reading.
422 422 """
423 423 def __init__(self, opener, indexfile):
424 424 """
425 425 create a revlog object
426 426
427 427 opener is a function that abstracts the file opening operation
428 428 and can be used to implement COW semantics or the like.
429 429 """
430 430 self.indexfile = indexfile
431 431 self.datafile = indexfile[:-2] + ".d"
432 432 self.opener = opener
433 433 self._cache = None
434 434 self._chunkcache = (0, '')
435 435 self.nodemap = {nullid: nullrev}
436 436 self.index = []
437 437
438 438 v = REVLOG_DEFAULT_VERSION
439 439 if hasattr(opener, 'options') and 'defversion' in opener.options:
440 440 v = opener.options['defversion']
441 441 if v & REVLOGNG:
442 442 v |= REVLOGNGINLINEDATA
443 443
444 444 i = ''
445 445 try:
446 446 f = self.opener(self.indexfile)
447 i = f.read(_prereadsize)
447 if "nonlazy" in getattr(self.opener, 'options', {}):
448 i = f.read()
449 else:
450 i = f.read(_prereadsize)
448 451 if len(i) > 0:
449 452 v = struct.unpack(versionformat, i[:4])[0]
450 453 except IOError, inst:
451 454 if inst.errno != errno.ENOENT:
452 455 raise
453 456
454 457 self.version = v
455 458 self._inline = v & REVLOGNGINLINEDATA
456 459 flags = v & ~0xFFFF
457 460 fmt = v & 0xFFFF
458 461 if fmt == REVLOGV0 and flags:
459 462 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 463 % (self.indexfile, flags >> 16))
461 464 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 465 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 466 % (self.indexfile, flags >> 16))
464 467 elif fmt > REVLOGNG:
465 468 raise RevlogError(_("index %s unknown format %d")
466 469 % (self.indexfile, fmt))
467 470
468 471 self._io = revlogio()
469 472 if self.version == REVLOGV0:
470 473 self._io = revlogoldio()
471 474 if i:
472 475 try:
473 476 d = self._io.parseindex(f, i, self._inline)
474 477 except (ValueError, IndexError):
475 478 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
476 479 self.index, self.nodemap, self._chunkcache = d
477 480 if not self._chunkcache:
478 481 self._chunkclear()
479 482
480 483 # add the magic null revision at -1 (if it hasn't been done already)
481 484 if (self.index == [] or isinstance(self.index, lazyindex) or
482 485 self.index[-1][7] != nullid) :
483 486 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
484 487
485 488 def _loadindex(self, start, end):
486 489 """load a block of indexes all at once from the lazy parser"""
487 490 if isinstance(self.index, lazyindex):
488 491 self.index.p.loadindex(start, end)
489 492
490 493 def _loadindexmap(self):
491 494 """loads both the map and the index from the lazy parser"""
492 495 if isinstance(self.index, lazyindex):
493 496 p = self.index.p
494 497 p.loadindex()
495 498 self.nodemap = p.map
496 499
497 500 def _loadmap(self):
498 501 """loads the map from the lazy parser"""
499 502 if isinstance(self.nodemap, lazymap):
500 503 self.nodemap.p.loadmap()
501 504 self.nodemap = self.nodemap.p.map
502 505
503 506 def tip(self):
504 507 return self.node(len(self.index) - 2)
505 508 def __len__(self):
506 509 return len(self.index) - 1
507 510 def __iter__(self):
508 511 for i in xrange(len(self)):
509 512 yield i
510 513 def rev(self, node):
511 514 try:
512 515 return self.nodemap[node]
513 516 except KeyError:
514 517 raise LookupError(node, self.indexfile, _('no node'))
515 518 def node(self, rev):
516 519 return self.index[rev][7]
517 520 def linkrev(self, rev):
518 521 return self.index[rev][4]
519 522 def parents(self, node):
520 523 i = self.index
521 524 d = i[self.rev(node)]
522 525 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
523 526 def parentrevs(self, rev):
524 527 return self.index[rev][5:7]
525 528 def start(self, rev):
526 529 return int(self.index[rev][0] >> 16)
527 530 def end(self, rev):
528 531 return self.start(rev) + self.length(rev)
529 532 def length(self, rev):
530 533 return self.index[rev][1]
531 534 def base(self, rev):
532 535 return self.index[rev][3]
533 536
534 537 def size(self, rev):
535 538 """return the length of the uncompressed text for a given revision"""
536 539 l = self.index[rev][2]
537 540 if l >= 0:
538 541 return l
539 542
540 543 t = self.revision(self.node(rev))
541 544 return len(t)
542 545
543 546 def reachable(self, node, stop=None):
544 547 """return the set of all nodes ancestral to a given node, including
545 548 the node itself, stopping when stop is matched"""
546 549 reachable = set((node,))
547 550 visit = [node]
548 551 if stop:
549 552 stopn = self.rev(stop)
550 553 else:
551 554 stopn = 0
552 555 while visit:
553 556 n = visit.pop(0)
554 557 if n == stop:
555 558 continue
556 559 if n == nullid:
557 560 continue
558 561 for p in self.parents(n):
559 562 if self.rev(p) < stopn:
560 563 continue
561 564 if p not in reachable:
562 565 reachable.add(p)
563 566 visit.append(p)
564 567 return reachable
565 568
566 569 def ancestors(self, *revs):
567 570 """Generate the ancestors of 'revs' in reverse topological order.
568 571
569 572 Yield a sequence of revision numbers starting with the parents
570 573 of each revision in revs, i.e., each revision is *not* considered
571 574 an ancestor of itself. Results are in breadth-first order:
572 575 parents of each rev in revs, then parents of those, etc. Result
573 576 does not include the null revision."""
574 577 visit = list(revs)
575 578 seen = set([nullrev])
576 579 while visit:
577 580 for parent in self.parentrevs(visit.pop(0)):
578 581 if parent not in seen:
579 582 visit.append(parent)
580 583 seen.add(parent)
581 584 yield parent
582 585
583 586 def descendants(self, *revs):
584 587 """Generate the descendants of 'revs' in revision order.
585 588
586 589 Yield a sequence of revision numbers starting with a child of
587 590 some rev in revs, i.e., each revision is *not* considered a
588 591 descendant of itself. Results are ordered by revision number (a
589 592 topological sort)."""
590 593 seen = set(revs)
591 594 for i in xrange(min(revs) + 1, len(self)):
592 595 for x in self.parentrevs(i):
593 596 if x != nullrev and x in seen:
594 597 seen.add(i)
595 598 yield i
596 599 break
597 600
598 601 def findmissing(self, common=None, heads=None):
599 602 """Return the ancestors of heads that are not ancestors of common.
600 603
601 604 More specifically, return a list of nodes N such that every N
602 605 satisfies the following constraints:
603 606
604 607 1. N is an ancestor of some node in 'heads'
605 608 2. N is not an ancestor of any node in 'common'
606 609
607 610 The list is sorted by revision number, meaning it is
608 611 topologically sorted.
609 612
610 613 'heads' and 'common' are both lists of node IDs. If heads is
611 614 not supplied, uses all of the revlog's heads. If common is not
612 615 supplied, uses nullid."""
613 616 if common is None:
614 617 common = [nullid]
615 618 if heads is None:
616 619 heads = self.heads()
617 620
618 621 common = [self.rev(n) for n in common]
619 622 heads = [self.rev(n) for n in heads]
620 623
621 624 # we want the ancestors, but inclusive
622 625 has = set(self.ancestors(*common))
623 626 has.add(nullrev)
624 627 has.update(common)
625 628
626 629 # take all ancestors from heads that aren't in has
627 630 missing = set()
628 631 visit = [r for r in heads if r not in has]
629 632 while visit:
630 633 r = visit.pop(0)
631 634 if r in missing:
632 635 continue
633 636 else:
634 637 missing.add(r)
635 638 for p in self.parentrevs(r):
636 639 if p not in has:
637 640 visit.append(p)
638 641 missing = list(missing)
639 642 missing.sort()
640 643 return [self.node(r) for r in missing]
641 644
642 645 def nodesbetween(self, roots=None, heads=None):
643 646 """Return a topological path from 'roots' to 'heads'.
644 647
645 648 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
646 649 topologically sorted list of all nodes N that satisfy both of
647 650 these constraints:
648 651
649 652 1. N is a descendant of some node in 'roots'
650 653 2. N is an ancestor of some node in 'heads'
651 654
652 655 Every node is considered to be both a descendant and an ancestor
653 656 of itself, so every reachable node in 'roots' and 'heads' will be
654 657 included in 'nodes'.
655 658
656 659 'outroots' is the list of reachable nodes in 'roots', i.e., the
657 660 subset of 'roots' that is returned in 'nodes'. Likewise,
658 661 'outheads' is the subset of 'heads' that is also in 'nodes'.
659 662
660 663 'roots' and 'heads' are both lists of node IDs. If 'roots' is
661 664 unspecified, uses nullid as the only root. If 'heads' is
662 665 unspecified, uses list of all of the revlog's heads."""
663 666 nonodes = ([], [], [])
664 667 if roots is not None:
665 668 roots = list(roots)
666 669 if not roots:
667 670 return nonodes
668 671 lowestrev = min([self.rev(n) for n in roots])
669 672 else:
670 673 roots = [nullid] # Everybody's a descendent of nullid
671 674 lowestrev = nullrev
672 675 if (lowestrev == nullrev) and (heads is None):
673 676 # We want _all_ the nodes!
674 677 return ([self.node(r) for r in self], [nullid], list(self.heads()))
675 678 if heads is None:
676 679 # All nodes are ancestors, so the latest ancestor is the last
677 680 # node.
678 681 highestrev = len(self) - 1
679 682 # Set ancestors to None to signal that every node is an ancestor.
680 683 ancestors = None
681 684 # Set heads to an empty dictionary for later discovery of heads
682 685 heads = {}
683 686 else:
684 687 heads = list(heads)
685 688 if not heads:
686 689 return nonodes
687 690 ancestors = set()
688 691 # Turn heads into a dictionary so we can remove 'fake' heads.
689 692 # Also, later we will be using it to filter out the heads we can't
690 693 # find from roots.
691 694 heads = dict.fromkeys(heads, 0)
692 695 # Start at the top and keep marking parents until we're done.
693 696 nodestotag = set(heads)
694 697 # Remember where the top was so we can use it as a limit later.
695 698 highestrev = max([self.rev(n) for n in nodestotag])
696 699 while nodestotag:
697 700 # grab a node to tag
698 701 n = nodestotag.pop()
699 702 # Never tag nullid
700 703 if n == nullid:
701 704 continue
702 705 # A node's revision number represents its place in a
703 706 # topologically sorted list of nodes.
704 707 r = self.rev(n)
705 708 if r >= lowestrev:
706 709 if n not in ancestors:
707 710 # If we are possibly a descendent of one of the roots
708 711 # and we haven't already been marked as an ancestor
709 712 ancestors.add(n) # Mark as ancestor
710 713 # Add non-nullid parents to list of nodes to tag.
711 714 nodestotag.update([p for p in self.parents(n) if
712 715 p != nullid])
713 716 elif n in heads: # We've seen it before, is it a fake head?
714 717 # So it is, real heads should not be the ancestors of
715 718 # any other heads.
716 719 heads.pop(n)
717 720 if not ancestors:
718 721 return nonodes
719 722 # Now that we have our set of ancestors, we want to remove any
720 723 # roots that are not ancestors.
721 724
722 725 # If one of the roots was nullid, everything is included anyway.
723 726 if lowestrev > nullrev:
724 727 # But, since we weren't, let's recompute the lowest rev to not
725 728 # include roots that aren't ancestors.
726 729
727 730 # Filter out roots that aren't ancestors of heads
728 731 roots = [n for n in roots if n in ancestors]
729 732 # Recompute the lowest revision
730 733 if roots:
731 734 lowestrev = min([self.rev(n) for n in roots])
732 735 else:
733 736 # No more roots? Return empty list
734 737 return nonodes
735 738 else:
736 739 # We are descending from nullid, and don't need to care about
737 740 # any other roots.
738 741 lowestrev = nullrev
739 742 roots = [nullid]
740 743 # Transform our roots list into a set.
741 744 descendents = set(roots)
742 745 # Also, keep the original roots so we can filter out roots that aren't
743 746 # 'real' roots (i.e. are descended from other roots).
744 747 roots = descendents.copy()
745 748 # Our topologically sorted list of output nodes.
746 749 orderedout = []
747 750 # Don't start at nullid since we don't want nullid in our output list,
748 751 # and if nullid shows up in descendents, empty parents will look like
749 752 # they're descendents.
750 753 for r in xrange(max(lowestrev, 0), highestrev + 1):
751 754 n = self.node(r)
752 755 isdescendent = False
753 756 if lowestrev == nullrev: # Everybody is a descendent of nullid
754 757 isdescendent = True
755 758 elif n in descendents:
756 759 # n is already a descendent
757 760 isdescendent = True
758 761 # This check only needs to be done here because all the roots
759 762 # will start being marked as descendents before the loop.
760 763 if n in roots:
761 764 # If n was a root, check if it's a 'real' root.
762 765 p = tuple(self.parents(n))
763 766 # If any of its parents are descendents, it's not a root.
764 767 if (p[0] in descendents) or (p[1] in descendents):
765 768 roots.remove(n)
766 769 else:
767 770 p = tuple(self.parents(n))
768 771 # A node is a descendent if either of its parents are
769 772 # descendents. (We seeded the descendents set with the roots
770 773 # up there, remember?)
771 774 if (p[0] in descendents) or (p[1] in descendents):
772 775 descendents.add(n)
773 776 isdescendent = True
774 777 if isdescendent and ((ancestors is None) or (n in ancestors)):
775 778 # Only include nodes that are both descendents and ancestors.
776 779 orderedout.append(n)
777 780 if (ancestors is not None) and (n in heads):
778 781 # We're trying to figure out which heads are reachable
779 782 # from roots.
780 783 # Mark this head as having been reached
781 784 heads[n] = 1
782 785 elif ancestors is None:
783 786 # Otherwise, we're trying to discover the heads.
784 787 # Assume this is a head because if it isn't, the next step
785 788 # will eventually remove it.
786 789 heads[n] = 1
787 790 # But, obviously its parents aren't.
788 791 for p in self.parents(n):
789 792 heads.pop(p, None)
790 793 heads = [n for n in heads.iterkeys() if heads[n] != 0]
791 794 roots = list(roots)
792 795 assert orderedout
793 796 assert roots
794 797 assert heads
795 798 return (orderedout, roots, heads)
796 799
797 800 def heads(self, start=None, stop=None):
798 801 """return the list of all nodes that have no children
799 802
800 803 if start is specified, only heads that are descendants of
801 804 start will be returned
802 805 if stop is specified, it will consider all the revs from stop
803 806 as if they had no children
804 807 """
805 808 if start is None and stop is None:
806 809 count = len(self)
807 810 if not count:
808 811 return [nullid]
809 812 ishead = [1] * (count + 1)
810 813 index = self.index
811 814 for r in xrange(count):
812 815 e = index[r]
813 816 ishead[e[5]] = ishead[e[6]] = 0
814 817 return [self.node(r) for r in xrange(count) if ishead[r]]
815 818
816 819 if start is None:
817 820 start = nullid
818 821 if stop is None:
819 822 stop = []
820 823 stoprevs = set([self.rev(n) for n in stop])
821 824 startrev = self.rev(start)
822 825 reachable = set((startrev,))
823 826 heads = set((startrev,))
824 827
825 828 parentrevs = self.parentrevs
826 829 for r in xrange(startrev + 1, len(self)):
827 830 for p in parentrevs(r):
828 831 if p in reachable:
829 832 if r not in stoprevs:
830 833 reachable.add(r)
831 834 heads.add(r)
832 835 if p in heads and p not in stoprevs:
833 836 heads.remove(p)
834 837
835 838 return [self.node(r) for r in heads]
836 839
837 840 def children(self, node):
838 841 """find the children of a given node"""
839 842 c = []
840 843 p = self.rev(node)
841 844 for r in range(p + 1, len(self)):
842 845 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
843 846 if prevs:
844 847 for pr in prevs:
845 848 if pr == p:
846 849 c.append(self.node(r))
847 850 elif p == nullrev:
848 851 c.append(self.node(r))
849 852 return c
850 853
851 854 def descendant(self, start, end):
852 855 for i in self.descendants(start):
853 856 if i == end:
854 857 return True
855 858 elif i > end:
856 859 break
857 860 return False
858 861
859 862 def ancestor(self, a, b):
860 863 """calculate the least common ancestor of nodes a and b"""
861 864
862 865 # fast path, check if it is a descendant
863 866 a, b = self.rev(a), self.rev(b)
864 867 start, end = sorted((a, b))
865 868 if self.descendant(start, end):
866 869 return self.node(start)
867 870
868 871 def parents(rev):
869 872 return [p for p in self.parentrevs(rev) if p != nullrev]
870 873
871 874 c = ancestor.ancestor(a, b, parents)
872 875 if c is None:
873 876 return nullid
874 877
875 878 return self.node(c)
876 879
877 880 def _match(self, id):
878 881 if isinstance(id, (long, int)):
879 882 # rev
880 883 return self.node(id)
881 884 if len(id) == 20:
882 885 # possibly a binary node
883 886 # odds of a binary node being all hex in ASCII are 1 in 10**25
884 887 try:
885 888 node = id
886 889 self.rev(node) # quick search the index
887 890 return node
888 891 except LookupError:
889 892 pass # may be partial hex id
890 893 try:
891 894 # str(rev)
892 895 rev = int(id)
893 896 if str(rev) != id:
894 897 raise ValueError
895 898 if rev < 0:
896 899 rev = len(self) + rev
897 900 if rev < 0 or rev >= len(self):
898 901 raise ValueError
899 902 return self.node(rev)
900 903 except (ValueError, OverflowError):
901 904 pass
902 905 if len(id) == 40:
903 906 try:
904 907 # a full hex nodeid?
905 908 node = bin(id)
906 909 self.rev(node)
907 910 return node
908 911 except (TypeError, LookupError):
909 912 pass
910 913
911 914 def _partialmatch(self, id):
912 915 if len(id) < 40:
913 916 try:
914 917 # hex(node)[:...]
915 918 l = len(id) // 2 # grab an even number of digits
916 919 bin_id = bin(id[:l * 2])
917 920 nl = [n for n in self.nodemap if n[:l] == bin_id]
918 921 nl = [n for n in nl if hex(n).startswith(id)]
919 922 if len(nl) > 0:
920 923 if len(nl) == 1:
921 924 return nl[0]
922 925 raise LookupError(id, self.indexfile,
923 926 _('ambiguous identifier'))
924 927 return None
925 928 except TypeError:
926 929 pass
927 930
928 931 def lookup(self, id):
929 932 """locate a node based on:
930 933 - revision number or str(revision number)
931 934 - nodeid or subset of hex nodeid
932 935 """
933 936 n = self._match(id)
934 937 if n is not None:
935 938 return n
936 939 n = self._partialmatch(id)
937 940 if n:
938 941 return n
939 942
940 943 raise LookupError(id, self.indexfile, _('no match found'))
941 944
942 945 def cmp(self, node, text):
943 946 """compare text with a given file revision"""
944 947 p1, p2 = self.parents(node)
945 948 return hash(text, p1, p2) != node
946 949
947 950 def _addchunk(self, offset, data):
948 951 o, d = self._chunkcache
949 952 # try to add to existing cache
950 953 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
951 954 self._chunkcache = o, d + data
952 955 else:
953 956 self._chunkcache = offset, data
954 957
955 958 def _loadchunk(self, offset, length):
956 959 if self._inline:
957 960 df = self.opener(self.indexfile)
958 961 else:
959 962 df = self.opener(self.datafile)
960 963
961 964 readahead = max(65536, length)
962 965 df.seek(offset)
963 966 d = df.read(readahead)
964 967 self._addchunk(offset, d)
965 968 if readahead > length:
966 969 return d[:length]
967 970 return d
968 971
969 972 def _getchunk(self, offset, length):
970 973 o, d = self._chunkcache
971 974 l = len(d)
972 975
973 976 # is it in the cache?
974 977 cachestart = offset - o
975 978 cacheend = cachestart + length
976 979 if cachestart >= 0 and cacheend <= l:
977 980 if cachestart == 0 and cacheend == l:
978 981 return d # avoid a copy
979 982 return d[cachestart:cacheend]
980 983
981 984 return self._loadchunk(offset, length)
982 985
983 986 def _chunkraw(self, startrev, endrev):
984 987 start = self.start(startrev)
985 988 length = self.end(endrev) - start
986 989 if self._inline:
987 990 start += (startrev + 1) * self._io.size
988 991 return self._getchunk(start, length)
989 992
990 993 def _chunk(self, rev):
991 994 return decompress(self._chunkraw(rev, rev))
992 995
993 996 def _chunkclear(self):
994 997 self._chunkcache = (0, '')
995 998
996 999 def revdiff(self, rev1, rev2):
997 1000 """return or calculate a delta between two revisions"""
998 1001 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
999 1002 return self._chunk(rev2)
1000 1003
1001 1004 return mdiff.textdiff(self.revision(self.node(rev1)),
1002 1005 self.revision(self.node(rev2)))
1003 1006
1004 1007 def revision(self, node):
1005 1008 """return an uncompressed revision of a given node"""
1006 1009 if node == nullid:
1007 1010 return ""
1008 1011 if self._cache and self._cache[0] == node:
1009 1012 return self._cache[2]
1010 1013
1011 1014 # look up what we need to read
1012 1015 text = None
1013 1016 rev = self.rev(node)
1014 1017 base = self.base(rev)
1015 1018
1016 1019 # check rev flags
1017 1020 if self.index[rev][0] & 0xFFFF:
1018 1021 raise RevlogError(_('incompatible revision flag %x') %
1019 1022 (self.index[rev][0] & 0xFFFF))
1020 1023
1021 1024 # do we have useful data cached?
1022 1025 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
1023 1026 base = self._cache[1]
1024 1027 text = self._cache[2]
1025 1028
1026 1029 self._loadindex(base, rev + 1)
1027 1030 self._chunkraw(base, rev)
1028 1031 if text is None:
1029 1032 text = self._chunk(base)
1030 1033
1031 1034 bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
1032 1035 text = mdiff.patches(text, bins)
1033 1036 p1, p2 = self.parents(node)
1034 1037 if node != hash(text, p1, p2):
1035 1038 raise RevlogError(_("integrity check failed on %s:%d")
1036 1039 % (self.indexfile, rev))
1037 1040
1038 1041 self._cache = (node, rev, text)
1039 1042 return text
1040 1043
1041 1044 def checkinlinesize(self, tr, fp=None):
1042 1045 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1043 1046 return
1044 1047
1045 1048 trinfo = tr.find(self.indexfile)
1046 1049 if trinfo is None:
1047 1050 raise RevlogError(_("%s not found in the transaction")
1048 1051 % self.indexfile)
1049 1052
1050 1053 trindex = trinfo[2]
1051 1054 dataoff = self.start(trindex)
1052 1055
1053 1056 tr.add(self.datafile, dataoff)
1054 1057
1055 1058 if fp:
1056 1059 fp.flush()
1057 1060 fp.close()
1058 1061
1059 1062 df = self.opener(self.datafile, 'w')
1060 1063 try:
1061 1064 for r in self:
1062 1065 df.write(self._chunkraw(r, r))
1063 1066 finally:
1064 1067 df.close()
1065 1068
1066 1069 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1067 1070 self.version &= ~(REVLOGNGINLINEDATA)
1068 1071 self._inline = False
1069 1072 for i in self:
1070 1073 e = self._io.packentry(self.index[i], self.node, self.version, i)
1071 1074 fp.write(e)
1072 1075
1073 1076 # if we don't call rename, the temp file will never replace the
1074 1077 # real index
1075 1078 fp.rename()
1076 1079
1077 1080 tr.replace(self.indexfile, trindex * self._io.size)
1078 1081 self._chunkclear()
1079 1082
1080 1083 def addrevision(self, text, transaction, link, p1, p2, d=None):
1081 1084 """add a revision to the log
1082 1085
1083 1086 text - the revision data to add
1084 1087 transaction - the transaction object used for rollback
1085 1088 link - the linkrev data to add
1086 1089 p1, p2 - the parent nodeids of the revision
1087 1090 d - an optional precomputed delta
1088 1091 """
1089 1092 dfh = None
1090 1093 if not self._inline:
1091 1094 dfh = self.opener(self.datafile, "a")
1092 1095 ifh = self.opener(self.indexfile, "a+")
1093 1096 try:
1094 1097 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1095 1098 finally:
1096 1099 if dfh:
1097 1100 dfh.close()
1098 1101 ifh.close()
1099 1102
1100 1103 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1101 1104 node = hash(text, p1, p2)
1102 1105 if node in self.nodemap:
1103 1106 return node
1104 1107
1105 1108 curr = len(self)
1106 1109 prev = curr - 1
1107 1110 base = self.base(prev)
1108 1111 offset = self.end(prev)
1109 1112
1110 1113 if curr:
1111 1114 if not d:
1112 1115 ptext = self.revision(self.node(prev))
1113 1116 d = mdiff.textdiff(ptext, text)
1114 1117 data = compress(d)
1115 1118 l = len(data[1]) + len(data[0])
1116 1119 dist = l + offset - self.start(base)
1117 1120
1118 1121 # full versions are inserted when the needed deltas
1119 1122 # become comparable to the uncompressed text
1120 1123 if not curr or dist > len(text) * 2:
1121 1124 data = compress(text)
1122 1125 l = len(data[1]) + len(data[0])
1123 1126 base = curr
1124 1127
1125 1128 e = (offset_type(offset, 0), l, len(text),
1126 1129 base, link, self.rev(p1), self.rev(p2), node)
1127 1130 self.index.insert(-1, e)
1128 1131 self.nodemap[node] = curr
1129 1132
1130 1133 entry = self._io.packentry(e, self.node, self.version, curr)
1131 1134 if not self._inline:
1132 1135 transaction.add(self.datafile, offset)
1133 1136 transaction.add(self.indexfile, curr * len(entry))
1134 1137 if data[0]:
1135 1138 dfh.write(data[0])
1136 1139 dfh.write(data[1])
1137 1140 dfh.flush()
1138 1141 ifh.write(entry)
1139 1142 else:
1140 1143 offset += curr * self._io.size
1141 1144 transaction.add(self.indexfile, offset, curr)
1142 1145 ifh.write(entry)
1143 1146 ifh.write(data[0])
1144 1147 ifh.write(data[1])
1145 1148 self.checkinlinesize(transaction, ifh)
1146 1149
1147 1150 if type(text) == str: # only accept immutable objects
1148 1151 self._cache = (node, curr, text)
1149 1152 return node
1150 1153
1151 1154 def group(self, nodelist, lookup, infocollect=None):
1152 1155 """Calculate a delta group, yielding a sequence of changegroup chunks
1153 1156 (strings).
1154 1157
1155 1158 Given a list of changeset revs, return a set of deltas and
1156 1159 metadata corresponding to nodes. The first delta is
1157 1160 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1158 1161 have this parent as it has all history before these
1159 1162 changesets. The parent is parent[0].
1160 1163 """
1161 1164
1162 1165 revs = [self.rev(n) for n in nodelist]
1163 1166
1164 1167 # if we don't have any revisions touched by these changesets, bail
1165 1168 if not revs:
1166 1169 yield changegroup.closechunk()
1167 1170 return
1168 1171
1169 1172 # add the parent of the first rev
1170 1173 p = self.parentrevs(revs[0])[0]
1171 1174 revs.insert(0, p)
1172 1175
1173 1176 # build deltas
1174 1177 for d in xrange(len(revs) - 1):
1175 1178 a, b = revs[d], revs[d + 1]
1176 1179 nb = self.node(b)
1177 1180
1178 1181 if infocollect is not None:
1179 1182 infocollect(nb)
1180 1183
1181 1184 p = self.parents(nb)
1182 1185 meta = nb + p[0] + p[1] + lookup(nb)
1183 1186 if a == -1:
1184 1187 d = self.revision(nb)
1185 1188 meta += mdiff.trivialdiffheader(len(d))
1186 1189 else:
1187 1190 d = self.revdiff(a, b)
1188 1191 yield changegroup.chunkheader(len(meta) + len(d))
1189 1192 yield meta
1190 1193 if len(d) > 2**20:
1191 1194 pos = 0
1192 1195 while pos < len(d):
1193 1196 pos2 = pos + 2 ** 18
1194 1197 yield d[pos:pos2]
1195 1198 pos = pos2
1196 1199 else:
1197 1200 yield d
1198 1201
1199 1202 yield changegroup.closechunk()
1200 1203
1201 1204 def addgroup(self, revs, linkmapper, transaction):
1202 1205 """
1203 1206 add a delta group
1204 1207
1205 1208 given a set of deltas, add them to the revision log. the
1206 1209 first delta is against its parent, which should be in our
1207 1210 log, the rest are against the previous delta.
1208 1211 """
1209 1212
1210 1213 #track the base of the current delta log
1211 1214 r = len(self)
1212 1215 t = r - 1
1213 1216 node = None
1214 1217
1215 1218 base = prev = nullrev
1216 1219 start = end = textlen = 0
1217 1220 if r:
1218 1221 end = self.end(t)
1219 1222
1220 1223 ifh = self.opener(self.indexfile, "a+")
1221 1224 isize = r * self._io.size
1222 1225 if self._inline:
1223 1226 transaction.add(self.indexfile, end + isize, r)
1224 1227 dfh = None
1225 1228 else:
1226 1229 transaction.add(self.indexfile, isize, r)
1227 1230 transaction.add(self.datafile, end)
1228 1231 dfh = self.opener(self.datafile, "a")
1229 1232
1230 1233 try:
1231 1234 # loop through our set of deltas
1232 1235 chain = None
1233 1236 for chunk in revs:
1234 1237 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1235 1238 link = linkmapper(cs)
1236 1239 if node in self.nodemap:
1237 1240 # this can happen if two branches make the same change
1238 1241 chain = node
1239 1242 continue
1240 1243 delta = buffer(chunk, 80)
1241 1244 del chunk
1242 1245
1243 1246 for p in (p1, p2):
1244 1247 if not p in self.nodemap:
1245 1248 raise LookupError(p, self.indexfile, _('unknown parent'))
1246 1249
1247 1250 if not chain:
1248 1251 # retrieve the parent revision of the delta chain
1249 1252 chain = p1
1250 1253 if not chain in self.nodemap:
1251 1254 raise LookupError(chain, self.indexfile, _('unknown base'))
1252 1255
1253 1256 # full versions are inserted when the needed deltas become
1254 1257 # comparable to the uncompressed text or when the previous
1255 1258 # version is not the one we have a delta against. We use
1256 1259 # the size of the previous full rev as a proxy for the
1257 1260 # current size.
1258 1261
1259 1262 if chain == prev:
1260 1263 cdelta = compress(delta)
1261 1264 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1262 1265 textlen = mdiff.patchedsize(textlen, delta)
1263 1266
1264 1267 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1265 1268 # flush our writes here so we can read it in revision
1266 1269 if dfh:
1267 1270 dfh.flush()
1268 1271 ifh.flush()
1269 1272 text = self.revision(chain)
1270 1273 if len(text) == 0:
1271 1274 # skip over trivial delta header
1272 1275 text = buffer(delta, 12)
1273 1276 else:
1274 1277 text = mdiff.patches(text, [delta])
1275 1278 del delta
1276 1279 chk = self._addrevision(text, transaction, link, p1, p2, None,
1277 1280 ifh, dfh)
1278 1281 if not dfh and not self._inline:
1279 1282 # addrevision switched from inline to conventional
1280 1283 # reopen the index
1281 1284 dfh = self.opener(self.datafile, "a")
1282 1285 ifh = self.opener(self.indexfile, "a")
1283 1286 if chk != node:
1284 1287 raise RevlogError(_("consistency error adding group"))
1285 1288 textlen = len(text)
1286 1289 else:
1287 1290 e = (offset_type(end, 0), cdeltalen, textlen, base,
1288 1291 link, self.rev(p1), self.rev(p2), node)
1289 1292 self.index.insert(-1, e)
1290 1293 self.nodemap[node] = r
1291 1294 entry = self._io.packentry(e, self.node, self.version, r)
1292 1295 if self._inline:
1293 1296 ifh.write(entry)
1294 1297 ifh.write(cdelta[0])
1295 1298 ifh.write(cdelta[1])
1296 1299 self.checkinlinesize(transaction, ifh)
1297 1300 if not self._inline:
1298 1301 dfh = self.opener(self.datafile, "a")
1299 1302 ifh = self.opener(self.indexfile, "a")
1300 1303 else:
1301 1304 dfh.write(cdelta[0])
1302 1305 dfh.write(cdelta[1])
1303 1306 ifh.write(entry)
1304 1307
1305 1308 t, r, chain, prev = r, r + 1, node, node
1306 1309 base = self.base(t)
1307 1310 start = self.start(base)
1308 1311 end = self.end(t)
1309 1312 finally:
1310 1313 if dfh:
1311 1314 dfh.close()
1312 1315 ifh.close()
1313 1316
1314 1317 return node
1315 1318
1316 1319 def strip(self, minlink, transaction):
1317 1320 """truncate the revlog on the first revision with a linkrev >= minlink
1318 1321
1319 1322 This function is called when we're stripping revision minlink and
1320 1323 its descendants from the repository.
1321 1324
1322 1325 We have to remove all revisions with linkrev >= minlink, because
1323 1326 the equivalent changelog revisions will be renumbered after the
1324 1327 strip.
1325 1328
1326 1329 So we truncate the revlog on the first of these revisions, and
1327 1330 trust that the caller has saved the revisions that shouldn't be
1328 1331 removed and that it'll readd them after this truncation.
1329 1332 """
1330 1333 if len(self) == 0:
1331 1334 return
1332 1335
1333 1336 if isinstance(self.index, lazyindex):
1334 1337 self._loadindexmap()
1335 1338
1336 1339 for rev in self:
1337 1340 if self.index[rev][4] >= minlink:
1338 1341 break
1339 1342 else:
1340 1343 return
1341 1344
1342 1345 # first truncate the files on disk
1343 1346 end = self.start(rev)
1344 1347 if not self._inline:
1345 1348 transaction.add(self.datafile, end)
1346 1349 end = rev * self._io.size
1347 1350 else:
1348 1351 end += rev * self._io.size
1349 1352
1350 1353 transaction.add(self.indexfile, end)
1351 1354
1352 1355 # then reset internal state in memory to forget those revisions
1353 1356 self._cache = None
1354 1357 self._chunkclear()
1355 1358 for x in xrange(rev, len(self)):
1356 1359 del self.nodemap[self.node(x)]
1357 1360
1358 1361 del self.index[rev:-1]
1359 1362
1360 1363 def checksize(self):
1361 1364 expected = 0
1362 1365 if len(self):
1363 1366 expected = max(0, self.end(len(self) - 1))
1364 1367
1365 1368 try:
1366 1369 f = self.opener(self.datafile)
1367 1370 f.seek(0, 2)
1368 1371 actual = f.tell()
1369 1372 dd = actual - expected
1370 1373 except IOError, inst:
1371 1374 if inst.errno != errno.ENOENT:
1372 1375 raise
1373 1376 dd = 0
1374 1377
1375 1378 try:
1376 1379 f = self.opener(self.indexfile)
1377 1380 f.seek(0, 2)
1378 1381 actual = f.tell()
1379 1382 s = self._io.size
1380 1383 i = max(0, actual // s)
1381 1384 di = actual - (i * s)
1382 1385 if self._inline:
1383 1386 databytes = 0
1384 1387 for r in self:
1385 1388 databytes += max(0, self.length(r))
1386 1389 dd = 0
1387 1390 di = actual - len(self) * s - databytes
1388 1391 except IOError, inst:
1389 1392 if inst.errno != errno.ENOENT:
1390 1393 raise
1391 1394 di = 0
1392 1395
1393 1396 return (dd, di)
1394 1397
1395 1398 def files(self):
1396 1399 res = [self.indexfile]
1397 1400 if not self._inline:
1398 1401 res.append(self.datafile)
1399 1402 return res
@@ -1,144 +1,145
1 1 # statichttprepo.py - simple http repository class for mercurial
2 2 #
3 3 # This provides read-only repo access to repositories exported via static http
4 4 #
5 5 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from i18n import _
11 11 import changelog, byterange, url, error
12 12 import localrepo, manifest, util, store
13 13 import urllib, urllib2, errno
14 14
15 15 class httprangereader(object):
16 16 def __init__(self, url, opener):
17 17 # we assume opener has HTTPRangeHandler
18 18 self.url = url
19 19 self.pos = 0
20 20 self.opener = opener
21 21 self.name = url
22 22 def seek(self, pos):
23 23 self.pos = pos
24 24 def read(self, bytes=None):
25 25 req = urllib2.Request(self.url)
26 26 end = ''
27 27 if bytes:
28 28 end = self.pos + bytes - 1
29 29 req.add_header('Range', 'bytes=%d-%s' % (self.pos, end))
30 30
31 31 try:
32 32 f = self.opener.open(req)
33 33 data = f.read()
34 34 if hasattr(f, 'getcode'):
35 35 # python 2.6+
36 36 code = f.getcode()
37 37 elif hasattr(f, 'code'):
38 38 # undocumented attribute, seems to be set in 2.4 and 2.5
39 39 code = f.code
40 40 else:
41 41 # Don't know how to check, hope for the best.
42 42 code = 206
43 43 except urllib2.HTTPError, inst:
44 44 num = inst.code == 404 and errno.ENOENT or None
45 45 raise IOError(num, inst)
46 46 except urllib2.URLError, inst:
47 47 raise IOError(None, inst.reason[1])
48 48
49 49 if code == 200:
50 50 # HTTPRangeHandler does nothing if remote does not support
51 51 # Range headers and returns the full entity. Let's slice it.
52 52 if bytes:
53 53 data = data[self.pos:self.pos + bytes]
54 54 else:
55 55 data = data[self.pos:]
56 56 elif bytes:
57 57 data = data[:bytes]
58 58 self.pos += len(data)
59 59 return data
60 60 def __iter__(self):
61 61 return iter(self.read().splitlines(1))
62 62 def close(self):
63 63 pass
64 64
65 65 def build_opener(ui, authinfo):
66 66 # urllib cannot handle URLs with embedded user or passwd
67 67 urlopener = url.opener(ui, authinfo)
68 68 urlopener.add_handler(byterange.HTTPRangeHandler())
69 69
70 70 def opener(base):
71 71 """return a function that opens files over http"""
72 72 p = base
73 73 def o(path, mode="r", atomictemp=None):
74 74 if 'a' in mode or 'w' in mode:
75 75 raise IOError('Permission denied')
76 76 f = "/".join((p, urllib.quote(path)))
77 77 return httprangereader(f, urlopener)
78 78 return o
79 79
80 opener.options = {'nonlazy': 1}
80 81 return opener
81 82
82 83 class statichttprepository(localrepo.localrepository):
83 84 def __init__(self, ui, path):
84 85 self._url = path
85 86 self.ui = ui
86 87
87 88 self.root = path
88 89 self.path, authinfo = url.getauthinfo(path.rstrip('/') + "/.hg")
89 90
90 91 opener = build_opener(ui, authinfo)
91 92 self.opener = opener(self.path)
92 93
93 94 # find requirements
94 95 try:
95 96 requirements = self.opener("requires").read().splitlines()
96 97 except IOError, inst:
97 98 if inst.errno != errno.ENOENT:
98 99 raise
99 100 # check if it is a non-empty old-style repository
100 101 try:
101 102 self.opener("00changelog.i").read(1)
102 103 except IOError, inst:
103 104 if inst.errno != errno.ENOENT:
104 105 raise
105 106 # we do not care about empty old-style repositories here
106 107 msg = _("'%s' does not appear to be an hg repository") % path
107 108 raise error.RepoError(msg)
108 109 requirements = []
109 110
110 111 # check them
111 112 for r in requirements:
112 113 if r not in self.supported:
113 114 raise error.RepoError(_("requirement '%s' not supported") % r)
114 115
115 116 # setup store
116 117 def pjoin(a, b):
117 118 return a + '/' + b
118 119 self.store = store.store(requirements, self.path, opener, pjoin)
119 120 self.spath = self.store.path
120 121 self.sopener = self.store.opener
121 122 self.sjoin = self.store.join
122 123
123 124 self.manifest = manifest.manifest(self.sopener)
124 125 self.changelog = changelog.changelog(self.sopener)
125 126 self._tags = None
126 127 self.nodetagscache = None
127 128 self._branchcache = None
128 129 self._branchcachetip = None
129 130 self.encodepats = None
130 131 self.decodepats = None
131 132
132 133 def url(self):
133 134 return self._url
134 135
135 136 def local(self):
136 137 return False
137 138
138 139 def lock(self, wait=True):
139 140 raise util.Abort(_('cannot lock static-http repository'))
140 141
141 142 def instance(ui, path, create):
142 143 if create:
143 144 raise util.Abort(_('cannot create new static-http repository'))
144 145 return statichttprepository(ui, path[7:])
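Taken together, the two hunks let static-http repositories bypass the lazyparser: build_opener now tags the opener factory with options = {'nonlazy': 1}, and a revlog whose opener carries that flag reads its entire index up front. That matters for static-http because httprangereader.read() issues a fresh HTTP request on every call, so the lazy parser's many small block reads would each turn into a separate round trip to the remote server, whereas one full read of the index is a single request. A hypothetical check of the flag (not part of the changeset; ui and authinfo as in statichttprepository.__init__):

    op = build_opener(ui, authinfo)
    assert op.options.get('nonlazy')   # revlog consults such a dict via getattr(..., 'options', {})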