revlog: optimize deltachain
Matt Mackall
r11935:927ada25 default
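This change rewrites deltachain() to walk the delta chain through direct index access instead of calling self.base() and self.deltaparent() on every iteration, and adds module-level _cached/_uncached counters so revision() can record how often the walk terminates at the in-memory cache. Binding self.index to a local and fetching each entry tuple once removes two method calls and several attribute lookups per revision from a hot loop: the base rev is read from e[3], the flags from e[0], and the first parent from e[5]. A minimal standalone sketch of the rewritten walk (hypothetical function, assuming the RevlogNG index tuple layout and the REVIDX_PARENTDELTA flag defined in the file):

    def deltachain(index, rev, cache):
        # walk from rev toward its delta base, touching each index
        # entry exactly once
        chain = []
        e = index[rev]
        while rev != e[3] and rev != cache:   # e[3] is the base rev
            chain.append(rev)
            if e[0] & REVIDX_PARENTDELTA:     # flags live in e[0]
                rev = e[5]                    # delta against first parent
            else:
                rev -= 1                      # delta against previous rev
            e = index[rev]
        chain.reverse()                       # apply deltas oldest-first
        return rev == cache, rev, chain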
@@ -1,1450 +1,1464 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 _cached, _uncached = 0, 0
27
26 28 # revlog header flags
27 29 REVLOGV0 = 0
28 30 REVLOGNG = 1
29 31 REVLOGNGINLINEDATA = (1 << 16)
30 32 REVLOGSHALLOW = (1 << 17)
31 33 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 34 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 35 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 36 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 37
36 38 # revlog index flags
37 39 REVIDX_PARENTDELTA = 1
38 40 REVIDX_PUNCHED_FLAG = 2
39 41 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 42
41 43 # amount of data read unconditionally, should be >= 4
42 44 # when not inline: threshold for using lazy index
43 45 _prereadsize = 1048576
44 46 # max size of revlog with inline data
45 47 _maxinline = 131072
46 48
47 49 RevlogError = error.RevlogError
48 50 LookupError = error.LookupError
49 51
50 52 def getoffset(q):
51 53 return int(q >> 16)
52 54
53 55 def gettype(q):
54 56 return int(q & 0xFFFF)
55 57
56 58 def offset_type(offset, type):
57 59 return long(long(offset) << 16 | type)
58 60
59 61 nullhash = _sha(nullid)
60 62
61 63 def hash(text, p1, p2):
62 64 """generate a hash from the given text and its parent hashes
63 65
64 66 This hash combines both the current file contents and its history
65 67 in a manner that makes it easy to distinguish nodes with the same
66 68 content in the revision graph.
67 69 """
68 70 # As of now, if one of the parent nodes is null, p2 is null
69 71 if p2 == nullid:
70 72 # deep copy of a hash is faster than creating one
71 73 s = nullhash.copy()
72 74 s.update(p1)
73 75 else:
74 76 # neither of the parent nodes is nullid
75 77 l = [p1, p2]
76 78 l.sort()
77 79 s = _sha(l[0])
78 80 s.update(l[1])
79 81 s.update(text)
80 82 return s.digest()
81 83
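# A standalone sketch of the scheme above (hypothetical helper,
# assuming util.sha1 resolves to SHA-1): hashing the parents in
# sorted order makes the node independent of parent ordering, and
# the p2 == nullid branch is only a fast path for this computation.
import hashlib

def _nodehash_sketch(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))  # nullid sorts first, being all zero bytes
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()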
82 84 def compress(text):
83 85 """ generate a possibly-compressed representation of text """
84 86 if not text:
85 87 return ("", text)
86 88 l = len(text)
87 89 bin = None
88 90 if l < 44:
89 91 pass
90 92 elif l > 1000000:
91 93 # zlib makes an internal copy, thus doubling memory usage for
92 94 # large files, so let's do this in pieces
93 95 z = zlib.compressobj()
94 96 p = []
95 97 pos = 0
96 98 while pos < l:
97 99 pos2 = pos + 2**20
98 100 p.append(z.compress(text[pos:pos2]))
99 101 pos = pos2
100 102 p.append(z.flush())
101 103 if sum(map(len, p)) < l:
102 104 bin = "".join(p)
103 105 else:
104 106 bin = _compress(text)
105 107 if bin is None or len(bin) > l:
106 108 if text[0] == '\0':
107 109 return ("", text)
108 110 return ('u', text)
109 111 return ("", bin)
110 112
111 113 def decompress(bin):
112 114 """ decompress the given input """
113 115 if not bin:
114 116 return bin
115 117 t = bin[0]
116 118 if t == '\0':
117 119 return bin
118 120 if t == 'x':
119 121 return _decompress(bin)
120 122 if t == 'u':
121 123 return bin[1:]
122 124 raise RevlogError(_("unknown compression type %r") % t)
123 125
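# A sketch of the tagging convention the two helpers above agree on:
# zlib output begins with 'x', stored literals are prefixed with 'u',
# and text that already begins with '\0' is kept untagged. compress()
# returns a (prefix, data) pair whose concatenation decompress()
# accepts (the sample string here is arbitrary).
prefix, data = compress("some revision text " * 64)
assert decompress(prefix + data) == "some revision text " * 64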
124 126 class lazyparser(object):
125 127 """
126 128 this class avoids the need to parse the entirety of large indices
127 129 """
128 130
129 131 # lazyparser is not safe to use on windows if win32 extensions are not
130 132 # available. it keeps the file handle open, which makes it impossible
131 133 # to break hardlinks on locally cloned repos.
132 134
133 135 def __init__(self, dataf):
134 136 try:
135 137 size = util.fstat(dataf).st_size
136 138 except AttributeError:
137 139 size = 0
138 140 self.dataf = dataf
139 141 self.s = struct.calcsize(indexformatng)
140 142 self.datasize = size
141 143 self.l = size // self.s
142 144 self.index = [None] * self.l
143 145 self.map = {nullid: nullrev}
144 146 self.allmap = 0
145 147 self.all = 0
146 148 self.mapfind_count = 0
147 149
148 150 def loadmap(self):
149 151 """
150 152 during a commit, we need to make sure the rev being added is
151 153 not a duplicate. This requires loading the entire index,
152 154 which is fairly slow. loadmap can load up just the node map,
153 155 which takes much less time.
154 156 """
155 157 if self.allmap:
156 158 return
157 159 end = self.datasize
158 160 self.allmap = 1
159 161 cur = 0
160 162 count = 0
161 163 blocksize = self.s * 256
162 164 self.dataf.seek(0)
163 165 while cur < end:
164 166 data = self.dataf.read(blocksize)
165 167 off = 0
166 168 for x in xrange(256):
167 169 n = data[off + ngshaoffset:off + ngshaoffset + 20]
168 170 self.map[n] = count
169 171 count += 1
170 172 if count >= self.l:
171 173 break
172 174 off += self.s
173 175 cur += blocksize
174 176
175 177 def loadblock(self, blockstart, blocksize, data=None):
176 178 if self.all:
177 179 return
178 180 if data is None:
179 181 self.dataf.seek(blockstart)
180 182 if blockstart + blocksize > self.datasize:
181 183 # the revlog may have grown since we've started running,
182 184 # but we don't have space in self.index for more entries.
183 185 # limit blocksize so that we don't get too much data.
184 186 blocksize = max(self.datasize - blockstart, 0)
185 187 data = self.dataf.read(blocksize)
186 188 lend = len(data) // self.s
187 189 i = blockstart // self.s
188 190 off = 0
189 191 # lazyindex supports __delitem__
190 192 if lend > len(self.index) - i:
191 193 lend = len(self.index) - i
192 194 for x in xrange(lend):
193 195 if self.index[i + x] is None:
194 196 b = data[off : off + self.s]
195 197 self.index[i + x] = b
196 198 n = b[ngshaoffset:ngshaoffset + 20]
197 199 self.map[n] = i + x
198 200 off += self.s
199 201
200 202 def findnode(self, node):
201 203 """search backwards through the index file for a specific node"""
202 204 if self.allmap:
203 205 return None
204 206
205 207 # hg log will cause many, many searches for the manifest
206 208 # nodes. After we get called a few times, just load the whole
207 209 # thing.
208 210 if self.mapfind_count > 8:
209 211 self.loadmap()
210 212 if node in self.map:
211 213 return node
212 214 return None
213 215 self.mapfind_count += 1
214 216 last = self.l - 1
215 217 while self.index[last] != None:
216 218 if last == 0:
217 219 self.all = 1
218 220 self.allmap = 1
219 221 return None
220 222 last -= 1
221 223 end = (last + 1) * self.s
222 224 blocksize = self.s * 256
223 225 while end >= 0:
224 226 start = max(end - blocksize, 0)
225 227 self.dataf.seek(start)
226 228 data = self.dataf.read(end - start)
227 229 findend = end - start
228 230 while True:
229 231 # we're searching backwards, so we have to make sure
230 232 # we don't find a changeset where this node is a parent
231 233 off = data.find(node, 0, findend)
232 234 findend = off
233 235 if off >= 0:
234 236 i = off / self.s
235 237 off = i * self.s
236 238 n = data[off + ngshaoffset:off + ngshaoffset + 20]
237 239 if n == node:
238 240 self.map[n] = i + start / self.s
239 241 return node
240 242 else:
241 243 break
242 244 end -= blocksize
243 245 return None
244 246
245 247 def loadindex(self, i=None, end=None):
246 248 if self.all:
247 249 return
248 250 all = False
249 251 if i is None:
250 252 blockstart = 0
251 253 blocksize = (65536 / self.s) * self.s
252 254 end = self.datasize
253 255 all = True
254 256 else:
255 257 if end:
256 258 blockstart = i * self.s
257 259 end = end * self.s
258 260 blocksize = end - blockstart
259 261 else:
260 262 blockstart = (i & ~1023) * self.s
261 263 blocksize = self.s * 1024
262 264 end = blockstart + blocksize
263 265 while blockstart < end:
264 266 self.loadblock(blockstart, blocksize)
265 267 blockstart += blocksize
266 268 if all:
267 269 self.all = True
268 270
269 271 class lazyindex(object):
270 272 """a lazy version of the index array"""
271 273 def __init__(self, parser):
272 274 self.p = parser
273 275 def __len__(self):
274 276 return len(self.p.index)
275 277 def load(self, pos):
276 278 if pos < 0:
277 279 pos += len(self.p.index)
278 280 self.p.loadindex(pos)
279 281 return self.p.index[pos]
280 282 def __getitem__(self, pos):
281 283 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
282 284 def __setitem__(self, pos, item):
283 285 self.p.index[pos] = _pack(indexformatng, *item)
284 286 def __delitem__(self, pos):
285 287 del self.p.index[pos]
286 288 def insert(self, pos, e):
287 289 self.p.index.insert(pos, _pack(indexformatng, *e))
288 290 def append(self, e):
289 291 self.p.index.append(_pack(indexformatng, *e))
290 292
291 293 class lazymap(object):
292 294 """a lazy version of the node map"""
293 295 def __init__(self, parser):
294 296 self.p = parser
295 297 def load(self, key):
296 298 n = self.p.findnode(key)
297 299 if n is None:
298 300 raise KeyError(key)
299 301 def __contains__(self, key):
300 302 if key in self.p.map:
301 303 return True
302 304 self.p.loadmap()
303 305 return key in self.p.map
304 306 def __iter__(self):
305 307 yield nullid
306 308 for i, ret in enumerate(self.p.index):
307 309 if not ret:
308 310 self.p.loadindex(i)
309 311 ret = self.p.index[i]
310 312 if isinstance(ret, str):
311 313 ret = _unpack(indexformatng, ret)
312 314 yield ret[7]
313 315 def __getitem__(self, key):
314 316 try:
315 317 return self.p.map[key]
316 318 except KeyError:
317 319 try:
318 320 self.load(key)
319 321 return self.p.map[key]
320 322 except KeyError:
321 323 raise KeyError("node " + hex(key))
322 324 def __setitem__(self, key, val):
323 325 self.p.map[key] = val
324 326 def __delitem__(self, key):
325 327 del self.p.map[key]
326 328
327 329 indexformatv0 = ">4l20s20s20s"
328 330 v0shaoffset = 56
329 331
330 332 class revlogoldio(object):
331 333 def __init__(self):
332 334 self.size = struct.calcsize(indexformatv0)
333 335
334 336 def parseindex(self, fp, data, inline):
335 337 s = self.size
336 338 index = []
337 339 nodemap = {nullid: nullrev}
338 340 n = off = 0
339 341 if len(data) == _prereadsize:
340 342 data += fp.read() # read the rest
341 343 l = len(data)
342 344 while off + s <= l:
343 345 cur = data[off:off + s]
344 346 off += s
345 347 e = _unpack(indexformatv0, cur)
346 348 # transform to revlogv1 format
347 349 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
348 350 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
349 351 index.append(e2)
350 352 nodemap[e[6]] = n
351 353 n += 1
352 354
353 355 return index, nodemap, None
354 356
355 357 def packentry(self, entry, node, version, rev):
356 358 if gettype(entry[0]):
357 359 raise RevlogError(_("index entry flags need RevlogNG"))
358 360 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
359 361 node(entry[5]), node(entry[6]), entry[7])
360 362 return _pack(indexformatv0, *e2)
361 363
362 364 # index ng:
363 365 # 6 bytes: offset
364 366 # 2 bytes: flags
365 367 # 4 bytes: compressed length
366 368 # 4 bytes: uncompressed length
367 369 # 4 bytes: base rev
368 370 # 4 bytes: link rev
369 371 # 4 bytes: parent 1 rev
370 372 # 4 bytes: parent 2 rev
371 373 # 32 bytes: nodeid
372 374 indexformatng = ">Qiiiiii20s12x"
373 375 ngshaoffset = 32
374 376 versionformat = ">I"
375 377
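# A sketch round-tripping one 64-byte RevlogNG entry through the
# format above (the values are arbitrary; in a real index the first
# four bytes of the rev 0 entry hold the version header instead of
# the high bytes of the offset).
data64 = _pack(indexformatng, offset_type(4096, 0), 120, 300,
               7, 9, 7, -1, '\x11' * 20)
offsetflags, comp, raw, base, link, p1, p2, nodeid = \
    _unpack(indexformatng, data64)
assert getoffset(offsetflags) == 4096 and gettype(offsetflags) == 0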
376 378 class revlogio(object):
377 379 def __init__(self):
378 380 self.size = struct.calcsize(indexformatng)
379 381
380 382 def parseindex(self, fp, data, inline):
381 383 if len(data) == _prereadsize:
382 384 if util.openhardlinks() and not inline:
383 385 # big index, let's parse it on demand
384 386 parser = lazyparser(fp)
385 387 index = lazyindex(parser)
386 388 nodemap = lazymap(parser)
387 389 e = list(index[0])
388 390 type = gettype(e[0])
389 391 e[0] = offset_type(0, type)
390 392 index[0] = e
391 393 return index, nodemap, None
392 394 else:
393 395 data += fp.read()
394 396
395 397 # call the C implementation to parse the index data
396 398 index, nodemap, cache = parsers.parse_index(data, inline)
397 399 return index, nodemap, cache
398 400
399 401 def packentry(self, entry, node, version, rev):
400 402 p = _pack(indexformatng, *entry)
401 403 if rev == 0:
402 404 p = _pack(versionformat, version) + p[4:]
403 405 return p
404 406
405 407 class revlog(object):
406 408 """
407 409 the underlying revision storage object
408 410
409 411 A revlog consists of two parts, an index and the revision data.
410 412
411 413 The index is a file with a fixed record size containing
412 414 information on each revision, including its nodeid (hash), the
413 415 nodeids of its parents, the position and offset of its data within
414 416 the data file, and the revision it's based on. Finally, each entry
415 417 contains a linkrev entry that can serve as a pointer to external
416 418 data.
417 419
418 420 The revision data itself is a linear collection of data chunks.
419 421 Each chunk represents a revision and is usually represented as a
420 422 delta against the previous chunk. To bound lookup time, runs of
421 423 deltas are limited to about 2 times the length of the original
422 424 version data. This makes retrieval of a version proportional to
423 425 its size, or O(1) relative to the number of revisions.
424 426
425 427 Both pieces of the revlog are written to in an append-only
426 428 fashion, which means we never need to rewrite a file to insert or
427 429 remove data, and can use some simple techniques to avoid the need
428 430 for locking while reading.
429 431 """
430 432 def __init__(self, opener, indexfile, shallowroot=None):
431 433 """
432 434 create a revlog object
433 435
434 436 opener is a function that abstracts the file opening operation
435 437 and can be used to implement COW semantics or the like.
436 438 """
437 439 self.indexfile = indexfile
438 440 self.datafile = indexfile[:-2] + ".d"
439 441 self.opener = opener
440 442 self._cache = None
441 443 self._chunkcache = (0, '')
442 444 self.nodemap = {nullid: nullrev}
443 445 self.index = []
444 446 self._shallowroot = shallowroot
445 447 self._parentdelta = 0
446 448
447 449 v = REVLOG_DEFAULT_VERSION
448 450 if hasattr(opener, 'options') and 'defversion' in opener.options:
449 451 v = opener.options['defversion']
450 452 if v & REVLOGNG:
451 453 v |= REVLOGNGINLINEDATA
452 454 if v & REVLOGNG and 'parentdelta' in opener.options:
453 455 self._parentdelta = 1
454 456
455 457 if shallowroot:
456 458 v |= REVLOGSHALLOW
457 459
458 460 i = ''
459 461 try:
460 462 f = self.opener(self.indexfile)
461 463 if "nonlazy" in getattr(self.opener, 'options', {}):
462 464 i = f.read()
463 465 else:
464 466 i = f.read(_prereadsize)
465 467 if len(i) > 0:
466 468 v = struct.unpack(versionformat, i[:4])[0]
467 469 except IOError, inst:
468 470 if inst.errno != errno.ENOENT:
469 471 raise
470 472
471 473 self.version = v
472 474 self._inline = v & REVLOGNGINLINEDATA
473 475 self._shallow = v & REVLOGSHALLOW
474 476 flags = v & ~0xFFFF
475 477 fmt = v & 0xFFFF
476 478 if fmt == REVLOGV0 and flags:
477 479 raise RevlogError(_("index %s unknown flags %#04x for format v0")
478 480 % (self.indexfile, flags >> 16))
479 481 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
480 482 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
481 483 % (self.indexfile, flags >> 16))
482 484 elif fmt > REVLOGNG:
483 485 raise RevlogError(_("index %s unknown format %d")
484 486 % (self.indexfile, fmt))
485 487
486 488 self._io = revlogio()
487 489 if self.version == REVLOGV0:
488 490 self._io = revlogoldio()
489 491 if i:
490 492 try:
491 493 d = self._io.parseindex(f, i, self._inline)
492 494 except (ValueError, IndexError):
493 495 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
494 496 self.index, self.nodemap, self._chunkcache = d
495 497 if not self._chunkcache:
496 498 self._chunkclear()
497 499
498 500 # add the magic null revision at -1 (if it hasn't been done already)
499 501 if (self.index == [] or isinstance(self.index, lazyindex) or
500 502 self.index[-1][7] != nullid) :
501 503 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502 504
503 505 def _loadindex(self, start, end):
504 506 """load a block of indexes all at once from the lazy parser"""
505 507 if isinstance(self.index, lazyindex):
506 508 self.index.p.loadindex(start, end)
507 509
508 510 def _loadindexmap(self):
509 511 """loads both the map and the index from the lazy parser"""
510 512 if isinstance(self.index, lazyindex):
511 513 p = self.index.p
512 514 p.loadindex()
513 515 self.nodemap = p.map
514 516
515 517 def _loadmap(self):
516 518 """loads the map from the lazy parser"""
517 519 if isinstance(self.nodemap, lazymap):
518 520 self.nodemap.p.loadmap()
519 521 self.nodemap = self.nodemap.p.map
520 522
521 523 def tip(self):
522 524 return self.node(len(self.index) - 2)
523 525 def __len__(self):
524 526 return len(self.index) - 1
525 527 def __iter__(self):
526 528 for i in xrange(len(self)):
527 529 yield i
528 530 def rev(self, node):
529 531 try:
530 532 return self.nodemap[node]
531 533 except KeyError:
532 534 raise LookupError(node, self.indexfile, _('no node'))
533 535 def node(self, rev):
534 536 return self.index[rev][7]
535 537 def linkrev(self, rev):
536 538 return self.index[rev][4]
537 539 def parents(self, node):
538 540 i = self.index
539 541 d = i[self.rev(node)]
540 542 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
541 543 def parentrevs(self, rev):
542 544 return self.index[rev][5:7]
543 545 def start(self, rev):
544 546 return int(self.index[rev][0] >> 16)
545 547 def end(self, rev):
546 548 return self.start(rev) + self.length(rev)
547 549 def length(self, rev):
548 550 return self.index[rev][1]
549 551 def base(self, rev):
550 552 return self.index[rev][3]
551 553 def flags(self, rev):
552 554 return self.index[rev][0] & 0xFFFF
553 555
554 556 def size(self, rev):
555 557 """return the length of the uncompressed text for a given revision"""
556 558 l = self.index[rev][2]
557 559 if l >= 0:
558 560 return l
559 561
560 562 t = self.revision(self.node(rev))
561 563 return len(t)
562 564
563 565 def reachable(self, node, stop=None):
564 566 """return the set of all nodes ancestral to a given node, including
565 567 the node itself, stopping when stop is matched"""
566 568 reachable = set((node,))
567 569 visit = [node]
568 570 if stop:
569 571 stopn = self.rev(stop)
570 572 else:
571 573 stopn = 0
572 574 while visit:
573 575 n = visit.pop(0)
574 576 if n == stop:
575 577 continue
576 578 if n == nullid:
577 579 continue
578 580 for p in self.parents(n):
579 581 if self.rev(p) < stopn:
580 582 continue
581 583 if p not in reachable:
582 584 reachable.add(p)
583 585 visit.append(p)
584 586 return reachable
585 587
586 588 def ancestors(self, *revs):
587 589 """Generate the ancestors of 'revs' in reverse topological order.
588 590
589 591 Yield a sequence of revision numbers starting with the parents
590 592 of each revision in revs, i.e., each revision is *not* considered
591 593 an ancestor of itself. Results are in breadth-first order:
592 594 parents of each rev in revs, then parents of those, etc. Result
593 595 does not include the null revision."""
594 596 visit = list(revs)
595 597 seen = set([nullrev])
596 598 while visit:
597 599 for parent in self.parentrevs(visit.pop(0)):
598 600 if parent not in seen:
599 601 visit.append(parent)
600 602 seen.add(parent)
601 603 yield parent
602 604
603 605 def descendants(self, *revs):
604 606 """Generate the descendants of 'revs' in revision order.
605 607
606 608 Yield a sequence of revision numbers starting with a child of
607 609 some rev in revs, i.e., each revision is *not* considered a
608 610 descendant of itself. Results are ordered by revision number (a
609 611 topological sort)."""
610 612 seen = set(revs)
611 613 for i in xrange(min(revs) + 1, len(self)):
612 614 for x in self.parentrevs(i):
613 615 if x != nullrev and x in seen:
614 616 seen.add(i)
615 617 yield i
616 618 break
617 619
618 620 def findmissing(self, common=None, heads=None):
619 621 """Return the ancestors of heads that are not ancestors of common.
620 622
621 623 More specifically, return a list of nodes N such that every N
622 624 satisfies the following constraints:
623 625
624 626 1. N is an ancestor of some node in 'heads'
625 627 2. N is not an ancestor of any node in 'common'
626 628
627 629 The list is sorted by revision number, meaning it is
628 630 topologically sorted.
629 631
630 632 'heads' and 'common' are both lists of node IDs. If heads is
631 633 not supplied, uses all of the revlog's heads. If common is not
632 634 supplied, uses nullid."""
633 635 if common is None:
634 636 common = [nullid]
635 637 if heads is None:
636 638 heads = self.heads()
637 639
638 640 common = [self.rev(n) for n in common]
639 641 heads = [self.rev(n) for n in heads]
640 642
641 643 # we want the ancestors, but inclusive
642 644 has = set(self.ancestors(*common))
643 645 has.add(nullrev)
644 646 has.update(common)
645 647
646 648 # take all ancestors from heads that aren't in has
647 649 missing = set()
648 650 visit = [r for r in heads if r not in has]
649 651 while visit:
650 652 r = visit.pop(0)
651 653 if r in missing:
652 654 continue
653 655 else:
654 656 missing.add(r)
655 657 for p in self.parentrevs(r):
656 658 if p not in has:
657 659 visit.append(p)
658 660 missing = list(missing)
659 661 missing.sort()
660 662 return [self.node(r) for r in missing]
661 663
662 664 def nodesbetween(self, roots=None, heads=None):
663 665 """Return a topological path from 'roots' to 'heads'.
664 666
665 667 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
666 668 topologically sorted list of all nodes N that satisfy both of
667 669 these constraints:
668 670
669 671 1. N is a descendant of some node in 'roots'
670 672 2. N is an ancestor of some node in 'heads'
671 673
672 674 Every node is considered to be both a descendant and an ancestor
673 675 of itself, so every reachable node in 'roots' and 'heads' will be
674 676 included in 'nodes'.
675 677
676 678 'outroots' is the list of reachable nodes in 'roots', i.e., the
677 679 subset of 'roots' that is returned in 'nodes'. Likewise,
678 680 'outheads' is the subset of 'heads' that is also in 'nodes'.
679 681
680 682 'roots' and 'heads' are both lists of node IDs. If 'roots' is
681 683 unspecified, uses nullid as the only root. If 'heads' is
682 684 unspecified, uses the list of all of the revlog's heads."""
683 685 nonodes = ([], [], [])
684 686 if roots is not None:
685 687 roots = list(roots)
686 688 if not roots:
687 689 return nonodes
688 690 lowestrev = min([self.rev(n) for n in roots])
689 691 else:
690 692 roots = [nullid] # Everybody's a descendent of nullid
691 693 lowestrev = nullrev
692 694 if (lowestrev == nullrev) and (heads is None):
693 695 # We want _all_ the nodes!
694 696 return ([self.node(r) for r in self], [nullid], list(self.heads()))
695 697 if heads is None:
696 698 # All nodes are ancestors, so the latest ancestor is the last
697 699 # node.
698 700 highestrev = len(self) - 1
699 701 # Set ancestors to None to signal that every node is an ancestor.
700 702 ancestors = None
701 703 # Set heads to an empty dictionary for later discovery of heads
702 704 heads = {}
703 705 else:
704 706 heads = list(heads)
705 707 if not heads:
706 708 return nonodes
707 709 ancestors = set()
708 710 # Turn heads into a dictionary so we can remove 'fake' heads.
709 711 # Also, later we will be using it to filter out the heads we can't
710 712 # find from roots.
711 713 heads = dict.fromkeys(heads, 0)
712 714 # Start at the top and keep marking parents until we're done.
713 715 nodestotag = set(heads)
714 716 # Remember where the top was so we can use it as a limit later.
715 717 highestrev = max([self.rev(n) for n in nodestotag])
716 718 while nodestotag:
717 719 # grab a node to tag
718 720 n = nodestotag.pop()
719 721 # Never tag nullid
720 722 if n == nullid:
721 723 continue
722 724 # A node's revision number represents its place in a
723 725 # topologically sorted list of nodes.
724 726 r = self.rev(n)
725 727 if r >= lowestrev:
726 728 if n not in ancestors:
727 729 # If we are possibly a descendent of one of the roots
728 730 # and we haven't already been marked as an ancestor
729 731 ancestors.add(n) # Mark as ancestor
730 732 # Add non-nullid parents to list of nodes to tag.
731 733 nodestotag.update([p for p in self.parents(n) if
732 734 p != nullid])
733 735 elif n in heads: # We've seen it before, is it a fake head?
734 736 # So it is, real heads should not be the ancestors of
735 737 # any other heads.
736 738 heads.pop(n)
737 739 if not ancestors:
738 740 return nonodes
739 741 # Now that we have our set of ancestors, we want to remove any
740 742 # roots that are not ancestors.
741 743
742 744 # If one of the roots was nullid, everything is included anyway.
743 745 if lowestrev > nullrev:
744 746 # But, since we weren't, let's recompute the lowest rev to not
745 747 # include roots that aren't ancestors.
746 748
747 749 # Filter out roots that aren't ancestors of heads
748 750 roots = [n for n in roots if n in ancestors]
749 751 # Recompute the lowest revision
750 752 if roots:
751 753 lowestrev = min([self.rev(n) for n in roots])
752 754 else:
753 755 # No more roots? Return empty list
754 756 return nonodes
755 757 else:
756 758 # We are descending from nullid, and don't need to care about
757 759 # any other roots.
758 760 lowestrev = nullrev
759 761 roots = [nullid]
760 762 # Transform our roots list into a set.
761 763 descendents = set(roots)
762 764 # Also, keep the original roots so we can filter out roots that aren't
763 765 # 'real' roots (i.e. are descended from other roots).
764 766 roots = descendents.copy()
765 767 # Our topologically sorted list of output nodes.
766 768 orderedout = []
767 769 # Don't start at nullid since we don't want nullid in our output list,
768 770 # and if nullid shows up in descendents, empty parents will look like
769 771 # they're descendents.
770 772 for r in xrange(max(lowestrev, 0), highestrev + 1):
771 773 n = self.node(r)
772 774 isdescendent = False
773 775 if lowestrev == nullrev: # Everybody is a descendent of nullid
774 776 isdescendent = True
775 777 elif n in descendents:
776 778 # n is already a descendent
777 779 isdescendent = True
778 780 # This check only needs to be done here because all the roots
779 781 # will start being marked as descendents before the loop.
780 782 if n in roots:
781 783 # If n was a root, check if it's a 'real' root.
782 784 p = tuple(self.parents(n))
783 785 # If any of its parents are descendents, it's not a root.
784 786 if (p[0] in descendents) or (p[1] in descendents):
785 787 roots.remove(n)
786 788 else:
787 789 p = tuple(self.parents(n))
788 790 # A node is a descendent if either of its parents is a
789 791 # descendent. (We seeded the descendents set with the roots
790 792 # up there, remember?)
791 793 if (p[0] in descendents) or (p[1] in descendents):
792 794 descendents.add(n)
793 795 isdescendent = True
794 796 if isdescendent and ((ancestors is None) or (n in ancestors)):
795 797 # Only include nodes that are both descendents and ancestors.
796 798 orderedout.append(n)
797 799 if (ancestors is not None) and (n in heads):
798 800 # We're trying to figure out which heads are reachable
799 801 # from roots.
800 802 # Mark this head as having been reached
801 803 heads[n] = 1
802 804 elif ancestors is None:
803 805 # Otherwise, we're trying to discover the heads.
804 806 # Assume this is a head because if it isn't, the next step
805 807 # will eventually remove it.
806 808 heads[n] = 1
807 809 # But, obviously its parents aren't.
808 810 for p in self.parents(n):
809 811 heads.pop(p, None)
810 812 heads = [n for n in heads.iterkeys() if heads[n] != 0]
811 813 roots = list(roots)
812 814 assert orderedout
813 815 assert roots
814 816 assert heads
815 817 return (orderedout, roots, heads)
816 818
817 819 def heads(self, start=None, stop=None):
818 820 """return the list of all nodes that have no children
819 821
820 822 if start is specified, only heads that are descendants of
821 823 start will be returned
822 824 if stop is specified, it will consider all the revs from stop
823 825 as if they had no children
824 826 """
825 827 if start is None and stop is None:
826 828 count = len(self)
827 829 if not count:
828 830 return [nullid]
829 831 ishead = [1] * (count + 1)
830 832 index = self.index
831 833 for r in xrange(count):
832 834 e = index[r]
833 835 ishead[e[5]] = ishead[e[6]] = 0
834 836 return [self.node(r) for r in xrange(count) if ishead[r]]
835 837
836 838 if start is None:
837 839 start = nullid
838 840 if stop is None:
839 841 stop = []
840 842 stoprevs = set([self.rev(n) for n in stop])
841 843 startrev = self.rev(start)
842 844 reachable = set((startrev,))
843 845 heads = set((startrev,))
844 846
845 847 parentrevs = self.parentrevs
846 848 for r in xrange(startrev + 1, len(self)):
847 849 for p in parentrevs(r):
848 850 if p in reachable:
849 851 if r not in stoprevs:
850 852 reachable.add(r)
851 853 heads.add(r)
852 854 if p in heads and p not in stoprevs:
853 855 heads.remove(p)
854 856
855 857 return [self.node(r) for r in heads]
856 858
857 859 def children(self, node):
858 860 """find the children of a given node"""
859 861 c = []
860 862 p = self.rev(node)
861 863 for r in range(p + 1, len(self)):
862 864 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
863 865 if prevs:
864 866 for pr in prevs:
865 867 if pr == p:
866 868 c.append(self.node(r))
867 869 elif p == nullrev:
868 870 c.append(self.node(r))
869 871 return c
870 872
871 873 def descendant(self, start, end):
872 874 for i in self.descendants(start):
873 875 if i == end:
874 876 return True
875 877 elif i > end:
876 878 break
877 879 return False
878 880
879 881 def ancestor(self, a, b):
880 882 """calculate the least common ancestor of nodes a and b"""
881 883
882 884 # fast path, check if it is a descendant
883 885 a, b = self.rev(a), self.rev(b)
884 886 start, end = sorted((a, b))
885 887 if self.descendant(start, end):
886 888 return self.node(start)
887 889
888 890 def parents(rev):
889 891 return [p for p in self.parentrevs(rev) if p != nullrev]
890 892
891 893 c = ancestor.ancestor(a, b, parents)
892 894 if c is None:
893 895 return nullid
894 896
895 897 return self.node(c)
896 898
897 899 def _match(self, id):
898 900 if isinstance(id, (long, int)):
899 901 # rev
900 902 return self.node(id)
901 903 if len(id) == 20:
902 904 # possibly a binary node
903 905 # odds of a binary node being all hex in ASCII are 1 in 10**25
904 906 try:
905 907 node = id
906 908 self.rev(node) # quick search the index
907 909 return node
908 910 except LookupError:
909 911 pass # may be partial hex id
910 912 try:
911 913 # str(rev)
912 914 rev = int(id)
913 915 if str(rev) != id:
914 916 raise ValueError
915 917 if rev < 0:
916 918 rev = len(self) + rev
917 919 if rev < 0 or rev >= len(self):
918 920 raise ValueError
919 921 return self.node(rev)
920 922 except (ValueError, OverflowError):
921 923 pass
922 924 if len(id) == 40:
923 925 try:
924 926 # a full hex nodeid?
925 927 node = bin(id)
926 928 self.rev(node)
927 929 return node
928 930 except (TypeError, LookupError):
929 931 pass
930 932
931 933 def _partialmatch(self, id):
932 934 if len(id) < 40:
933 935 try:
934 936 # hex(node)[:...]
935 937 l = len(id) // 2 # grab an even number of digits
936 938 bin_id = bin(id[:l * 2])
937 939 nl = [n for n in self.nodemap if n[:l] == bin_id]
938 940 nl = [n for n in nl if hex(n).startswith(id)]
939 941 if len(nl) > 0:
940 942 if len(nl) == 1:
941 943 return nl[0]
942 944 raise LookupError(id, self.indexfile,
943 945 _('ambiguous identifier'))
944 946 return None
945 947 except TypeError:
946 948 pass
947 949
948 950 def lookup(self, id):
949 951 """locate a node based on:
950 952 - revision number or str(revision number)
951 953 - nodeid or subset of hex nodeid
952 954 """
953 955 n = self._match(id)
954 956 if n is not None:
955 957 return n
956 958 n = self._partialmatch(id)
957 959 if n:
958 960 return n
959 961
960 962 raise LookupError(id, self.indexfile, _('no match found'))
961 963
962 964 def cmp(self, node, text):
963 965 """compare text with a given file revision
964 966
965 967 returns True if text is different than what is stored.
966 968 """
967 969 p1, p2 = self.parents(node)
968 970 return hash(text, p1, p2) != node
969 971
970 972 def _addchunk(self, offset, data):
971 973 o, d = self._chunkcache
972 974 # try to add to existing cache
973 975 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
974 976 self._chunkcache = o, d + data
975 977 else:
976 978 self._chunkcache = offset, data
977 979
978 980 def _loadchunk(self, offset, length):
979 981 if self._inline:
980 982 df = self.opener(self.indexfile)
981 983 else:
982 984 df = self.opener(self.datafile)
983 985
984 986 readahead = max(65536, length)
985 987 df.seek(offset)
986 988 d = df.read(readahead)
987 989 self._addchunk(offset, d)
988 990 if readahead > length:
989 991 return d[:length]
990 992 return d
991 993
992 994 def _getchunk(self, offset, length):
993 995 o, d = self._chunkcache
994 996 l = len(d)
995 997
996 998 # is it in the cache?
997 999 cachestart = offset - o
998 1000 cacheend = cachestart + length
999 1001 if cachestart >= 0 and cacheend <= l:
1000 1002 if cachestart == 0 and cacheend == l:
1001 1003 return d # avoid a copy
1002 1004 return d[cachestart:cacheend]
1003 1005
1004 1006 return self._loadchunk(offset, length)
1005 1007
1006 1008 def _chunkraw(self, startrev, endrev):
1007 1009 start = self.start(startrev)
1008 1010 length = self.end(endrev) - start
1009 1011 if self._inline:
1010 1012 start += (startrev + 1) * self._io.size
1011 1013 return self._getchunk(start, length)
1012 1014
1013 1015 def _chunk(self, rev):
1014 1016 return decompress(self._chunkraw(rev, rev))
1015 1017
1016 1018 def _chunkclear(self):
1017 1019 self._chunkcache = (0, '')
1018 1020
1019 1021 def deltaparent(self, rev):
1020 1022 """return previous revision or parentrev according to flags"""
1021 1023 if self.base(rev) == rev:
1022 1024 return nullrev
1023 1025 elif self.flags(rev) & REVIDX_PARENTDELTA:
1024 1026 return self.parentrevs(rev)[0]
1025 1027 else:
1026 1028 return rev - 1
1027 1029
1028 1030
1029 1031 def deltachain(self, rev, cache):
1030 1032 """return chain of revisions to construct a given revision"""
1031 1033 chain = []
1032 1034 check = False
1033 while self.base(rev) != rev and rev != cache:
1035 index = self.index
1036 e = index[rev]
1037 while rev != e[3] and rev != cache:
1034 1038 chain.append(rev)
1035 rev = self.deltaparent(rev)
1039 if e[0] & REVIDX_PARENTDELTA:
1040 rev = e[5]
1041 else:
1042 rev -= 1
1043 e = index[rev]
1036 1044 chain.reverse()
1037 1045 if rev == cache:
1038 1046 check = True
1039 1047 return check, rev, chain
1040 1048
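# How revision() below consumes the triple returned here, with
# 'cacherev' standing in for the caller's cached revision:
#
#     cached, base, chain = self.deltachain(rev, cacherev)
#
# 'base' holds the full text (or the cached text when 'cached' is
# True), and the deltas of each rev in 'chain' are applied on top
# of it, oldest first.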
1041 1049 def revdiff(self, rev1, rev2):
1042 1050 """return or calculate a delta between two revisions"""
1043 1051 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1044 1052 return self._chunk(rev2)
1045 1053
1046 1054 return mdiff.textdiff(self.revision(self.node(rev1)),
1047 1055 self.revision(self.node(rev2)))
1048 1056
1049 1057 def revision(self, node):
1050 1058 """return an uncompressed revision of a given node"""
1051 1059 cache = nullrev
1052 1060 if node == nullid:
1053 1061 return ""
1054 1062 if self._cache:
1055 1063 cache = self._cache[1]
1056 1064 if self._cache[0] == node:
1057 1065 return self._cache[2]
1058 1066
1059 1067 # look up what we need to read
1060 1068 text = None
1061 1069 rev = self.rev(node)
1062 1070 cache, base, chain = self.deltachain(rev, cache)
1063 1071
1064 1072 # check rev flags
1065 1073 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1066 1074 raise RevlogError(_('incompatible revision flag %x') %
1067 1075 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1068 1076
1069 1077 # do we have useful data cached?
1070 1078 if cache and self._cache:
1079 global _cached
1080 _cached += 1
1071 1081 text = self._cache[2]
1082 else:
1083 global _uncached
1084 _uncached += 1
1085
1072 1086
1073 1087 # drop cache to save memory
1074 1088 self._cache = None
1075 1089
1076 1090 self._loadindex(base, rev + 1)
1077 1091 self._chunkraw(base, rev)
1078 1092 if text is None:
1079 1093 text = self._chunk(base)
1080 1094
1081 1095 bins = [self._chunk(r) for r in chain]
1082 1096 text = mdiff.patches(text, bins)
1083 1097 p1, p2 = self.parents(node)
1084 1098 if (node != hash(text, p1, p2) and
1085 1099 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
1086 1100 raise RevlogError(_("integrity check failed on %s:%d")
1087 1101 % (self.indexfile, rev))
1088 1102
1089 1103 self._cache = (node, rev, text)
1090 1104 return text
1091 1105
1092 1106 def checkinlinesize(self, tr, fp=None):
1093 1107 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1094 1108 return
1095 1109
1096 1110 trinfo = tr.find(self.indexfile)
1097 1111 if trinfo is None:
1098 1112 raise RevlogError(_("%s not found in the transaction")
1099 1113 % self.indexfile)
1100 1114
1101 1115 trindex = trinfo[2]
1102 1116 dataoff = self.start(trindex)
1103 1117
1104 1118 tr.add(self.datafile, dataoff)
1105 1119
1106 1120 if fp:
1107 1121 fp.flush()
1108 1122 fp.close()
1109 1123
1110 1124 df = self.opener(self.datafile, 'w')
1111 1125 try:
1112 1126 for r in self:
1113 1127 df.write(self._chunkraw(r, r))
1114 1128 finally:
1115 1129 df.close()
1116 1130
1117 1131 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1118 1132 self.version &= ~(REVLOGNGINLINEDATA)
1119 1133 self._inline = False
1120 1134 for i in self:
1121 1135 e = self._io.packentry(self.index[i], self.node, self.version, i)
1122 1136 fp.write(e)
1123 1137
1124 1138 # if we don't call rename, the temp file will never replace the
1125 1139 # real index
1126 1140 fp.rename()
1127 1141
1128 1142 tr.replace(self.indexfile, trindex * self._io.size)
1129 1143 self._chunkclear()
1130 1144
1131 1145 def addrevision(self, text, transaction, link, p1, p2, d=None):
1132 1146 """add a revision to the log
1133 1147
1134 1148 text - the revision data to add
1135 1149 transaction - the transaction object used for rollback
1136 1150 link - the linkrev data to add
1137 1151 p1, p2 - the parent nodeids of the revision
1138 1152 d - an optional precomputed delta
1139 1153 """
1140 1154 dfh = None
1141 1155 if not self._inline:
1142 1156 dfh = self.opener(self.datafile, "a")
1143 1157 ifh = self.opener(self.indexfile, "a+")
1144 1158 try:
1145 1159 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1146 1160 finally:
1147 1161 if dfh:
1148 1162 dfh.close()
1149 1163 ifh.close()
1150 1164
1151 1165 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1152 1166 node = hash(text, p1, p2)
1153 1167 if node in self.nodemap:
1154 1168 return node
1155 1169
1156 1170 curr = len(self)
1157 1171 prev = curr - 1
1158 1172 base = self.base(prev)
1159 1173 offset = self.end(prev)
1160 1174 flags = 0
1161 1175
1162 1176 if curr:
1163 1177 if not d:
1164 1178 if self._parentdelta:
1165 1179 ptext = self.revision(p1)
1166 1180 flags = REVIDX_PARENTDELTA
1167 1181 else:
1168 1182 ptext = self.revision(self.node(prev))
1169 1183 d = mdiff.textdiff(ptext, text)
1170 1184 data = compress(d)
1171 1185 l = len(data[1]) + len(data[0])
1172 1186 dist = l + offset - self.start(base)
1173 1187
1174 1188 # full versions are inserted when the needed deltas
1175 1189 # become comparable to the uncompressed text
1176 1190 # or the base revision is punched
1177 1191 if (not curr or dist > len(text) * 2 or
1178 1192 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1179 1193 data = compress(text)
1180 1194 l = len(data[1]) + len(data[0])
1181 1195 base = curr
1182 1196
1183 1197 e = (offset_type(offset, flags), l, len(text),
1184 1198 base, link, self.rev(p1), self.rev(p2), node)
1185 1199 self.index.insert(-1, e)
1186 1200 self.nodemap[node] = curr
1187 1201
1188 1202 entry = self._io.packentry(e, self.node, self.version, curr)
1189 1203 if not self._inline:
1190 1204 transaction.add(self.datafile, offset)
1191 1205 transaction.add(self.indexfile, curr * len(entry))
1192 1206 if data[0]:
1193 1207 dfh.write(data[0])
1194 1208 dfh.write(data[1])
1195 1209 dfh.flush()
1196 1210 ifh.write(entry)
1197 1211 else:
1198 1212 offset += curr * self._io.size
1199 1213 transaction.add(self.indexfile, offset, curr)
1200 1214 ifh.write(entry)
1201 1215 ifh.write(data[0])
1202 1216 ifh.write(data[1])
1203 1217 self.checkinlinesize(transaction, ifh)
1204 1218
1205 1219 if type(text) == str: # only accept immutable objects
1206 1220 self._cache = (node, curr, text)
1207 1221 return node
1208 1222
1209 1223 def group(self, nodelist, lookup, infocollect=None):
1210 1224 """Calculate a delta group, yielding a sequence of changegroup chunks
1211 1225 (strings).
1212 1226
1213 1227 Given a list of changeset revs, return a set of deltas and
1214 1228 metadata corresponding to nodes. The first delta is
1215 1229 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1216 1230 have this parent, as it has all history before these
1217 1231 changesets. The parent used is parents[0].
1218 1232 """
1219 1233
1220 1234 revs = [self.rev(n) for n in nodelist]
1221 1235
1222 1236 # if we don't have any revisions touched by these changesets, bail
1223 1237 if not revs:
1224 1238 yield changegroup.closechunk()
1225 1239 return
1226 1240
1227 1241 # add the parent of the first rev
1228 1242 p = self.parentrevs(revs[0])[0]
1229 1243 revs.insert(0, p)
1230 1244
1231 1245 # build deltas
1232 1246 for d in xrange(len(revs) - 1):
1233 1247 a, b = revs[d], revs[d + 1]
1234 1248 nb = self.node(b)
1235 1249
1236 1250 if infocollect is not None:
1237 1251 infocollect(nb)
1238 1252
1239 1253 p = self.parents(nb)
1240 1254 meta = nb + p[0] + p[1] + lookup(nb)
1241 1255 if a == -1:
1242 1256 d = self.revision(nb)
1243 1257 meta += mdiff.trivialdiffheader(len(d))
1244 1258 else:
1245 1259 d = self.revdiff(a, b)
1246 1260 yield changegroup.chunkheader(len(meta) + len(d))
1247 1261 yield meta
1248 1262 yield d
1249 1263
1250 1264 yield changegroup.closechunk()
1251 1265
1252 1266 def addgroup(self, revs, linkmapper, transaction):
1253 1267 """
1254 1268 add a delta group
1255 1269
1256 1270 given a set of deltas, add them to the revision log. The
1257 1271 first delta is against its parent, which should be in our
1258 1272 log; the rest are against the previous delta.
1259 1273 """
1260 1274
1261 1275 # track the base of the current delta log
1262 1276 r = len(self)
1263 1277 t = r - 1
1264 1278 node = None
1265 1279
1266 1280 base = prev = nullrev
1267 1281 start = end = textlen = 0
1268 1282 if r:
1269 1283 end = self.end(t)
1270 1284
1271 1285 ifh = self.opener(self.indexfile, "a+")
1272 1286 isize = r * self._io.size
1273 1287 if self._inline:
1274 1288 transaction.add(self.indexfile, end + isize, r)
1275 1289 dfh = None
1276 1290 else:
1277 1291 transaction.add(self.indexfile, isize, r)
1278 1292 transaction.add(self.datafile, end)
1279 1293 dfh = self.opener(self.datafile, "a")
1280 1294
1281 1295 try:
1282 1296 # loop through our set of deltas
1283 1297 chain = None
1284 1298 for chunk in revs:
1285 1299 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1286 1300 link = linkmapper(cs)
1287 1301 if node in self.nodemap:
1288 1302 # this can happen if two branches make the same change
1289 1303 chain = node
1290 1304 continue
1291 1305 delta = buffer(chunk, 80)
1292 1306 del chunk
1293 1307
1294 1308 for p in (p1, p2):
1295 1309 if not p in self.nodemap:
1296 1310 raise LookupError(p, self.indexfile, _('unknown parent'))
1297 1311
1298 1312 if not chain:
1299 1313 # retrieve the parent revision of the delta chain
1300 1314 chain = p1
1301 1315 if not chain in self.nodemap:
1302 1316 raise LookupError(chain, self.indexfile, _('unknown base'))
1303 1317
1304 1318 # full versions are inserted when the needed deltas become
1305 1319 # comparable to the uncompressed text or when the previous
1306 1320 # version is not the one we have a delta against. We use
1307 1321 # the size of the previous full rev as a proxy for the
1308 1322 # current size.
1309 1323
1310 1324 if chain == prev:
1311 1325 cdelta = compress(delta)
1312 1326 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1313 1327 textlen = mdiff.patchedsize(textlen, delta)
1314 1328
1315 1329 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1316 1330 # flush our writes here so we can read it in revision
1317 1331 if dfh:
1318 1332 dfh.flush()
1319 1333 ifh.flush()
1320 1334 text = self.revision(chain)
1321 1335 if len(text) == 0:
1322 1336 # skip over trivial delta header
1323 1337 text = buffer(delta, 12)
1324 1338 else:
1325 1339 text = mdiff.patches(text, [delta])
1326 1340 del delta
1327 1341 chk = self._addrevision(text, transaction, link, p1, p2, None,
1328 1342 ifh, dfh)
1329 1343 if not dfh and not self._inline:
1330 1344 # addrevision switched from inline to conventional
1331 1345 # reopen the index
1332 1346 dfh = self.opener(self.datafile, "a")
1333 1347 ifh = self.opener(self.indexfile, "a")
1334 1348 if chk != node:
1335 1349 raise RevlogError(_("consistency error adding group"))
1336 1350 textlen = len(text)
1337 1351 else:
1338 1352 e = (offset_type(end, 0), cdeltalen, textlen, base,
1339 1353 link, self.rev(p1), self.rev(p2), node)
1340 1354 self.index.insert(-1, e)
1341 1355 self.nodemap[node] = r
1342 1356 entry = self._io.packentry(e, self.node, self.version, r)
1343 1357 if self._inline:
1344 1358 ifh.write(entry)
1345 1359 ifh.write(cdelta[0])
1346 1360 ifh.write(cdelta[1])
1347 1361 self.checkinlinesize(transaction, ifh)
1348 1362 if not self._inline:
1349 1363 dfh = self.opener(self.datafile, "a")
1350 1364 ifh = self.opener(self.indexfile, "a")
1351 1365 else:
1352 1366 dfh.write(cdelta[0])
1353 1367 dfh.write(cdelta[1])
1354 1368 ifh.write(entry)
1355 1369
1356 1370 t, r, chain, prev = r, r + 1, node, node
1357 1371 base = self.base(t)
1358 1372 start = self.start(base)
1359 1373 end = self.end(t)
1360 1374 finally:
1361 1375 if dfh:
1362 1376 dfh.close()
1363 1377 ifh.close()
1364 1378
1365 1379 return node
1366 1380
1367 1381 def strip(self, minlink, transaction):
1368 1382 """truncate the revlog on the first revision with a linkrev >= minlink
1369 1383
1370 1384 This function is called when we're stripping revision minlink and
1371 1385 its descendants from the repository.
1372 1386
1373 1387 We have to remove all revisions with linkrev >= minlink, because
1374 1388 the equivalent changelog revisions will be renumbered after the
1375 1389 strip.
1376 1390
1377 1391 So we truncate the revlog on the first of these revisions, and
1378 1392 trust that the caller has saved the revisions that shouldn't be
1379 1393 removed and that it'll readd them after this truncation.
1380 1394 """
1381 1395 if len(self) == 0:
1382 1396 return
1383 1397
1384 1398 if isinstance(self.index, lazyindex):
1385 1399 self._loadindexmap()
1386 1400
1387 1401 for rev in self:
1388 1402 if self.index[rev][4] >= minlink:
1389 1403 break
1390 1404 else:
1391 1405 return
1392 1406
1393 1407 # first truncate the files on disk
1394 1408 end = self.start(rev)
1395 1409 if not self._inline:
1396 1410 transaction.add(self.datafile, end)
1397 1411 end = rev * self._io.size
1398 1412 else:
1399 1413 end += rev * self._io.size
1400 1414
1401 1415 transaction.add(self.indexfile, end)
1402 1416
1403 1417 # then reset internal state in memory to forget those revisions
1404 1418 self._cache = None
1405 1419 self._chunkclear()
1406 1420 for x in xrange(rev, len(self)):
1407 1421 del self.nodemap[self.node(x)]
1408 1422
1409 1423 del self.index[rev:-1]
1410 1424
1411 1425 def checksize(self):
1412 1426 expected = 0
1413 1427 if len(self):
1414 1428 expected = max(0, self.end(len(self) - 1))
1415 1429
1416 1430 try:
1417 1431 f = self.opener(self.datafile)
1418 1432 f.seek(0, 2)
1419 1433 actual = f.tell()
1420 1434 dd = actual - expected
1421 1435 except IOError, inst:
1422 1436 if inst.errno != errno.ENOENT:
1423 1437 raise
1424 1438 dd = 0
1425 1439
1426 1440 try:
1427 1441 f = self.opener(self.indexfile)
1428 1442 f.seek(0, 2)
1429 1443 actual = f.tell()
1430 1444 s = self._io.size
1431 1445 i = max(0, actual // s)
1432 1446 di = actual - (i * s)
1433 1447 if self._inline:
1434 1448 databytes = 0
1435 1449 for r in self:
1436 1450 databytes += max(0, self.length(r))
1437 1451 dd = 0
1438 1452 di = actual - len(self) * s - databytes
1439 1453 except IOError, inst:
1440 1454 if inst.errno != errno.ENOENT:
1441 1455 raise
1442 1456 di = 0
1443 1457
1444 1458 return (dd, di)
1445 1459
1446 1460 def files(self):
1447 1461 res = [self.indexfile]
1448 1462 if not self._inline:
1449 1463 res.append(self.datafile)
1450 1464 return res
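The module-level _cached and _uncached counters introduced at the top of the file are debug instrumentation: revision() increments one or the other depending on whether the delta walk stopped at the in-memory cache. A rough way to read the hit rate after driving some revision reads, as a hypothetical Python 2 session against a local repository:

    >>> from mercurial import ui, hg, revlog
    >>> repo = hg.repository(ui.ui(), '.')
    >>> text = repo.changelog.revision(repo.changelog.tip())
    >>> print revlog._cached, revlog._uncached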