##// END OF EJS Templates
revlog: extract delta building to a subfunction
Matt Mackall -
r12888:ad01fe38 stable
parent child Browse files
Show More
@@ -1,1464 +1,1469 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_PARENTDELTA = 1
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 41 # amount of data read unconditionally, should be >= 4
42 42 # when not inline: threshold for using lazy index
43 43 _prereadsize = 1048576
44 44 # max size of revlog with inline data
45 45 _maxinline = 131072
46 46
47 47 RevlogError = error.RevlogError
48 48 LookupError = error.LookupError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
59 59 nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent node is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
82 82 def compress(text):
83 83 """ generate a possibly-compressed representation of text """
84 84 if not text:
85 85 return ("", text)
86 86 l = len(text)
87 87 bin = None
88 88 if l < 44:
89 89 pass
90 90 elif l > 1000000:
91 91 # zlib makes an internal copy, thus doubling memory usage for
92 92 # large files, so lets do this in pieces
93 93 z = zlib.compressobj()
94 94 p = []
95 95 pos = 0
96 96 while pos < l:
97 97 pos2 = pos + 2**20
98 98 p.append(z.compress(text[pos:pos2]))
99 99 pos = pos2
100 100 p.append(z.flush())
101 101 if sum(map(len, p)) < l:
102 102 bin = "".join(p)
103 103 else:
104 104 bin = _compress(text)
105 105 if bin is None or len(bin) > l:
106 106 if text[0] == '\0':
107 107 return ("", text)
108 108 return ('u', text)
109 109 return ("", bin)
110 110
111 111 def decompress(bin):
112 112 """ decompress the given input """
113 113 if not bin:
114 114 return bin
115 115 t = bin[0]
116 116 if t == '\0':
117 117 return bin
118 118 if t == 'x':
119 119 return _decompress(bin)
120 120 if t == 'u':
121 121 return bin[1:]
122 122 raise RevlogError(_("unknown compression type %r") % t)
123 123
124 124 class lazyparser(object):
125 125 """
126 126 this class avoids the need to parse the entirety of large indices
127 127 """
128 128
129 129 # lazyparser is not safe to use on windows if win32 extensions not
130 130 # available. it keeps file handle open, which make it not possible
131 131 # to break hardlinks on local cloned repos.
132 132
133 133 def __init__(self, dataf):
134 134 try:
135 135 size = util.fstat(dataf).st_size
136 136 except AttributeError:
137 137 size = 0
138 138 self.dataf = dataf
139 139 self.s = struct.calcsize(indexformatng)
140 140 self.datasize = size
141 141 self.l = size // self.s
142 142 self.index = [None] * self.l
143 143 self.map = {nullid: nullrev}
144 144 self.allmap = 0
145 145 self.all = 0
146 146 self.mapfind_count = 0
147 147
148 148 def loadmap(self):
149 149 """
150 150 during a commit, we need to make sure the rev being added is
151 151 not a duplicate. This requires loading the entire index,
152 152 which is fairly slow. loadmap can load up just the node map,
153 153 which takes much less time.
154 154 """
155 155 if self.allmap:
156 156 return
157 157 end = self.datasize
158 158 self.allmap = 1
159 159 cur = 0
160 160 count = 0
161 161 blocksize = self.s * 256
162 162 self.dataf.seek(0)
163 163 while cur < end:
164 164 data = self.dataf.read(blocksize)
165 165 off = 0
166 166 for x in xrange(256):
167 167 n = data[off + ngshaoffset:off + ngshaoffset + 20]
168 168 self.map[n] = count
169 169 count += 1
170 170 if count >= self.l:
171 171 break
172 172 off += self.s
173 173 cur += blocksize
174 174
175 175 def loadblock(self, blockstart, blocksize, data=None):
176 176 if self.all:
177 177 return
178 178 if data is None:
179 179 self.dataf.seek(blockstart)
180 180 if blockstart + blocksize > self.datasize:
181 181 # the revlog may have grown since we've started running,
182 182 # but we don't have space in self.index for more entries.
183 183 # limit blocksize so that we don't get too much data.
184 184 blocksize = max(self.datasize - blockstart, 0)
185 185 data = self.dataf.read(blocksize)
186 186 lend = len(data) // self.s
187 187 i = blockstart // self.s
188 188 off = 0
189 189 # lazyindex supports __delitem__
190 190 if lend > len(self.index) - i:
191 191 lend = len(self.index) - i
192 192 for x in xrange(lend):
193 193 if self.index[i + x] is None:
194 194 b = data[off : off + self.s]
195 195 self.index[i + x] = b
196 196 n = b[ngshaoffset:ngshaoffset + 20]
197 197 self.map[n] = i + x
198 198 off += self.s
199 199
200 200 def findnode(self, node):
201 201 """search backwards through the index file for a specific node"""
202 202 if self.allmap:
203 203 return None
204 204
205 205 # hg log will cause many many searches for the manifest
206 206 # nodes. After we get called a few times, just load the whole
207 207 # thing.
208 208 if self.mapfind_count > 8:
209 209 self.loadmap()
210 210 if node in self.map:
211 211 return node
212 212 return None
213 213 self.mapfind_count += 1
214 214 last = self.l - 1
215 215 while self.index[last] != None:
216 216 if last == 0:
217 217 self.all = 1
218 218 self.allmap = 1
219 219 return None
220 220 last -= 1
221 221 end = (last + 1) * self.s
222 222 blocksize = self.s * 256
223 223 while end >= 0:
224 224 start = max(end - blocksize, 0)
225 225 self.dataf.seek(start)
226 226 data = self.dataf.read(end - start)
227 227 findend = end - start
228 228 while True:
229 229 # we're searching backwards, so we have to make sure
230 230 # we don't find a changeset where this node is a parent
231 231 off = data.find(node, 0, findend)
232 232 findend = off
233 233 if off >= 0:
234 234 i = off / self.s
235 235 off = i * self.s
236 236 n = data[off + ngshaoffset:off + ngshaoffset + 20]
237 237 if n == node:
238 238 self.map[n] = i + start / self.s
239 239 return node
240 240 else:
241 241 break
242 242 end -= blocksize
243 243 return None
244 244
245 245 def loadindex(self, i=None, end=None):
246 246 if self.all:
247 247 return
248 248 all = False
249 249 if i is None:
250 250 blockstart = 0
251 251 blocksize = (65536 / self.s) * self.s
252 252 end = self.datasize
253 253 all = True
254 254 else:
255 255 if end:
256 256 blockstart = i * self.s
257 257 end = end * self.s
258 258 blocksize = end - blockstart
259 259 else:
260 260 blockstart = (i & ~1023) * self.s
261 261 blocksize = self.s * 1024
262 262 end = blockstart + blocksize
263 263 while blockstart < end:
264 264 self.loadblock(blockstart, blocksize)
265 265 blockstart += blocksize
266 266 if all:
267 267 self.all = True
268 268
269 269 class lazyindex(object):
270 270 """a lazy version of the index array"""
271 271 def __init__(self, parser):
272 272 self.p = parser
273 273 def __len__(self):
274 274 return len(self.p.index)
275 275 def load(self, pos):
276 276 if pos < 0:
277 277 pos += len(self.p.index)
278 278 self.p.loadindex(pos)
279 279 return self.p.index[pos]
280 280 def __getitem__(self, pos):
281 281 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
282 282 def __setitem__(self, pos, item):
283 283 self.p.index[pos] = _pack(indexformatng, *item)
284 284 def __delitem__(self, pos):
285 285 del self.p.index[pos]
286 286 def insert(self, pos, e):
287 287 self.p.index.insert(pos, _pack(indexformatng, *e))
288 288 def append(self, e):
289 289 self.p.index.append(_pack(indexformatng, *e))
290 290
291 291 class lazymap(object):
292 292 """a lazy version of the node map"""
293 293 def __init__(self, parser):
294 294 self.p = parser
295 295 def load(self, key):
296 296 n = self.p.findnode(key)
297 297 if n is None:
298 298 raise KeyError(key)
299 299 def __contains__(self, key):
300 300 if key in self.p.map:
301 301 return True
302 302 self.p.loadmap()
303 303 return key in self.p.map
304 304 def __iter__(self):
305 305 yield nullid
306 306 for i, ret in enumerate(self.p.index):
307 307 if not ret:
308 308 self.p.loadindex(i)
309 309 ret = self.p.index[i]
310 310 if isinstance(ret, str):
311 311 ret = _unpack(indexformatng, ret)
312 312 yield ret[7]
313 313 def __getitem__(self, key):
314 314 try:
315 315 return self.p.map[key]
316 316 except KeyError:
317 317 try:
318 318 self.load(key)
319 319 return self.p.map[key]
320 320 except KeyError:
321 321 raise KeyError("node " + hex(key))
322 322 def __setitem__(self, key, val):
323 323 self.p.map[key] = val
324 324 def __delitem__(self, key):
325 325 del self.p.map[key]
326 326
327 327 indexformatv0 = ">4l20s20s20s"
328 328 v0shaoffset = 56
329 329
330 330 class revlogoldio(object):
331 331 def __init__(self):
332 332 self.size = struct.calcsize(indexformatv0)
333 333
334 334 def parseindex(self, fp, data, inline):
335 335 s = self.size
336 336 index = []
337 337 nodemap = {nullid: nullrev}
338 338 n = off = 0
339 339 if len(data) == _prereadsize:
340 340 data += fp.read() # read the rest
341 341 l = len(data)
342 342 while off + s <= l:
343 343 cur = data[off:off + s]
344 344 off += s
345 345 e = _unpack(indexformatv0, cur)
346 346 # transform to revlogv1 format
347 347 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
348 348 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
349 349 index.append(e2)
350 350 nodemap[e[6]] = n
351 351 n += 1
352 352
353 353 return index, nodemap, None
354 354
355 355 def packentry(self, entry, node, version, rev):
356 356 if gettype(entry[0]):
357 357 raise RevlogError(_("index entry flags need RevlogNG"))
358 358 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
359 359 node(entry[5]), node(entry[6]), entry[7])
360 360 return _pack(indexformatv0, *e2)
361 361
362 362 # index ng:
363 363 # 6 bytes: offset
364 364 # 2 bytes: flags
365 365 # 4 bytes: compressed length
366 366 # 4 bytes: uncompressed length
367 367 # 4 bytes: base rev
368 368 # 4 bytes: link rev
369 369 # 4 bytes: parent 1 rev
370 370 # 4 bytes: parent 2 rev
371 371 # 32 bytes: nodeid
372 372 indexformatng = ">Qiiiiii20s12x"
373 373 ngshaoffset = 32
374 374 versionformat = ">I"
375 375
376 376 class revlogio(object):
377 377 def __init__(self):
378 378 self.size = struct.calcsize(indexformatng)
379 379
380 380 def parseindex(self, fp, data, inline):
381 381 if len(data) == _prereadsize:
382 382 if util.openhardlinks() and not inline:
383 383 # big index, let's parse it on demand
384 384 parser = lazyparser(fp)
385 385 index = lazyindex(parser)
386 386 nodemap = lazymap(parser)
387 387 e = list(index[0])
388 388 type = gettype(e[0])
389 389 e[0] = offset_type(0, type)
390 390 index[0] = e
391 391 return index, nodemap, None
392 392 else:
393 393 data += fp.read()
394 394
395 395 # call the C implementation to parse the index data
396 396 index, nodemap, cache = parsers.parse_index(data, inline)
397 397 return index, nodemap, cache
398 398
399 399 def packentry(self, entry, node, version, rev):
400 400 p = _pack(indexformatng, *entry)
401 401 if rev == 0:
402 402 p = _pack(versionformat, version) + p[4:]
403 403 return p
404 404
405 405 class revlog(object):
406 406 """
407 407 the underlying revision storage object
408 408
409 409 A revlog consists of two parts, an index and the revision data.
410 410
411 411 The index is a file with a fixed record size containing
412 412 information on each revision, including its nodeid (hash), the
413 413 nodeids of its parents, the position and offset of its data within
414 414 the data file, and the revision it's based on. Finally, each entry
415 415 contains a linkrev entry that can serve as a pointer to external
416 416 data.
417 417
418 418 The revision data itself is a linear collection of data chunks.
419 419 Each chunk represents a revision and is usually represented as a
420 420 delta against the previous chunk. To bound lookup time, runs of
421 421 deltas are limited to about 2 times the length of the original
422 422 version data. This makes retrieval of a version proportional to
423 423 its size, or O(1) relative to the number of revisions.
424 424
425 425 Both pieces of the revlog are written to in an append-only
426 426 fashion, which means we never need to rewrite a file to insert or
427 427 remove data, and can use some simple techniques to avoid the need
428 428 for locking while reading.
429 429 """
430 430 def __init__(self, opener, indexfile, shallowroot=None):
431 431 """
432 432 create a revlog object
433 433
434 434 opener is a function that abstracts the file opening operation
435 435 and can be used to implement COW semantics or the like.
436 436 """
437 437 self.indexfile = indexfile
438 438 self.datafile = indexfile[:-2] + ".d"
439 439 self.opener = opener
440 440 self._cache = None
441 441 self._chunkcache = (0, '')
442 442 self.nodemap = {nullid: nullrev}
443 443 self.index = []
444 444 self._shallowroot = shallowroot
445 445 self._parentdelta = 0
446 446
447 447 v = REVLOG_DEFAULT_VERSION
448 448 if hasattr(opener, 'options') and 'defversion' in opener.options:
449 449 v = opener.options['defversion']
450 450 if v & REVLOGNG:
451 451 v |= REVLOGNGINLINEDATA
452 452 if v & REVLOGNG and 'parentdelta' in opener.options:
453 453 self._parentdelta = 1
454 454
455 455 if shallowroot:
456 456 v |= REVLOGSHALLOW
457 457
458 458 i = ''
459 459 try:
460 460 f = self.opener(self.indexfile)
461 461 if "nonlazy" in getattr(self.opener, 'options', {}):
462 462 i = f.read()
463 463 else:
464 464 i = f.read(_prereadsize)
465 465 if len(i) > 0:
466 466 v = struct.unpack(versionformat, i[:4])[0]
467 467 except IOError, inst:
468 468 if inst.errno != errno.ENOENT:
469 469 raise
470 470
471 471 self.version = v
472 472 self._inline = v & REVLOGNGINLINEDATA
473 473 self._shallow = v & REVLOGSHALLOW
474 474 flags = v & ~0xFFFF
475 475 fmt = v & 0xFFFF
476 476 if fmt == REVLOGV0 and flags:
477 477 raise RevlogError(_("index %s unknown flags %#04x for format v0")
478 478 % (self.indexfile, flags >> 16))
479 479 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
480 480 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
481 481 % (self.indexfile, flags >> 16))
482 482 elif fmt > REVLOGNG:
483 483 raise RevlogError(_("index %s unknown format %d")
484 484 % (self.indexfile, fmt))
485 485
486 486 self._io = revlogio()
487 487 if self.version == REVLOGV0:
488 488 self._io = revlogoldio()
489 489 if i:
490 490 try:
491 491 d = self._io.parseindex(f, i, self._inline)
492 492 except (ValueError, IndexError):
493 493 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
494 494 self.index, self.nodemap, self._chunkcache = d
495 495 if not self._chunkcache:
496 496 self._chunkclear()
497 497
498 498 # add the magic null revision at -1 (if it hasn't been done already)
499 499 if (self.index == [] or isinstance(self.index, lazyindex) or
500 500 self.index[-1][7] != nullid) :
501 501 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502 502
503 503 def _loadindex(self, start, end):
504 504 """load a block of indexes all at once from the lazy parser"""
505 505 if isinstance(self.index, lazyindex):
506 506 self.index.p.loadindex(start, end)
507 507
508 508 def _loadindexmap(self):
509 509 """loads both the map and the index from the lazy parser"""
510 510 if isinstance(self.index, lazyindex):
511 511 p = self.index.p
512 512 p.loadindex()
513 513 self.nodemap = p.map
514 514
515 515 def _loadmap(self):
516 516 """loads the map from the lazy parser"""
517 517 if isinstance(self.nodemap, lazymap):
518 518 self.nodemap.p.loadmap()
519 519 self.nodemap = self.nodemap.p.map
520 520
521 521 def tip(self):
522 522 return self.node(len(self.index) - 2)
523 523 def __len__(self):
524 524 return len(self.index) - 1
525 525 def __iter__(self):
526 526 for i in xrange(len(self)):
527 527 yield i
528 528 def rev(self, node):
529 529 try:
530 530 return self.nodemap[node]
531 531 except KeyError:
532 532 raise LookupError(node, self.indexfile, _('no node'))
533 533 def node(self, rev):
534 534 return self.index[rev][7]
535 535 def linkrev(self, rev):
536 536 return self.index[rev][4]
537 537 def parents(self, node):
538 538 i = self.index
539 539 d = i[self.rev(node)]
540 540 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
541 541 def parentrevs(self, rev):
542 542 return self.index[rev][5:7]
543 543 def start(self, rev):
544 544 return int(self.index[rev][0] >> 16)
545 545 def end(self, rev):
546 546 return self.start(rev) + self.length(rev)
547 547 def length(self, rev):
548 548 return self.index[rev][1]
549 549 def base(self, rev):
550 550 return self.index[rev][3]
551 551 def flags(self, rev):
552 552 return self.index[rev][0] & 0xFFFF
553 553 def rawsize(self, rev):
554 554 """return the length of the uncompressed text for a given revision"""
555 555 l = self.index[rev][2]
556 556 if l >= 0:
557 557 return l
558 558
559 559 t = self.revision(self.node(rev))
560 560 return len(t)
561 561 size = rawsize
562 562
563 563 def reachable(self, node, stop=None):
564 564 """return the set of all nodes ancestral to a given node, including
565 565 the node itself, stopping when stop is matched"""
566 566 reachable = set((node,))
567 567 visit = [node]
568 568 if stop:
569 569 stopn = self.rev(stop)
570 570 else:
571 571 stopn = 0
572 572 while visit:
573 573 n = visit.pop(0)
574 574 if n == stop:
575 575 continue
576 576 if n == nullid:
577 577 continue
578 578 for p in self.parents(n):
579 579 if self.rev(p) < stopn:
580 580 continue
581 581 if p not in reachable:
582 582 reachable.add(p)
583 583 visit.append(p)
584 584 return reachable
585 585
586 586 def ancestors(self, *revs):
587 587 """Generate the ancestors of 'revs' in reverse topological order.
588 588
589 589 Yield a sequence of revision numbers starting with the parents
590 590 of each revision in revs, i.e., each revision is *not* considered
591 591 an ancestor of itself. Results are in breadth-first order:
592 592 parents of each rev in revs, then parents of those, etc. Result
593 593 does not include the null revision."""
594 594 visit = list(revs)
595 595 seen = set([nullrev])
596 596 while visit:
597 597 for parent in self.parentrevs(visit.pop(0)):
598 598 if parent not in seen:
599 599 visit.append(parent)
600 600 seen.add(parent)
601 601 yield parent
602 602
603 603 def descendants(self, *revs):
604 604 """Generate the descendants of 'revs' in revision order.
605 605
606 606 Yield a sequence of revision numbers starting with a child of
607 607 some rev in revs, i.e., each revision is *not* considered a
608 608 descendant of itself. Results are ordered by revision number (a
609 609 topological sort)."""
610 610 seen = set(revs)
611 611 for i in xrange(min(revs) + 1, len(self)):
612 612 for x in self.parentrevs(i):
613 613 if x != nullrev and x in seen:
614 614 seen.add(i)
615 615 yield i
616 616 break
617 617
618 618 def findmissing(self, common=None, heads=None):
619 619 """Return the ancestors of heads that are not ancestors of common.
620 620
621 621 More specifically, return a list of nodes N such that every N
622 622 satisfies the following constraints:
623 623
624 624 1. N is an ancestor of some node in 'heads'
625 625 2. N is not an ancestor of any node in 'common'
626 626
627 627 The list is sorted by revision number, meaning it is
628 628 topologically sorted.
629 629
630 630 'heads' and 'common' are both lists of node IDs. If heads is
631 631 not supplied, uses all of the revlog's heads. If common is not
632 632 supplied, uses nullid."""
633 633 if common is None:
634 634 common = [nullid]
635 635 if heads is None:
636 636 heads = self.heads()
637 637
638 638 common = [self.rev(n) for n in common]
639 639 heads = [self.rev(n) for n in heads]
640 640
641 641 # we want the ancestors, but inclusive
642 642 has = set(self.ancestors(*common))
643 643 has.add(nullrev)
644 644 has.update(common)
645 645
646 646 # take all ancestors from heads that aren't in has
647 647 missing = set()
648 648 visit = [r for r in heads if r not in has]
649 649 while visit:
650 650 r = visit.pop(0)
651 651 if r in missing:
652 652 continue
653 653 else:
654 654 missing.add(r)
655 655 for p in self.parentrevs(r):
656 656 if p not in has:
657 657 visit.append(p)
658 658 missing = list(missing)
659 659 missing.sort()
660 660 return [self.node(r) for r in missing]
661 661
662 662 def nodesbetween(self, roots=None, heads=None):
663 663 """Return a topological path from 'roots' to 'heads'.
664 664
665 665 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
666 666 topologically sorted list of all nodes N that satisfy both of
667 667 these constraints:
668 668
669 669 1. N is a descendant of some node in 'roots'
670 670 2. N is an ancestor of some node in 'heads'
671 671
672 672 Every node is considered to be both a descendant and an ancestor
673 673 of itself, so every reachable node in 'roots' and 'heads' will be
674 674 included in 'nodes'.
675 675
676 676 'outroots' is the list of reachable nodes in 'roots', i.e., the
677 677 subset of 'roots' that is returned in 'nodes'. Likewise,
678 678 'outheads' is the subset of 'heads' that is also in 'nodes'.
679 679
680 680 'roots' and 'heads' are both lists of node IDs. If 'roots' is
681 681 unspecified, uses nullid as the only root. If 'heads' is
682 682 unspecified, uses list of all of the revlog's heads."""
683 683 nonodes = ([], [], [])
684 684 if roots is not None:
685 685 roots = list(roots)
686 686 if not roots:
687 687 return nonodes
688 688 lowestrev = min([self.rev(n) for n in roots])
689 689 else:
690 690 roots = [nullid] # Everybody's a descendent of nullid
691 691 lowestrev = nullrev
692 692 if (lowestrev == nullrev) and (heads is None):
693 693 # We want _all_ the nodes!
694 694 return ([self.node(r) for r in self], [nullid], list(self.heads()))
695 695 if heads is None:
696 696 # All nodes are ancestors, so the latest ancestor is the last
697 697 # node.
698 698 highestrev = len(self) - 1
699 699 # Set ancestors to None to signal that every node is an ancestor.
700 700 ancestors = None
701 701 # Set heads to an empty dictionary for later discovery of heads
702 702 heads = {}
703 703 else:
704 704 heads = list(heads)
705 705 if not heads:
706 706 return nonodes
707 707 ancestors = set()
708 708 # Turn heads into a dictionary so we can remove 'fake' heads.
709 709 # Also, later we will be using it to filter out the heads we can't
710 710 # find from roots.
711 711 heads = dict.fromkeys(heads, 0)
712 712 # Start at the top and keep marking parents until we're done.
713 713 nodestotag = set(heads)
714 714 # Remember where the top was so we can use it as a limit later.
715 715 highestrev = max([self.rev(n) for n in nodestotag])
716 716 while nodestotag:
717 717 # grab a node to tag
718 718 n = nodestotag.pop()
719 719 # Never tag nullid
720 720 if n == nullid:
721 721 continue
722 722 # A node's revision number represents its place in a
723 723 # topologically sorted list of nodes.
724 724 r = self.rev(n)
725 725 if r >= lowestrev:
726 726 if n not in ancestors:
727 727 # If we are possibly a descendent of one of the roots
728 728 # and we haven't already been marked as an ancestor
729 729 ancestors.add(n) # Mark as ancestor
730 730 # Add non-nullid parents to list of nodes to tag.
731 731 nodestotag.update([p for p in self.parents(n) if
732 732 p != nullid])
733 733 elif n in heads: # We've seen it before, is it a fake head?
734 734 # So it is, real heads should not be the ancestors of
735 735 # any other heads.
736 736 heads.pop(n)
737 737 if not ancestors:
738 738 return nonodes
739 739 # Now that we have our set of ancestors, we want to remove any
740 740 # roots that are not ancestors.
741 741
742 742 # If one of the roots was nullid, everything is included anyway.
743 743 if lowestrev > nullrev:
744 744 # But, since we weren't, let's recompute the lowest rev to not
745 745 # include roots that aren't ancestors.
746 746
747 747 # Filter out roots that aren't ancestors of heads
748 748 roots = [n for n in roots if n in ancestors]
749 749 # Recompute the lowest revision
750 750 if roots:
751 751 lowestrev = min([self.rev(n) for n in roots])
752 752 else:
753 753 # No more roots? Return empty list
754 754 return nonodes
755 755 else:
756 756 # We are descending from nullid, and don't need to care about
757 757 # any other roots.
758 758 lowestrev = nullrev
759 759 roots = [nullid]
760 760 # Transform our roots list into a set.
761 761 descendents = set(roots)
762 762 # Also, keep the original roots so we can filter out roots that aren't
763 763 # 'real' roots (i.e. are descended from other roots).
764 764 roots = descendents.copy()
765 765 # Our topologically sorted list of output nodes.
766 766 orderedout = []
767 767 # Don't start at nullid since we don't want nullid in our output list,
768 768 # and if nullid shows up in descedents, empty parents will look like
769 769 # they're descendents.
770 770 for r in xrange(max(lowestrev, 0), highestrev + 1):
771 771 n = self.node(r)
772 772 isdescendent = False
773 773 if lowestrev == nullrev: # Everybody is a descendent of nullid
774 774 isdescendent = True
775 775 elif n in descendents:
776 776 # n is already a descendent
777 777 isdescendent = True
778 778 # This check only needs to be done here because all the roots
779 779 # will start being marked is descendents before the loop.
780 780 if n in roots:
781 781 # If n was a root, check if it's a 'real' root.
782 782 p = tuple(self.parents(n))
783 783 # If any of its parents are descendents, it's not a root.
784 784 if (p[0] in descendents) or (p[1] in descendents):
785 785 roots.remove(n)
786 786 else:
787 787 p = tuple(self.parents(n))
788 788 # A node is a descendent if either of its parents are
789 789 # descendents. (We seeded the dependents list with the roots
790 790 # up there, remember?)
791 791 if (p[0] in descendents) or (p[1] in descendents):
792 792 descendents.add(n)
793 793 isdescendent = True
794 794 if isdescendent and ((ancestors is None) or (n in ancestors)):
795 795 # Only include nodes that are both descendents and ancestors.
796 796 orderedout.append(n)
797 797 if (ancestors is not None) and (n in heads):
798 798 # We're trying to figure out which heads are reachable
799 799 # from roots.
800 800 # Mark this head as having been reached
801 801 heads[n] = 1
802 802 elif ancestors is None:
803 803 # Otherwise, we're trying to discover the heads.
804 804 # Assume this is a head because if it isn't, the next step
805 805 # will eventually remove it.
806 806 heads[n] = 1
807 807 # But, obviously its parents aren't.
808 808 for p in self.parents(n):
809 809 heads.pop(p, None)
810 810 heads = [n for n in heads.iterkeys() if heads[n] != 0]
811 811 roots = list(roots)
812 812 assert orderedout
813 813 assert roots
814 814 assert heads
815 815 return (orderedout, roots, heads)
816 816
817 817 def heads(self, start=None, stop=None):
818 818 """return the list of all nodes that have no children
819 819
820 820 if start is specified, only heads that are descendants of
821 821 start will be returned
822 822 if stop is specified, it will consider all the revs from stop
823 823 as if they had no children
824 824 """
825 825 if start is None and stop is None:
826 826 count = len(self)
827 827 if not count:
828 828 return [nullid]
829 829 ishead = [1] * (count + 1)
830 830 index = self.index
831 831 for r in xrange(count):
832 832 e = index[r]
833 833 ishead[e[5]] = ishead[e[6]] = 0
834 834 return [self.node(r) for r in xrange(count) if ishead[r]]
835 835
836 836 if start is None:
837 837 start = nullid
838 838 if stop is None:
839 839 stop = []
840 840 stoprevs = set([self.rev(n) for n in stop])
841 841 startrev = self.rev(start)
842 842 reachable = set((startrev,))
843 843 heads = set((startrev,))
844 844
845 845 parentrevs = self.parentrevs
846 846 for r in xrange(startrev + 1, len(self)):
847 847 for p in parentrevs(r):
848 848 if p in reachable:
849 849 if r not in stoprevs:
850 850 reachable.add(r)
851 851 heads.add(r)
852 852 if p in heads and p not in stoprevs:
853 853 heads.remove(p)
854 854
855 855 return [self.node(r) for r in heads]
856 856
857 857 def children(self, node):
858 858 """find the children of a given node"""
859 859 c = []
860 860 p = self.rev(node)
861 861 for r in range(p + 1, len(self)):
862 862 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
863 863 if prevs:
864 864 for pr in prevs:
865 865 if pr == p:
866 866 c.append(self.node(r))
867 867 elif p == nullrev:
868 868 c.append(self.node(r))
869 869 return c
870 870
871 871 def descendant(self, start, end):
872 872 for i in self.descendants(start):
873 873 if i == end:
874 874 return True
875 875 elif i > end:
876 876 break
877 877 return False
878 878
879 879 def ancestor(self, a, b):
880 880 """calculate the least common ancestor of nodes a and b"""
881 881
882 882 # fast path, check if it is a descendant
883 883 a, b = self.rev(a), self.rev(b)
884 884 start, end = sorted((a, b))
885 885 if self.descendant(start, end):
886 886 return self.node(start)
887 887
888 888 def parents(rev):
889 889 return [p for p in self.parentrevs(rev) if p != nullrev]
890 890
891 891 c = ancestor.ancestor(a, b, parents)
892 892 if c is None:
893 893 return nullid
894 894
895 895 return self.node(c)
896 896
897 897 def _match(self, id):
898 898 if isinstance(id, (long, int)):
899 899 # rev
900 900 return self.node(id)
901 901 if len(id) == 20:
902 902 # possibly a binary node
903 903 # odds of a binary node being all hex in ASCII are 1 in 10**25
904 904 try:
905 905 node = id
906 906 self.rev(node) # quick search the index
907 907 return node
908 908 except LookupError:
909 909 pass # may be partial hex id
910 910 try:
911 911 # str(rev)
912 912 rev = int(id)
913 913 if str(rev) != id:
914 914 raise ValueError
915 915 if rev < 0:
916 916 rev = len(self) + rev
917 917 if rev < 0 or rev >= len(self):
918 918 raise ValueError
919 919 return self.node(rev)
920 920 except (ValueError, OverflowError):
921 921 pass
922 922 if len(id) == 40:
923 923 try:
924 924 # a full hex nodeid?
925 925 node = bin(id)
926 926 self.rev(node)
927 927 return node
928 928 except (TypeError, LookupError):
929 929 pass
930 930
931 931 def _partialmatch(self, id):
932 932 if len(id) < 40:
933 933 try:
934 934 # hex(node)[:...]
935 935 l = len(id) // 2 # grab an even number of digits
936 936 bin_id = bin(id[:l * 2])
937 937 nl = [n for n in self.nodemap if n[:l] == bin_id]
938 938 nl = [n for n in nl if hex(n).startswith(id)]
939 939 if len(nl) > 0:
940 940 if len(nl) == 1:
941 941 return nl[0]
942 942 raise LookupError(id, self.indexfile,
943 943 _('ambiguous identifier'))
944 944 return None
945 945 except TypeError:
946 946 pass
947 947
948 948 def lookup(self, id):
949 949 """locate a node based on:
950 950 - revision number or str(revision number)
951 951 - nodeid or subset of hex nodeid
952 952 """
953 953 n = self._match(id)
954 954 if n is not None:
955 955 return n
956 956 n = self._partialmatch(id)
957 957 if n:
958 958 return n
959 959
960 960 raise LookupError(id, self.indexfile, _('no match found'))
961 961
962 962 def cmp(self, node, text):
963 963 """compare text with a given file revision
964 964
965 965 returns True if text is different than what is stored.
966 966 """
967 967 p1, p2 = self.parents(node)
968 968 return hash(text, p1, p2) != node
969 969
970 970 def _addchunk(self, offset, data):
971 971 o, d = self._chunkcache
972 972 # try to add to existing cache
973 973 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
974 974 self._chunkcache = o, d + data
975 975 else:
976 976 self._chunkcache = offset, data
977 977
978 978 def _loadchunk(self, offset, length):
979 979 if self._inline:
980 980 df = self.opener(self.indexfile)
981 981 else:
982 982 df = self.opener(self.datafile)
983 983
984 984 readahead = max(65536, length)
985 985 df.seek(offset)
986 986 d = df.read(readahead)
987 987 self._addchunk(offset, d)
988 988 if readahead > length:
989 989 return d[:length]
990 990 return d
991 991
992 992 def _getchunk(self, offset, length):
993 993 o, d = self._chunkcache
994 994 l = len(d)
995 995
996 996 # is it in the cache?
997 997 cachestart = offset - o
998 998 cacheend = cachestart + length
999 999 if cachestart >= 0 and cacheend <= l:
1000 1000 if cachestart == 0 and cacheend == l:
1001 1001 return d # avoid a copy
1002 1002 return d[cachestart:cacheend]
1003 1003
1004 1004 return self._loadchunk(offset, length)
1005 1005
1006 1006 def _chunkraw(self, startrev, endrev):
1007 1007 start = self.start(startrev)
1008 1008 length = self.end(endrev) - start
1009 1009 if self._inline:
1010 1010 start += (startrev + 1) * self._io.size
1011 1011 return self._getchunk(start, length)
1012 1012
1013 1013 def _chunk(self, rev):
1014 1014 return decompress(self._chunkraw(rev, rev))
1015 1015
1016 1016 def _chunkclear(self):
1017 1017 self._chunkcache = (0, '')
1018 1018
1019 1019 def deltaparent(self, rev):
1020 1020 """return previous revision or parentrev according to flags"""
1021 1021 if self.flags(rev) & REVIDX_PARENTDELTA:
1022 1022 return self.parentrevs(rev)[0]
1023 1023 else:
1024 1024 return rev - 1
1025 1025
1026 1026 def revdiff(self, rev1, rev2):
1027 1027 """return or calculate a delta between two revisions"""
1028 1028 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
1029 1029 return self._chunk(rev2)
1030 1030
1031 1031 return mdiff.textdiff(self.revision(self.node(rev1)),
1032 1032 self.revision(self.node(rev2)))
1033 1033
1034 1034 def revision(self, node):
1035 1035 """return an uncompressed revision of a given node"""
1036 1036 cachedrev = None
1037 1037 if node == nullid:
1038 1038 return ""
1039 1039 if self._cache:
1040 1040 if self._cache[0] == node:
1041 1041 return self._cache[2]
1042 1042 cachedrev = self._cache[1]
1043 1043
1044 1044 # look up what we need to read
1045 1045 text = None
1046 1046 rev = self.rev(node)
1047 1047 base = self.base(rev)
1048 1048
1049 1049 # check rev flags
1050 1050 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1051 1051 raise RevlogError(_('incompatible revision flag %x') %
1052 1052 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1053 1053
1054 1054 # build delta chain
1055 1055 self._loadindex(base, rev + 1)
1056 1056 chain = []
1057 1057 index = self.index # for performance
1058 1058 iterrev = rev
1059 1059 e = index[iterrev]
1060 1060 while iterrev != base and iterrev != cachedrev:
1061 1061 chain.append(iterrev)
1062 1062 if e[0] & REVIDX_PARENTDELTA:
1063 1063 iterrev = e[5]
1064 1064 else:
1065 1065 iterrev -= 1
1066 1066 e = index[iterrev]
1067 1067 chain.reverse()
1068 1068 base = iterrev
1069 1069
1070 1070 if iterrev == cachedrev:
1071 1071 # cache hit
1072 1072 text = self._cache[2]
1073 1073
1074 1074 # drop cache to save memory
1075 1075 self._cache = None
1076 1076
1077 1077 self._chunkraw(base, rev)
1078 1078 if text is None:
1079 1079 text = self._chunk(base)
1080 1080
1081 1081 bins = [self._chunk(r) for r in chain]
1082 1082 text = mdiff.patches(text, bins)
1083 1083 p1, p2 = self.parents(node)
1084 1084 if (node != hash(text, p1, p2) and
1085 1085 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
1086 1086 raise RevlogError(_("integrity check failed on %s:%d")
1087 1087 % (self.indexfile, rev))
1088 1088
1089 1089 self._cache = (node, rev, text)
1090 1090 return text
1091 1091
1092 1092 def checkinlinesize(self, tr, fp=None):
1093 1093 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1094 1094 return
1095 1095
1096 1096 trinfo = tr.find(self.indexfile)
1097 1097 if trinfo is None:
1098 1098 raise RevlogError(_("%s not found in the transaction")
1099 1099 % self.indexfile)
1100 1100
1101 1101 trindex = trinfo[2]
1102 1102 dataoff = self.start(trindex)
1103 1103
1104 1104 tr.add(self.datafile, dataoff)
1105 1105
1106 1106 if fp:
1107 1107 fp.flush()
1108 1108 fp.close()
1109 1109
1110 1110 df = self.opener(self.datafile, 'w')
1111 1111 try:
1112 1112 for r in self:
1113 1113 df.write(self._chunkraw(r, r))
1114 1114 finally:
1115 1115 df.close()
1116 1116
1117 1117 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1118 1118 self.version &= ~(REVLOGNGINLINEDATA)
1119 1119 self._inline = False
1120 1120 for i in self:
1121 1121 e = self._io.packentry(self.index[i], self.node, self.version, i)
1122 1122 fp.write(e)
1123 1123
1124 1124 # if we don't call rename, the temp file will never replace the
1125 1125 # real index
1126 1126 fp.rename()
1127 1127
1128 1128 tr.replace(self.indexfile, trindex * self._io.size)
1129 1129 self._chunkclear()
1130 1130
1131 1131 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
1132 1132 """add a revision to the log
1133 1133
1134 1134 text - the revision data to add
1135 1135 transaction - the transaction object used for rollback
1136 1136 link - the linkrev data to add
1137 1137 p1, p2 - the parent nodeids of the revision
1138 1138 cachedelta - an optional precomputed delta
1139 1139 """
1140 1140 node = hash(text, p1, p2)
1141 1141 if (node in self.nodemap and
1142 1142 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1143 1143 return node
1144 1144
1145 1145 dfh = None
1146 1146 if not self._inline:
1147 1147 dfh = self.opener(self.datafile, "a")
1148 1148 ifh = self.opener(self.indexfile, "a+")
1149 1149 try:
1150 1150 return self._addrevision(node, text, transaction, link, p1, p2,
1151 1151 cachedelta, ifh, dfh)
1152 1152 finally:
1153 1153 if dfh:
1154 1154 dfh.close()
1155 1155 ifh.close()
1156 1156
1157 1157 def _addrevision(self, node, text, transaction, link, p1, p2,
1158 1158 cachedelta, ifh, dfh):
1159 1159
1160 1160 btext = [text]
1161 1161 def buildtext():
1162 1162 if btext[0] is not None:
1163 1163 return btext[0]
1164 1164 # flush any pending writes here so we can read it in revision
1165 1165 if dfh:
1166 1166 dfh.flush()
1167 1167 ifh.flush()
1168 1168 basetext = self.revision(self.node(cachedelta[0]))
1169 1169 btext[0] = mdiff.patch(basetext, cachedelta[1])
1170 1170 chk = hash(btext[0], p1, p2)
1171 1171 if chk != node:
1172 1172 raise RevlogError(_("consistency error in delta"))
1173 1173 return btext[0]
1174 1174
1175 def builddelta(rev):
1176 # can we use the cached delta?
1177 if cachedelta and cachedelta[0] == rev:
1178 delta = cachedelta[1]
1179 else:
1180 t = buildtext()
1181 ptext = self.revision(self.node(rev))
1182 delta = mdiff.textdiff(ptext, t)
1183 data = compress(delta)
1184 l = len(data[1]) + len(data[0])
1185 base = self.base(rev)
1186 dist = l + offset - self.start(base)
1187 return dist, l, data, base
1188
1175 1189 curr = len(self)
1176 1190 prev = curr - 1
1177 1191 base = curr
1178 1192 offset = self.end(prev)
1179 1193 flags = 0
1180 1194 d = None
1181 1195
1182 1196 if self._parentdelta:
1183 1197 deltarev, deltanode = self.rev(p1), p1
1184 1198 flags = REVIDX_PARENTDELTA
1185 1199 else:
1186 1200 deltarev, deltanode = prev, self.node(prev)
1187 1201
1188 1202 # should we try to build a delta?
1189 1203 if deltarev != nullrev:
1190 # can we use the cached delta?
1191 if cachedelta and cachedelta[0] == deltarev:
1192 d = cachedelta[1]
1193 else:
1194 t = buildtext()
1195 ptext = self.revision(deltanode)
1196 d = mdiff.textdiff(ptext, t)
1197 data = compress(d)
1198 l = len(data[1]) + len(data[0])
1199 base = self.base(deltarev)
1200 dist = l + offset - self.start(base)
1204 d = builddelta(deltarev)
1205 dist, l, data, base = d
1201 1206
1202 1207 # full versions are inserted when the needed deltas
1203 1208 # become comparable to the uncompressed text
1204 1209 # or the base revision is punched
1205 1210 if text is None:
1206 1211 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1207 1212 cachedelta[1])
1208 1213 else:
1209 1214 textlen = len(text)
1210 1215 if (d is None or dist > textlen * 2 or
1211 1216 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1212 1217 text = buildtext()
1213 1218 data = compress(text)
1214 1219 l = len(data[1]) + len(data[0])
1215 1220 base = curr
1216 1221
1217 1222 e = (offset_type(offset, flags), l, textlen,
1218 1223 base, link, self.rev(p1), self.rev(p2), node)
1219 1224 self.index.insert(-1, e)
1220 1225 self.nodemap[node] = curr
1221 1226
1222 1227 entry = self._io.packentry(e, self.node, self.version, curr)
1223 1228 if not self._inline:
1224 1229 transaction.add(self.datafile, offset)
1225 1230 transaction.add(self.indexfile, curr * len(entry))
1226 1231 if data[0]:
1227 1232 dfh.write(data[0])
1228 1233 dfh.write(data[1])
1229 1234 dfh.flush()
1230 1235 ifh.write(entry)
1231 1236 else:
1232 1237 offset += curr * self._io.size
1233 1238 transaction.add(self.indexfile, offset, curr)
1234 1239 ifh.write(entry)
1235 1240 ifh.write(data[0])
1236 1241 ifh.write(data[1])
1237 1242 self.checkinlinesize(transaction, ifh)
1238 1243
1239 1244 if type(text) == str: # only accept immutable objects
1240 1245 self._cache = (node, curr, text)
1241 1246 return node
1242 1247
1243 1248 def group(self, nodelist, lookup, infocollect=None, fullrev=False):
1244 1249 """Calculate a delta group, yielding a sequence of changegroup chunks
1245 1250 (strings).
1246 1251
1247 1252 Given a list of changeset revs, return a set of deltas and
1248 1253 metadata corresponding to nodes. The first delta is
1249 1254 first parent(nodelist[0]) -> nodelist[0], the receiver is
1250 1255 guaranteed to have this parent as it has all history before
1251 1256 these changesets. In the case firstparent is nullrev the
1252 1257 changegroup starts with a full revision.
1253 1258 fullrev forces the insertion of the full revision, necessary
1254 1259 in the case of shallow clones where the first parent might
1255 1260 not exist at the reciever.
1256 1261 """
1257 1262
1258 1263 revs = [self.rev(n) for n in nodelist]
1259 1264
1260 1265 # if we don't have any revisions touched by these changesets, bail
1261 1266 if not revs:
1262 1267 yield changegroup.closechunk()
1263 1268 return
1264 1269
1265 1270 # add the parent of the first rev
1266 1271 p = self.parentrevs(revs[0])[0]
1267 1272 revs.insert(0, p)
1268 1273 if p == nullrev:
1269 1274 fullrev = True
1270 1275
1271 1276 # build deltas
1272 1277 for d in xrange(len(revs) - 1):
1273 1278 a, b = revs[d], revs[d + 1]
1274 1279 nb = self.node(b)
1275 1280
1276 1281 if infocollect is not None:
1277 1282 infocollect(nb)
1278 1283
1279 1284 p = self.parents(nb)
1280 1285 meta = nb + p[0] + p[1] + lookup(nb)
1281 1286 if fullrev:
1282 1287 d = self.revision(nb)
1283 1288 meta += mdiff.trivialdiffheader(len(d))
1284 1289 fullrev = False
1285 1290 else:
1286 1291 d = self.revdiff(a, b)
1287 1292 yield changegroup.chunkheader(len(meta) + len(d))
1288 1293 yield meta
1289 1294 yield d
1290 1295
1291 1296 yield changegroup.closechunk()
1292 1297
1293 1298 def addgroup(self, bundle, linkmapper, transaction):
1294 1299 """
1295 1300 add a delta group
1296 1301
1297 1302 given a set of deltas, add them to the revision log. the
1298 1303 first delta is against its parent, which should be in our
1299 1304 log, the rest are against the previous delta.
1300 1305 """
1301 1306
1302 1307 # track the base of the current delta log
1303 1308 node = None
1304 1309
1305 1310 r = len(self)
1306 1311 end = 0
1307 1312 if r:
1308 1313 end = self.end(r - 1)
1309 1314 ifh = self.opener(self.indexfile, "a+")
1310 1315 isize = r * self._io.size
1311 1316 if self._inline:
1312 1317 transaction.add(self.indexfile, end + isize, r)
1313 1318 dfh = None
1314 1319 else:
1315 1320 transaction.add(self.indexfile, isize, r)
1316 1321 transaction.add(self.datafile, end)
1317 1322 dfh = self.opener(self.datafile, "a")
1318 1323
1319 1324 try:
1320 1325 # loop through our set of deltas
1321 1326 chain = None
1322 1327 while 1:
1323 1328 chunkdata = bundle.parsechunk()
1324 1329 if not chunkdata:
1325 1330 break
1326 1331 node = chunkdata['node']
1327 1332 p1 = chunkdata['p1']
1328 1333 p2 = chunkdata['p2']
1329 1334 cs = chunkdata['cs']
1330 1335 delta = chunkdata['data']
1331 1336
1332 1337 link = linkmapper(cs)
1333 1338 if (node in self.nodemap and
1334 1339 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1335 1340 # this can happen if two branches make the same change
1336 1341 chain = node
1337 1342 continue
1338 1343
1339 1344 for p in (p1, p2):
1340 1345 if not p in self.nodemap:
1341 1346 if self._shallow:
1342 1347 # add null entries for missing parents
1343 1348 # XXX FIXME
1344 1349 #if base == nullrev:
1345 1350 # base = len(self)
1346 1351 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1347 1352 # 0, 0, base, nullrev, nullrev, nullrev, p)
1348 1353 #self.index.insert(-1, e)
1349 1354 #self.nodemap[p] = r
1350 1355 #entry = self._io.packentry(e, self.node,
1351 1356 # self.version, r)
1352 1357 #ifh.write(entry)
1353 1358 #t, r = r, r + 1
1354 1359 raise LookupError(p, self.indexfile,
1355 1360 _('unknown parent'))
1356 1361 else:
1357 1362 raise LookupError(p, self.indexfile,
1358 1363 _('unknown parent'))
1359 1364
1360 1365 if not chain:
1361 1366 # retrieve the parent revision of the delta chain
1362 1367 chain = p1
1363 1368 if not chain in self.nodemap:
1364 1369 raise LookupError(chain, self.indexfile, _('unknown base'))
1365 1370
1366 1371 chainrev = self.rev(chain)
1367 1372 chain = self._addrevision(node, None, transaction, link,
1368 1373 p1, p2, (chainrev, delta), ifh, dfh)
1369 1374 if not dfh and not self._inline:
1370 1375 # addrevision switched from inline to conventional
1371 1376 # reopen the index
1372 1377 dfh = self.opener(self.datafile, "a")
1373 1378 ifh = self.opener(self.indexfile, "a")
1374 1379 finally:
1375 1380 if dfh:
1376 1381 dfh.close()
1377 1382 ifh.close()
1378 1383
1379 1384 return node
1380 1385
1381 1386 def strip(self, minlink, transaction):
1382 1387 """truncate the revlog on the first revision with a linkrev >= minlink
1383 1388
1384 1389 This function is called when we're stripping revision minlink and
1385 1390 its descendants from the repository.
1386 1391
1387 1392 We have to remove all revisions with linkrev >= minlink, because
1388 1393 the equivalent changelog revisions will be renumbered after the
1389 1394 strip.
1390 1395
1391 1396 So we truncate the revlog on the first of these revisions, and
1392 1397 trust that the caller has saved the revisions that shouldn't be
1393 1398 removed and that it'll readd them after this truncation.
1394 1399 """
1395 1400 if len(self) == 0:
1396 1401 return
1397 1402
1398 1403 if isinstance(self.index, lazyindex):
1399 1404 self._loadindexmap()
1400 1405
1401 1406 for rev in self:
1402 1407 if self.index[rev][4] >= minlink:
1403 1408 break
1404 1409 else:
1405 1410 return
1406 1411
1407 1412 # first truncate the files on disk
1408 1413 end = self.start(rev)
1409 1414 if not self._inline:
1410 1415 transaction.add(self.datafile, end)
1411 1416 end = rev * self._io.size
1412 1417 else:
1413 1418 end += rev * self._io.size
1414 1419
1415 1420 transaction.add(self.indexfile, end)
1416 1421
1417 1422 # then reset internal state in memory to forget those revisions
1418 1423 self._cache = None
1419 1424 self._chunkclear()
1420 1425 for x in xrange(rev, len(self)):
1421 1426 del self.nodemap[self.node(x)]
1422 1427
1423 1428 del self.index[rev:-1]
1424 1429
1425 1430 def checksize(self):
1426 1431 expected = 0
1427 1432 if len(self):
1428 1433 expected = max(0, self.end(len(self) - 1))
1429 1434
1430 1435 try:
1431 1436 f = self.opener(self.datafile)
1432 1437 f.seek(0, 2)
1433 1438 actual = f.tell()
1434 1439 dd = actual - expected
1435 1440 except IOError, inst:
1436 1441 if inst.errno != errno.ENOENT:
1437 1442 raise
1438 1443 dd = 0
1439 1444
1440 1445 try:
1441 1446 f = self.opener(self.indexfile)
1442 1447 f.seek(0, 2)
1443 1448 actual = f.tell()
1444 1449 s = self._io.size
1445 1450 i = max(0, actual // s)
1446 1451 di = actual - (i * s)
1447 1452 if self._inline:
1448 1453 databytes = 0
1449 1454 for r in self:
1450 1455 databytes += max(0, self.length(r))
1451 1456 dd = 0
1452 1457 di = actual - len(self) * s - databytes
1453 1458 except IOError, inst:
1454 1459 if inst.errno != errno.ENOENT:
1455 1460 raise
1456 1461 di = 0
1457 1462
1458 1463 return (dd, di)
1459 1464
1460 1465 def files(self):
1461 1466 res = [self.indexfile]
1462 1467 if not self._inline:
1463 1468 res.append(self.datafile)
1464 1469 return res
General Comments 0
You need to be logged in to leave comments. Login now