revlog: break hash checking into subfunction
Matt Mackall
r13239:12ed25f3 default
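This change moves the node-hash verification that revision() used to perform inline into a new _checkhash() method, which revision() now calls after applying the delta chain. Presumably the point is to give subclasses a single override point for hash checking; a hedged sketch of such an override follows the new function in the listing below.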
@@ -1,1477 +1,1482 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_PARENTDELTA = 1
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 41 # amount of data read unconditionally, should be >= 4
42 42 # when not inline: threshold for using lazy index
43 43 _prereadsize = 1048576
44 44 # max size of revlog with inline data
45 45 _maxinline = 131072
46 46
47 47 RevlogError = error.RevlogError
48 48 LookupError = error.LookupError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
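# Illustration added by the editor (not in the original file): the index
# packs a 48-bit file offset and a 16-bit flag field into one 64-bit
# integer, so the three helpers above should round-trip:
#
#     q = offset_type(4096, REVIDX_PARENTDELTA)
#     assert getoffset(q) == 4096
#     assert gettype(q) == REVIDX_PARENTDELTA
#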
59 59 nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent nodes is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
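# Editor's sketch (assumes nullid is 20 null bytes, as defined in the
# node module): the digest is sha1(min(p1, p2) + max(p1, p2) + text),
# so for a revision with no parents it reduces to:
#
#     import hashlib
#     node = hashlib.sha1('\0' * 40 + 'some text').digest()
#     assert node == hash('some text', nullid, nullid)
#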
82 82 def compress(text):
83 83 """ generate a possibly-compressed representation of text """
84 84 if not text:
85 85 return ("", text)
86 86 l = len(text)
87 87 bin = None
88 88 if l < 44:
89 89 pass
90 90 elif l > 1000000:
91 91 # zlib makes an internal copy, thus doubling memory usage for
92 92 # large files, so let's do this in pieces
93 93 z = zlib.compressobj()
94 94 p = []
95 95 pos = 0
96 96 while pos < l:
97 97 pos2 = pos + 2**20
98 98 p.append(z.compress(text[pos:pos2]))
99 99 pos = pos2
100 100 p.append(z.flush())
101 101 if sum(map(len, p)) < l:
102 102 bin = "".join(p)
103 103 else:
104 104 bin = _compress(text)
105 105 if bin is None or len(bin) > l:
106 106 if text[0] == '\0':
107 107 return ("", text)
108 108 return ('u', text)
109 109 return ("", bin)
110 110
111 111 def decompress(bin):
112 112 """ decompress the given input """
113 113 if not bin:
114 114 return bin
115 115 t = bin[0]
116 116 if t == '\0':
117 117 return bin
118 118 if t == 'x':
119 119 return _decompress(bin)
120 120 if t == 'u':
121 121 return bin[1:]
122 122 raise RevlogError(_("unknown compression type %r") % t)
123 123
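# Rough usage sketch (illustrative only): compress() returns a (header,
# data) pair whose concatenation decompress() reverses. Long runs get
# zlib ('' header, data starting with 'x'); short or incompressible
# text is stored verbatim behind a 'u' marker:
#
#     h, d = compress('x' * 1000)
#     assert (h, decompress(h + d)) == ('', 'x' * 1000)
#     assert compress('short') == ('u', 'short')
#     assert decompress('ushort') == 'short'
#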
124 124 class lazyparser(object):
125 125 """
126 126 this class avoids the need to parse the entirety of large indices
127 127 """
128 128
129 129 # lazyparser is not safe to use on windows if the win32 extensions are
130 130 # not available. it keeps the file handle open, which makes it
131 131 # impossible to break hardlinks on local cloned repos.
132 132
133 133 def __init__(self, dataf):
134 134 try:
135 135 size = util.fstat(dataf).st_size
136 136 except AttributeError:
137 137 size = 0
138 138 self.dataf = dataf
139 139 self.s = struct.calcsize(indexformatng)
140 140 self.datasize = size
141 141 self.l = size // self.s
142 142 self.index = [None] * self.l
143 143 self.map = {nullid: nullrev}
144 144 self.allmap = 0
145 145 self.all = 0
146 146 self.mapfind_count = 0
147 147
148 148 def loadmap(self):
149 149 """
150 150 during a commit, we need to make sure the rev being added is
151 151 not a duplicate. This requires loading the entire index,
152 152 which is fairly slow. loadmap can load up just the node map,
153 153 which takes much less time.
154 154 """
155 155 if self.allmap:
156 156 return
157 157 end = self.datasize
158 158 self.allmap = 1
159 159 cur = 0
160 160 count = 0
161 161 blocksize = self.s * 256
162 162 self.dataf.seek(0)
163 163 while cur < end:
164 164 data = self.dataf.read(blocksize)
165 165 off = 0
166 166 for x in xrange(256):
167 167 n = data[off + ngshaoffset:off + ngshaoffset + 20]
168 168 self.map[n] = count
169 169 count += 1
170 170 if count >= self.l:
171 171 break
172 172 off += self.s
173 173 cur += blocksize
174 174
175 175 def loadblock(self, blockstart, blocksize, data=None):
176 176 if self.all:
177 177 return
178 178 if data is None:
179 179 self.dataf.seek(blockstart)
180 180 if blockstart + blocksize > self.datasize:
181 181 # the revlog may have grown since we've started running,
182 182 # but we don't have space in self.index for more entries.
183 183 # limit blocksize so that we don't get too much data.
184 184 blocksize = max(self.datasize - blockstart, 0)
185 185 data = self.dataf.read(blocksize)
186 186 lend = len(data) // self.s
187 187 i = blockstart // self.s
188 188 off = 0
189 189 # lazyindex supports __delitem__
190 190 if lend > len(self.index) - i:
191 191 lend = len(self.index) - i
192 192 for x in xrange(lend):
193 193 if self.index[i + x] is None:
194 194 b = data[off : off + self.s]
195 195 self.index[i + x] = b
196 196 n = b[ngshaoffset:ngshaoffset + 20]
197 197 self.map[n] = i + x
198 198 off += self.s
199 199
200 200 def findnode(self, node):
201 201 """search backwards through the index file for a specific node"""
202 202 if self.allmap:
203 203 return None
204 204
205 205 # hg log will cause many many searches for the manifest
206 206 # nodes. After we get called a few times, just load the whole
207 207 # thing.
208 208 if self.mapfind_count > 8:
209 209 self.loadmap()
210 210 if node in self.map:
211 211 return node
212 212 return None
213 213 self.mapfind_count += 1
214 214 last = self.l - 1
215 215 while self.index[last] is not None:
216 216 if last == 0:
217 217 self.all = 1
218 218 self.allmap = 1
219 219 return None
220 220 last -= 1
221 221 end = (last + 1) * self.s
222 222 blocksize = self.s * 256
223 223 while end >= 0:
224 224 start = max(end - blocksize, 0)
225 225 self.dataf.seek(start)
226 226 data = self.dataf.read(end - start)
227 227 findend = end - start
228 228 while True:
229 229 # we're searching backwards, so we have to make sure
230 230 # we don't find a changeset where this node is a parent
231 231 off = data.find(node, 0, findend)
232 232 findend = off
233 233 if off >= 0:
234 234 i = off / self.s
235 235 off = i * self.s
236 236 n = data[off + ngshaoffset:off + ngshaoffset + 20]
237 237 if n == node:
238 238 self.map[n] = i + start / self.s
239 239 return node
240 240 else:
241 241 break
242 242 end -= blocksize
243 243 return None
244 244
245 245 def loadindex(self, i=None, end=None):
246 246 if self.all:
247 247 return
248 248 all = False
249 249 if i is None:
250 250 blockstart = 0
251 251 blocksize = (65536 / self.s) * self.s
252 252 end = self.datasize
253 253 all = True
254 254 else:
255 255 if end:
256 256 blockstart = i * self.s
257 257 end = end * self.s
258 258 blocksize = end - blockstart
259 259 else:
260 260 blockstart = (i & ~1023) * self.s
261 261 blocksize = self.s * 1024
262 262 end = blockstart + blocksize
263 263 while blockstart < end:
264 264 self.loadblock(blockstart, blocksize)
265 265 blockstart += blocksize
266 266 if all:
267 267 self.all = True
268 268
269 269 class lazyindex(object):
270 270 """a lazy version of the index array"""
271 271 def __init__(self, parser):
272 272 self.p = parser
273 273 def __len__(self):
274 274 return len(self.p.index)
275 275 def load(self, pos):
276 276 if pos < 0:
277 277 pos += len(self.p.index)
278 278 self.p.loadindex(pos)
279 279 return self.p.index[pos]
280 280 def __getitem__(self, pos):
281 281 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
282 282 def __setitem__(self, pos, item):
283 283 self.p.index[pos] = _pack(indexformatng, *item)
284 284 def __delitem__(self, pos):
285 285 del self.p.index[pos]
286 286 def insert(self, pos, e):
287 287 self.p.index.insert(pos, _pack(indexformatng, *e))
288 288 def append(self, e):
289 289 self.p.index.append(_pack(indexformatng, *e))
290 290
291 291 class lazymap(object):
292 292 """a lazy version of the node map"""
293 293 def __init__(self, parser):
294 294 self.p = parser
295 295 def load(self, key):
296 296 n = self.p.findnode(key)
297 297 if n is None:
298 298 raise KeyError(key)
299 299 def __contains__(self, key):
300 300 if key in self.p.map:
301 301 return True
302 302 self.p.loadmap()
303 303 return key in self.p.map
304 304 def __iter__(self):
305 305 yield nullid
306 306 for i, ret in enumerate(self.p.index):
307 307 if not ret:
308 308 self.p.loadindex(i)
309 309 ret = self.p.index[i]
310 310 if isinstance(ret, str):
311 311 ret = _unpack(indexformatng, ret)
312 312 yield ret[7]
313 313 def __getitem__(self, key):
314 314 try:
315 315 return self.p.map[key]
316 316 except KeyError:
317 317 try:
318 318 self.load(key)
319 319 return self.p.map[key]
320 320 except KeyError:
321 321 raise KeyError("node " + hex(key))
322 322 def __setitem__(self, key, val):
323 323 self.p.map[key] = val
324 324 def __delitem__(self, key):
325 325 del self.p.map[key]
326 326
327 327 indexformatv0 = ">4l20s20s20s"
328 328 v0shaoffset = 56
329 329
330 330 class revlogoldio(object):
331 331 def __init__(self):
332 332 self.size = struct.calcsize(indexformatv0)
333 333
334 334 def parseindex(self, fp, data, inline):
335 335 s = self.size
336 336 index = []
337 337 nodemap = {nullid: nullrev}
338 338 n = off = 0
339 339 if len(data) == _prereadsize:
340 340 data += fp.read() # read the rest
341 341 l = len(data)
342 342 while off + s <= l:
343 343 cur = data[off:off + s]
344 344 off += s
345 345 e = _unpack(indexformatv0, cur)
346 346 # transform to revlogv1 format
347 347 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
348 348 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
349 349 index.append(e2)
350 350 nodemap[e[6]] = n
351 351 n += 1
352 352
353 353 return index, nodemap, None
354 354
355 355 def packentry(self, entry, node, version, rev):
356 356 if gettype(entry[0]):
357 357 raise RevlogError(_("index entry flags need RevlogNG"))
358 358 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
359 359 node(entry[5]), node(entry[6]), entry[7])
360 360 return _pack(indexformatv0, *e2)
361 361
362 362 # index ng:
363 363 # 6 bytes: offset
364 364 # 2 bytes: flags
365 365 # 4 bytes: compressed length
366 366 # 4 bytes: uncompressed length
367 367 # 4 bytes: base rev
368 368 # 4 bytes: link rev
369 369 # 4 bytes: parent 1 rev
370 370 # 4 bytes: parent 2 rev
371 371 # 32 bytes: nodeid
372 372 indexformatng = ">Qiiiiii20s12x"
373 373 ngshaoffset = 32
374 374 versionformat = ">I"
375 375
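# Decoding sketch (editor's addition; 'record' is a hypothetical 64-byte
# slice of an index file): each RevlogNG entry unpacks into the 8-tuple
# used throughout this file, and entry 0 overlays the version header in
# its offset field (see packentry below):
#
#     import struct
#     offset_flags, clen, ulen, base, link, p1, p2, nodeid = \
#         struct.unpack(indexformatng, record)
#     assert struct.calcsize(indexformatng) == 64
#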
376 376 class revlogio(object):
377 377 def __init__(self):
378 378 self.size = struct.calcsize(indexformatng)
379 379
380 380 def parseindex(self, fp, data, inline):
381 381 if len(data) == _prereadsize:
382 382 if util.openhardlinks() and not inline:
383 383 # big index, let's parse it on demand
384 384 parser = lazyparser(fp)
385 385 index = lazyindex(parser)
386 386 nodemap = lazymap(parser)
387 387 e = list(index[0])
388 388 type = gettype(e[0])
389 389 e[0] = offset_type(0, type)
390 390 index[0] = e
391 391 return index, nodemap, None
392 392 else:
393 393 data += fp.read()
394 394
395 395 # call the C implementation to parse the index data
396 396 index, nodemap, cache = parsers.parse_index(data, inline)
397 397 return index, nodemap, cache
398 398
399 399 def packentry(self, entry, node, version, rev):
400 400 p = _pack(indexformatng, *entry)
401 401 if rev == 0:
402 402 p = _pack(versionformat, version) + p[4:]
403 403 return p
404 404
405 405 class revlog(object):
406 406 """
407 407 the underlying revision storage object
408 408
409 409 A revlog consists of two parts, an index and the revision data.
410 410
411 411 The index is a file with a fixed record size containing
412 412 information on each revision, including its nodeid (hash), the
413 413 nodeids of its parents, the position and offset of its data within
414 414 the data file, and the revision it's based on. Finally, each entry
415 415 contains a linkrev entry that can serve as a pointer to external
416 416 data.
417 417
418 418 The revision data itself is a linear collection of data chunks.
419 419 Each chunk represents a revision and is usually represented as a
420 420 delta against the previous chunk. To bound lookup time, runs of
421 421 deltas are limited to about 2 times the length of the original
422 422 version data. This makes retrieval of a version proportional to
423 423 its size, or O(1) relative to the number of revisions.
424 424
425 425 Both pieces of the revlog are written to in an append-only
426 426 fashion, which means we never need to rewrite a file to insert or
427 427 remove data, and can use some simple techniques to avoid the need
428 428 for locking while reading.
429 429 """
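# A minimal sketch of what the docstring describes (editor's addition;
# it ignores parent deltas, caching and hash checking, which the real
# revision() below handles): rebuilding a revision walks the delta
# chain forward from its base.
#
#     def _simpleread(rl, rev):
#         base = rl.base(rev)
#         text = rl._chunk(base)              # full text stored at base
#         for r in xrange(base + 1, rev + 1): # apply each delta in turn
#             text = mdiff.patch(text, rl._chunk(r))
#         return text
#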
430 430 def __init__(self, opener, indexfile, shallowroot=None):
431 431 """
432 432 create a revlog object
433 433
434 434 opener is a function that abstracts the file opening operation
435 435 and can be used to implement COW semantics or the like.
436 436 """
437 437 self.indexfile = indexfile
438 438 self.datafile = indexfile[:-2] + ".d"
439 439 self.opener = opener
440 440 self._cache = None
441 441 self._chunkcache = (0, '')
442 442 self.nodemap = {nullid: nullrev}
443 443 self.index = []
444 444 self._shallowroot = shallowroot
445 445 self._parentdelta = 0
446 446
447 447 v = REVLOG_DEFAULT_VERSION
448 448 if hasattr(opener, 'options') and 'defversion' in opener.options:
449 449 v = opener.options['defversion']
450 450 if v & REVLOGNG:
451 451 v |= REVLOGNGINLINEDATA
452 452 if v & REVLOGNG and 'parentdelta' in opener.options:
453 453 self._parentdelta = 1
454 454
455 455 if shallowroot:
456 456 v |= REVLOGSHALLOW
457 457
458 458 i = ''
459 459 try:
460 460 f = self.opener(self.indexfile)
461 461 if "nonlazy" in getattr(self.opener, 'options', {}):
462 462 i = f.read()
463 463 else:
464 464 i = f.read(_prereadsize)
465 465 if len(i) > 0:
466 466 v = struct.unpack(versionformat, i[:4])[0]
467 467 except IOError, inst:
468 468 if inst.errno != errno.ENOENT:
469 469 raise
470 470
471 471 self.version = v
472 472 self._inline = v & REVLOGNGINLINEDATA
473 473 self._shallow = v & REVLOGSHALLOW
474 474 flags = v & ~0xFFFF
475 475 fmt = v & 0xFFFF
476 476 if fmt == REVLOGV0 and flags:
477 477 raise RevlogError(_("index %s unknown flags %#04x for format v0")
478 478 % (self.indexfile, flags >> 16))
479 479 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
480 480 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
481 481 % (self.indexfile, flags >> 16))
482 482 elif fmt > REVLOGNG:
483 483 raise RevlogError(_("index %s unknown format %d")
484 484 % (self.indexfile, fmt))
485 485
486 486 self._io = revlogio()
487 487 if self.version == REVLOGV0:
488 488 self._io = revlogoldio()
489 489 if i:
490 490 try:
491 491 d = self._io.parseindex(f, i, self._inline)
492 492 except (ValueError, IndexError):
493 493 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
494 494 self.index, self.nodemap, self._chunkcache = d
495 495 if not self._chunkcache:
496 496 self._chunkclear()
497 497
498 498 # add the magic null revision at -1 (if it hasn't been done already)
499 499 if (self.index == [] or isinstance(self.index, lazyindex) or
500 500 self.index[-1][7] != nullid) :
501 501 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502 502
503 503 def _loadindex(self, start, end):
504 504 """load a block of indexes all at once from the lazy parser"""
505 505 if isinstance(self.index, lazyindex):
506 506 self.index.p.loadindex(start, end)
507 507
508 508 def _loadindexmap(self):
509 509 """loads both the map and the index from the lazy parser"""
510 510 if isinstance(self.index, lazyindex):
511 511 p = self.index.p
512 512 p.loadindex()
513 513 self.nodemap = p.map
514 514
515 515 def _loadmap(self):
516 516 """loads the map from the lazy parser"""
517 517 if isinstance(self.nodemap, lazymap):
518 518 self.nodemap.p.loadmap()
519 519 self.nodemap = self.nodemap.p.map
520 520
521 521 def tip(self):
522 522 return self.node(len(self.index) - 2)
523 523 def __len__(self):
524 524 return len(self.index) - 1
525 525 def __iter__(self):
526 526 for i in xrange(len(self)):
527 527 yield i
528 528 def rev(self, node):
529 529 try:
530 530 return self.nodemap[node]
531 531 except KeyError:
532 532 raise LookupError(node, self.indexfile, _('no node'))
533 533 def node(self, rev):
534 534 return self.index[rev][7]
535 535 def linkrev(self, rev):
536 536 return self.index[rev][4]
537 537 def parents(self, node):
538 538 i = self.index
539 539 d = i[self.rev(node)]
540 540 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
541 541 def parentrevs(self, rev):
542 542 return self.index[rev][5:7]
543 543 def start(self, rev):
544 544 return int(self.index[rev][0] >> 16)
545 545 def end(self, rev):
546 546 return self.start(rev) + self.length(rev)
547 547 def length(self, rev):
548 548 return self.index[rev][1]
549 549 def base(self, rev):
550 550 return self.index[rev][3]
551 551 def flags(self, rev):
552 552 return self.index[rev][0] & 0xFFFF
553 553 def rawsize(self, rev):
554 554 """return the length of the uncompressed text for a given revision"""
555 555 l = self.index[rev][2]
556 556 if l >= 0:
557 557 return l
558 558
559 559 t = self.revision(self.node(rev))
560 560 return len(t)
561 561 size = rawsize
562 562
563 563 def reachable(self, node, stop=None):
564 564 """return the set of all nodes ancestral to a given node, including
565 565 the node itself, stopping when stop is matched"""
566 566 reachable = set((node,))
567 567 visit = [node]
568 568 if stop:
569 569 stopn = self.rev(stop)
570 570 else:
571 571 stopn = 0
572 572 while visit:
573 573 n = visit.pop(0)
574 574 if n == stop:
575 575 continue
576 576 if n == nullid:
577 577 continue
578 578 for p in self.parents(n):
579 579 if self.rev(p) < stopn:
580 580 continue
581 581 if p not in reachable:
582 582 reachable.add(p)
583 583 visit.append(p)
584 584 return reachable
585 585
586 586 def ancestors(self, *revs):
587 587 """Generate the ancestors of 'revs' in reverse topological order.
588 588
589 589 Yield a sequence of revision numbers starting with the parents
590 590 of each revision in revs, i.e., each revision is *not* considered
591 591 an ancestor of itself. Results are in breadth-first order:
592 592 parents of each rev in revs, then parents of those, etc. Result
593 593 does not include the null revision."""
594 594 visit = list(revs)
595 595 seen = set([nullrev])
596 596 while visit:
597 597 for parent in self.parentrevs(visit.pop(0)):
598 598 if parent not in seen:
599 599 visit.append(parent)
600 600 seen.add(parent)
601 601 yield parent
602 602
603 603 def descendants(self, *revs):
604 604 """Generate the descendants of 'revs' in revision order.
605 605
606 606 Yield a sequence of revision numbers starting with a child of
607 607 some rev in revs, i.e., each revision is *not* considered a
608 608 descendant of itself. Results are ordered by revision number (a
609 609 topological sort)."""
610 610 first = min(revs)
611 611 if first == nullrev:
612 612 for i in self:
613 613 yield i
614 614 return
615 615
616 616 seen = set(revs)
617 617 for i in xrange(first + 1, len(self)):
618 618 for x in self.parentrevs(i):
619 619 if x != nullrev and x in seen:
620 620 seen.add(i)
621 621 yield i
622 622 break
623 623
624 624 def findmissing(self, common=None, heads=None):
625 625 """Return the ancestors of heads that are not ancestors of common.
626 626
627 627 More specifically, return a list of nodes N such that every N
628 628 satisfies the following constraints:
629 629
630 630 1. N is an ancestor of some node in 'heads'
631 631 2. N is not an ancestor of any node in 'common'
632 632
633 633 The list is sorted by revision number, meaning it is
634 634 topologically sorted.
635 635
636 636 'heads' and 'common' are both lists of node IDs. If heads is
637 637 not supplied, uses all of the revlog's heads. If common is not
638 638 supplied, uses nullid."""
639 639 if common is None:
640 640 common = [nullid]
641 641 if heads is None:
642 642 heads = self.heads()
643 643
644 644 common = [self.rev(n) for n in common]
645 645 heads = [self.rev(n) for n in heads]
646 646
647 647 # we want the ancestors, but inclusive
648 648 has = set(self.ancestors(*common))
649 649 has.add(nullrev)
650 650 has.update(common)
651 651
652 652 # take all ancestors from heads that aren't in has
653 653 missing = set()
654 654 visit = [r for r in heads if r not in has]
655 655 while visit:
656 656 r = visit.pop(0)
657 657 if r in missing:
658 658 continue
659 659 else:
660 660 missing.add(r)
661 661 for p in self.parentrevs(r):
662 662 if p not in has:
663 663 visit.append(p)
664 664 missing = list(missing)
665 665 missing.sort()
666 666 return [self.node(r) for r in missing]
667 667
668 668 def nodesbetween(self, roots=None, heads=None):
669 669 """Return a topological path from 'roots' to 'heads'.
670 670
671 671 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
672 672 topologically sorted list of all nodes N that satisfy both of
673 673 these constraints:
674 674
675 675 1. N is a descendant of some node in 'roots'
676 676 2. N is an ancestor of some node in 'heads'
677 677
678 678 Every node is considered to be both a descendant and an ancestor
679 679 of itself, so every reachable node in 'roots' and 'heads' will be
680 680 included in 'nodes'.
681 681
682 682 'outroots' is the list of reachable nodes in 'roots', i.e., the
683 683 subset of 'roots' that is returned in 'nodes'. Likewise,
684 684 'outheads' is the subset of 'heads' that is also in 'nodes'.
685 685
686 686 'roots' and 'heads' are both lists of node IDs. If 'roots' is
687 687 unspecified, uses nullid as the only root. If 'heads' is
688 688 unspecified, uses list of all of the revlog's heads."""
689 689 nonodes = ([], [], [])
690 690 if roots is not None:
691 691 roots = list(roots)
692 692 if not roots:
693 693 return nonodes
694 694 lowestrev = min([self.rev(n) for n in roots])
695 695 else:
696 696 roots = [nullid] # Everybody's a descendent of nullid
697 697 lowestrev = nullrev
698 698 if (lowestrev == nullrev) and (heads is None):
699 699 # We want _all_ the nodes!
700 700 return ([self.node(r) for r in self], [nullid], list(self.heads()))
701 701 if heads is None:
702 702 # All nodes are ancestors, so the latest ancestor is the last
703 703 # node.
704 704 highestrev = len(self) - 1
705 705 # Set ancestors to None to signal that every node is an ancestor.
706 706 ancestors = None
707 707 # Set heads to an empty dictionary for later discovery of heads
708 708 heads = {}
709 709 else:
710 710 heads = list(heads)
711 711 if not heads:
712 712 return nonodes
713 713 ancestors = set()
714 714 # Turn heads into a dictionary so we can remove 'fake' heads.
715 715 # Also, later we will be using it to filter out the heads we can't
716 716 # find from roots.
717 717 heads = dict.fromkeys(heads, 0)
718 718 # Start at the top and keep marking parents until we're done.
719 719 nodestotag = set(heads)
720 720 # Remember where the top was so we can use it as a limit later.
721 721 highestrev = max([self.rev(n) for n in nodestotag])
722 722 while nodestotag:
723 723 # grab a node to tag
724 724 n = nodestotag.pop()
725 725 # Never tag nullid
726 726 if n == nullid:
727 727 continue
728 728 # A node's revision number represents its place in a
729 729 # topologically sorted list of nodes.
730 730 r = self.rev(n)
731 731 if r >= lowestrev:
732 732 if n not in ancestors:
733 733 # If we are possibly a descendent of one of the roots
734 734 # and we haven't already been marked as an ancestor
735 735 ancestors.add(n) # Mark as ancestor
736 736 # Add non-nullid parents to list of nodes to tag.
737 737 nodestotag.update([p for p in self.parents(n) if
738 738 p != nullid])
739 739 elif n in heads: # We've seen it before, is it a fake head?
740 740 # So it is, real heads should not be the ancestors of
741 741 # any other heads.
742 742 heads.pop(n)
743 743 if not ancestors:
744 744 return nonodes
745 745 # Now that we have our set of ancestors, we want to remove any
746 746 # roots that are not ancestors.
747 747
748 748 # If one of the roots was nullid, everything is included anyway.
749 749 if lowestrev > nullrev:
750 750 # But, since we weren't, let's recompute the lowest rev to not
751 751 # include roots that aren't ancestors.
752 752
753 753 # Filter out roots that aren't ancestors of heads
754 754 roots = [n for n in roots if n in ancestors]
755 755 # Recompute the lowest revision
756 756 if roots:
757 757 lowestrev = min([self.rev(n) for n in roots])
758 758 else:
759 759 # No more roots? Return empty list
760 760 return nonodes
761 761 else:
762 762 # We are descending from nullid, and don't need to care about
763 763 # any other roots.
764 764 lowestrev = nullrev
765 765 roots = [nullid]
766 766 # Transform our roots list into a set.
767 767 descendents = set(roots)
768 768 # Also, keep the original roots so we can filter out roots that aren't
769 769 # 'real' roots (i.e. are descended from other roots).
770 770 roots = descendents.copy()
771 771 # Our topologically sorted list of output nodes.
772 772 orderedout = []
773 773 # Don't start at nullid since we don't want nullid in our output list,
774 774 # and if nullid shows up in descedents, empty parents will look like
775 775 # they're descendents.
776 776 for r in xrange(max(lowestrev, 0), highestrev + 1):
777 777 n = self.node(r)
778 778 isdescendent = False
779 779 if lowestrev == nullrev: # Everybody is a descendent of nullid
780 780 isdescendent = True
781 781 elif n in descendents:
782 782 # n is already a descendent
783 783 isdescendent = True
784 784 # This check only needs to be done here because all the roots
785 785 # will start being marked as descendents before the loop.
786 786 if n in roots:
787 787 # If n was a root, check if it's a 'real' root.
788 788 p = tuple(self.parents(n))
789 789 # If any of its parents are descendents, it's not a root.
790 790 if (p[0] in descendents) or (p[1] in descendents):
791 791 roots.remove(n)
792 792 else:
793 793 p = tuple(self.parents(n))
794 794 # A node is a descendent if either of its parents are
795 795 # descendents. (We seeded the descendents list with the roots
796 796 # up there, remember?)
797 797 if (p[0] in descendents) or (p[1] in descendents):
798 798 descendents.add(n)
799 799 isdescendent = True
800 800 if isdescendent and ((ancestors is None) or (n in ancestors)):
801 801 # Only include nodes that are both descendents and ancestors.
802 802 orderedout.append(n)
803 803 if (ancestors is not None) and (n in heads):
804 804 # We're trying to figure out which heads are reachable
805 805 # from roots.
806 806 # Mark this head as having been reached
807 807 heads[n] = 1
808 808 elif ancestors is None:
809 809 # Otherwise, we're trying to discover the heads.
810 810 # Assume this is a head because if it isn't, the next step
811 811 # will eventually remove it.
812 812 heads[n] = 1
813 813 # But, obviously its parents aren't.
814 814 for p in self.parents(n):
815 815 heads.pop(p, None)
816 816 heads = [n for n in heads.iterkeys() if heads[n] != 0]
817 817 roots = list(roots)
818 818 assert orderedout
819 819 assert roots
820 820 assert heads
821 821 return (orderedout, roots, heads)
822 822
823 823 def heads(self, start=None, stop=None):
824 824 """return the list of all nodes that have no children
825 825
826 826 if start is specified, only heads that are descendants of
827 827 start will be returned
828 828 if stop is specified, it will consider all the revs from stop
829 829 as if they had no children
830 830 """
831 831 if start is None and stop is None:
832 832 count = len(self)
833 833 if not count:
834 834 return [nullid]
835 835 ishead = [1] * (count + 1)
836 836 index = self.index
837 837 for r in xrange(count):
838 838 e = index[r]
839 839 ishead[e[5]] = ishead[e[6]] = 0
840 840 return [self.node(r) for r in xrange(count) if ishead[r]]
841 841
842 842 if start is None:
843 843 start = nullid
844 844 if stop is None:
845 845 stop = []
846 846 stoprevs = set([self.rev(n) for n in stop])
847 847 startrev = self.rev(start)
848 848 reachable = set((startrev,))
849 849 heads = set((startrev,))
850 850
851 851 parentrevs = self.parentrevs
852 852 for r in xrange(startrev + 1, len(self)):
853 853 for p in parentrevs(r):
854 854 if p in reachable:
855 855 if r not in stoprevs:
856 856 reachable.add(r)
857 857 heads.add(r)
858 858 if p in heads and p not in stoprevs:
859 859 heads.remove(p)
860 860
861 861 return [self.node(r) for r in heads]
862 862
863 863 def children(self, node):
864 864 """find the children of a given node"""
865 865 c = []
866 866 p = self.rev(node)
867 867 for r in range(p + 1, len(self)):
868 868 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
869 869 if prevs:
870 870 for pr in prevs:
871 871 if pr == p:
872 872 c.append(self.node(r))
873 873 elif p == nullrev:
874 874 c.append(self.node(r))
875 875 return c
876 876
877 877 def descendant(self, start, end):
878 878 if start == nullrev:
879 879 return True
880 880 for i in self.descendants(start):
881 881 if i == end:
882 882 return True
883 883 elif i > end:
884 884 break
885 885 return False
886 886
887 887 def ancestor(self, a, b):
888 888 """calculate the least common ancestor of nodes a and b"""
889 889
890 890 # fast path, check if it is a descendant
891 891 a, b = self.rev(a), self.rev(b)
892 892 start, end = sorted((a, b))
893 893 if self.descendant(start, end):
894 894 return self.node(start)
895 895
896 896 def parents(rev):
897 897 return [p for p in self.parentrevs(rev) if p != nullrev]
898 898
899 899 c = ancestor.ancestor(a, b, parents)
900 900 if c is None:
901 901 return nullid
902 902
903 903 return self.node(c)
904 904
905 905 def _match(self, id):
906 906 if isinstance(id, (long, int)):
907 907 # rev
908 908 return self.node(id)
909 909 if len(id) == 20:
910 910 # possibly a binary node
911 911 # odds of a binary node being all hex in ASCII are 1 in 10**25
912 912 try:
913 913 node = id
914 914 self.rev(node) # quick search the index
915 915 return node
916 916 except LookupError:
917 917 pass # may be partial hex id
918 918 try:
919 919 # str(rev)
920 920 rev = int(id)
921 921 if str(rev) != id:
922 922 raise ValueError
923 923 if rev < 0:
924 924 rev = len(self) + rev
925 925 if rev < 0 or rev >= len(self):
926 926 raise ValueError
927 927 return self.node(rev)
928 928 except (ValueError, OverflowError):
929 929 pass
930 930 if len(id) == 40:
931 931 try:
932 932 # a full hex nodeid?
933 933 node = bin(id)
934 934 self.rev(node)
935 935 return node
936 936 except (TypeError, LookupError):
937 937 pass
938 938
939 939 def _partialmatch(self, id):
940 940 if len(id) < 40:
941 941 try:
942 942 # hex(node)[:...]
943 943 l = len(id) // 2 # grab an even number of digits
944 944 bin_id = bin(id[:l * 2])
945 945 nl = [n for n in self.nodemap if n[:l] == bin_id]
946 946 nl = [n for n in nl if hex(n).startswith(id)]
947 947 if len(nl) > 0:
948 948 if len(nl) == 1:
949 949 return nl[0]
950 950 raise LookupError(id, self.indexfile,
951 951 _('ambiguous identifier'))
952 952 return None
953 953 except TypeError:
954 954 pass
955 955
956 956 def lookup(self, id):
957 957 """locate a node based on:
958 958 - revision number or str(revision number)
959 959 - nodeid or subset of hex nodeid
960 960 """
961 961 n = self._match(id)
962 962 if n is not None:
963 963 return n
964 964 n = self._partialmatch(id)
965 965 if n:
966 966 return n
967 967
968 968 raise LookupError(id, self.indexfile, _('no match found'))
969 969
970 970 def cmp(self, node, text):
971 971 """compare text with a given file revision
972 972
973 973 returns True if text is different from what is stored.
974 974 """
975 975 p1, p2 = self.parents(node)
976 976 return hash(text, p1, p2) != node
977 977
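# Usage note (illustrative; 'fl' stands for any revlog instance): cmp()
# is a change detector built on the node hash, so "nothing changed"
# reads:
#
#     if not fl.cmp(node, text):
#         pass    # text matches the stored revision
#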
978 978 def _addchunk(self, offset, data):
979 979 o, d = self._chunkcache
980 980 # try to add to existing cache
981 981 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
982 982 self._chunkcache = o, d + data
983 983 else:
984 984 self._chunkcache = offset, data
985 985
986 986 def _loadchunk(self, offset, length):
987 987 if self._inline:
988 988 df = self.opener(self.indexfile)
989 989 else:
990 990 df = self.opener(self.datafile)
991 991
992 992 readahead = max(65536, length)
993 993 df.seek(offset)
994 994 d = df.read(readahead)
995 995 self._addchunk(offset, d)
996 996 if readahead > length:
997 997 return d[:length]
998 998 return d
999 999
1000 1000 def _getchunk(self, offset, length):
1001 1001 o, d = self._chunkcache
1002 1002 l = len(d)
1003 1003
1004 1004 # is it in the cache?
1005 1005 cachestart = offset - o
1006 1006 cacheend = cachestart + length
1007 1007 if cachestart >= 0 and cacheend <= l:
1008 1008 if cachestart == 0 and cacheend == l:
1009 1009 return d # avoid a copy
1010 1010 return d[cachestart:cacheend]
1011 1011
1012 1012 return self._loadchunk(offset, length)
1013 1013
1014 1014 def _chunkraw(self, startrev, endrev):
1015 1015 start = self.start(startrev)
1016 1016 length = self.end(endrev) - start
1017 1017 if self._inline:
1018 1018 start += (startrev + 1) * self._io.size
1019 1019 return self._getchunk(start, length)
1020 1020
1021 1021 def _chunk(self, rev):
1022 1022 return decompress(self._chunkraw(rev, rev))
1023 1023
1024 1024 def _chunkclear(self):
1025 1025 self._chunkcache = (0, '')
1026 1026
1027 1027 def deltaparent(self, rev):
1028 1028 """return previous revision or parentrev according to flags"""
1029 1029 if self.flags(rev) & REVIDX_PARENTDELTA:
1030 1030 return self.parentrevs(rev)[0]
1031 1031 else:
1032 1032 return rev - 1
1033 1033
1034 1034 def revdiff(self, rev1, rev2):
1035 1035 """return or calculate a delta between two revisions"""
1036 1036 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
1037 1037 return self._chunk(rev2)
1038 1038
1039 1039 return mdiff.textdiff(self.revision(self.node(rev1)),
1040 1040 self.revision(self.node(rev2)))
1041 1041
1042 1042 def revision(self, node):
1043 1043 """return an uncompressed revision of a given node"""
1044 1044 cachedrev = None
1045 1045 if node == nullid:
1046 1046 return ""
1047 1047 if self._cache:
1048 1048 if self._cache[0] == node:
1049 1049 return self._cache[2]
1050 1050 cachedrev = self._cache[1]
1051 1051
1052 1052 # look up what we need to read
1053 1053 text = None
1054 1054 rev = self.rev(node)
1055 1055 base = self.base(rev)
1056 1056
1057 1057 # check rev flags
1058 1058 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1059 1059 raise RevlogError(_('incompatible revision flag %x') %
1060 1060 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1061 1061
1062 1062 # build delta chain
1063 1063 self._loadindex(base, rev + 1)
1064 1064 chain = []
1065 1065 index = self.index # for performance
1066 1066 iterrev = rev
1067 1067 e = index[iterrev]
1068 1068 while iterrev != base and iterrev != cachedrev:
1069 1069 chain.append(iterrev)
1070 1070 if e[0] & REVIDX_PARENTDELTA:
1071 1071 iterrev = e[5]
1072 1072 else:
1073 1073 iterrev -= 1
1074 1074 e = index[iterrev]
1075 1075 chain.reverse()
1076 1076 base = iterrev
1077 1077
1078 1078 if iterrev == cachedrev:
1079 1079 # cache hit
1080 1080 text = self._cache[2]
1081 1081
1082 1082 # drop cache to save memory
1083 1083 self._cache = None
1084 1084
1085 1085 self._chunkraw(base, rev)
1086 1086 if text is None:
1087 1087 text = self._chunk(base)
1088 1088
1089 1089 bins = [self._chunk(r) for r in chain]
1090 1090 text = mdiff.patches(text, bins)
1091
1092 text = self._checkhash(text, node, rev)
1093
1094 self._cache = (node, rev, text)
1095 return text
1096
1097 def _checkhash(self, text, node, rev):
1091 1098 p1, p2 = self.parents(node)
1092 1099 if (node != hash(text, p1, p2) and
1093 1100 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
1094 1101 raise RevlogError(_("integrity check failed on %s:%d")
1095 1102 % (self.indexfile, rev))
1096
1097 self._cache = (node, rev, text)
1098 1103 return text
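# Editor's sketch (an assumption about intent, not part of this
# change): isolating the check gives subclasses one override point for
# hash verification; 'laxrevlog' is hypothetical:
#
#     class laxrevlog(revlog):
#         def _checkhash(self, text, node, rev):
#             try:
#                 return revlog._checkhash(self, text, node, rev)
#             except RevlogError:
#                 return text    # tolerate a mismatching payload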
1099 1104
1100 1105 def checkinlinesize(self, tr, fp=None):
1101 1106 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1102 1107 return
1103 1108
1104 1109 trinfo = tr.find(self.indexfile)
1105 1110 if trinfo is None:
1106 1111 raise RevlogError(_("%s not found in the transaction")
1107 1112 % self.indexfile)
1108 1113
1109 1114 trindex = trinfo[2]
1110 1115 dataoff = self.start(trindex)
1111 1116
1112 1117 tr.add(self.datafile, dataoff)
1113 1118
1114 1119 if fp:
1115 1120 fp.flush()
1116 1121 fp.close()
1117 1122
1118 1123 df = self.opener(self.datafile, 'w')
1119 1124 try:
1120 1125 for r in self:
1121 1126 df.write(self._chunkraw(r, r))
1122 1127 finally:
1123 1128 df.close()
1124 1129
1125 1130 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1126 1131 self.version &= ~(REVLOGNGINLINEDATA)
1127 1132 self._inline = False
1128 1133 for i in self:
1129 1134 e = self._io.packentry(self.index[i], self.node, self.version, i)
1130 1135 fp.write(e)
1131 1136
1132 1137 # if we don't call rename, the temp file will never replace the
1133 1138 # real index
1134 1139 fp.rename()
1135 1140
1136 1141 tr.replace(self.indexfile, trindex * self._io.size)
1137 1142 self._chunkclear()
1138 1143
1139 1144 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
1140 1145 """add a revision to the log
1141 1146
1142 1147 text - the revision data to add
1143 1148 transaction - the transaction object used for rollback
1144 1149 link - the linkrev data to add
1145 1150 p1, p2 - the parent nodeids of the revision
1146 1151 cachedelta - an optional precomputed delta
1147 1152 """
1148 1153 node = hash(text, p1, p2)
1149 1154 if (node in self.nodemap and
1150 1155 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1151 1156 return node
1152 1157
1153 1158 dfh = None
1154 1159 if not self._inline:
1155 1160 dfh = self.opener(self.datafile, "a")
1156 1161 ifh = self.opener(self.indexfile, "a+")
1157 1162 try:
1158 1163 return self._addrevision(node, text, transaction, link, p1, p2,
1159 1164 cachedelta, ifh, dfh)
1160 1165 finally:
1161 1166 if dfh:
1162 1167 dfh.close()
1163 1168 ifh.close()
1164 1169
1165 1170 def _addrevision(self, node, text, transaction, link, p1, p2,
1166 1171 cachedelta, ifh, dfh):
1167 1172
1168 1173 btext = [text]
1169 1174 def buildtext():
1170 1175 if btext[0] is not None:
1171 1176 return btext[0]
1172 1177 # flush any pending writes here so we can read it in revision
1173 1178 if dfh:
1174 1179 dfh.flush()
1175 1180 ifh.flush()
1176 1181 basetext = self.revision(self.node(cachedelta[0]))
1177 1182 btext[0] = mdiff.patch(basetext, cachedelta[1])
1178 1183 chk = hash(btext[0], p1, p2)
1179 1184 if chk != node:
1180 1185 raise RevlogError(_("consistency error in delta"))
1181 1186 return btext[0]
1182 1187
1183 1188 def builddelta(rev):
1184 1189 # can we use the cached delta?
1185 1190 if cachedelta and cachedelta[0] == rev:
1186 1191 delta = cachedelta[1]
1187 1192 else:
1188 1193 t = buildtext()
1189 1194 ptext = self.revision(self.node(rev))
1190 1195 delta = mdiff.textdiff(ptext, t)
1191 1196 data = compress(delta)
1192 1197 l = len(data[1]) + len(data[0])
1193 1198 base = self.base(rev)
1194 1199 dist = l + offset - self.start(base)
1195 1200 return dist, l, data, base
1196 1201
1197 1202 curr = len(self)
1198 1203 prev = curr - 1
1199 1204 base = curr
1200 1205 offset = self.end(prev)
1201 1206 flags = 0
1202 1207 d = None
1203 1208 p1r, p2r = self.rev(p1), self.rev(p2)
1204 1209
1205 1210 # should we try to build a delta?
1206 1211 if prev != nullrev:
1207 1212 d = builddelta(prev)
1208 1213 if self._parentdelta and prev != p1r:
1209 1214 d2 = builddelta(p1r)
1210 1215 if d2 < d:
1211 1216 d = d2
1212 1217 flags = REVIDX_PARENTDELTA
1213 1218 dist, l, data, base = d
1214 1219
1215 1220 # full versions are inserted when the needed deltas
1216 1221 # become comparable to the uncompressed text
1217 1222 # or the base revision is punched
1218 1223 if text is None:
1219 1224 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1220 1225 cachedelta[1])
1221 1226 else:
1222 1227 textlen = len(text)
1223 1228 if (d is None or dist > textlen * 2 or
1224 1229 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1225 1230 text = buildtext()
1226 1231 data = compress(text)
1227 1232 l = len(data[1]) + len(data[0])
1228 1233 base = curr
1229 1234
1230 1235 e = (offset_type(offset, flags), l, textlen,
1231 1236 base, link, p1r, p2r, node)
1232 1237 self.index.insert(-1, e)
1233 1238 self.nodemap[node] = curr
1234 1239
1235 1240 entry = self._io.packentry(e, self.node, self.version, curr)
1236 1241 if not self._inline:
1237 1242 transaction.add(self.datafile, offset)
1238 1243 transaction.add(self.indexfile, curr * len(entry))
1239 1244 if data[0]:
1240 1245 dfh.write(data[0])
1241 1246 dfh.write(data[1])
1242 1247 dfh.flush()
1243 1248 ifh.write(entry)
1244 1249 else:
1245 1250 offset += curr * self._io.size
1246 1251 transaction.add(self.indexfile, offset, curr)
1247 1252 ifh.write(entry)
1248 1253 ifh.write(data[0])
1249 1254 ifh.write(data[1])
1250 1255 self.checkinlinesize(transaction, ifh)
1251 1256
1252 1257 if type(text) == str: # only accept immutable objects
1253 1258 self._cache = (node, curr, text)
1254 1259 return node
1255 1260
1256 1261 def group(self, nodelist, lookup, infocollect=None, fullrev=False):
1257 1262 """Calculate a delta group, yielding a sequence of changegroup chunks
1258 1263 (strings).
1259 1264
1260 1265 Given a list of changeset revs, return a set of deltas and
1261 1266 metadata corresponding to nodes. The first delta is
1262 1267 first parent(nodelist[0]) -> nodelist[0], the receiver is
1263 1268 guaranteed to have this parent as it has all history before
1264 1269 these changesets. If firstparent is nullrev, the
1265 1270 changegroup starts with a full revision.
1266 1271 fullrev forces the insertion of the full revision, which is
1267 1272 necessary for shallow clones, where the first parent might
1268 1273 not exist at the receiver.
1269 1274 """
1270 1275
1271 1276 revs = [self.rev(n) for n in nodelist]
1272 1277
1273 1278 # if we don't have any revisions touched by these changesets, bail
1274 1279 if not revs:
1275 1280 yield changegroup.closechunk()
1276 1281 return
1277 1282
1278 1283 # add the parent of the first rev
1279 1284 p = self.parentrevs(revs[0])[0]
1280 1285 revs.insert(0, p)
1281 1286 if p == nullrev:
1282 1287 fullrev = True
1283 1288
1284 1289 # build deltas
1285 1290 for d in xrange(len(revs) - 1):
1286 1291 a, b = revs[d], revs[d + 1]
1287 1292 nb = self.node(b)
1288 1293
1289 1294 if infocollect is not None:
1290 1295 infocollect(nb)
1291 1296
1292 1297 p = self.parents(nb)
1293 1298 meta = nb + p[0] + p[1] + lookup(nb)
1294 1299 if fullrev:
1295 1300 d = self.revision(nb)
1296 1301 meta += mdiff.trivialdiffheader(len(d))
1297 1302 fullrev = False
1298 1303 else:
1299 1304 d = self.revdiff(a, b)
1300 1305 yield changegroup.chunkheader(len(meta) + len(d))
1301 1306 yield meta
1302 1307 yield d
1303 1308
1304 1309 yield changegroup.closechunk()
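# Receiving-side note: addgroup() below consumes exactly this stream
# via bundle.parsechunk(), which returns one dict per chunk (and a
# falsy value once the closing chunk is reached), e.g.:
#
#     chunkdata = bundle.parsechunk()
#     if chunkdata:
#         node, cs = chunkdata['node'], chunkdata['cs']
#         p1, p2 = chunkdata['p1'], chunkdata['p2']
#         delta = chunkdata['data']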
1305 1310
1306 1311 def addgroup(self, bundle, linkmapper, transaction):
1307 1312 """
1308 1313 add a delta group
1309 1314
1310 1315 given a set of deltas, add them to the revision log. the
1311 1316 first delta is against its parent, which should be in our
1312 1317 log, the rest are against the previous delta.
1313 1318 """
1314 1319
1315 1320 # track the base of the current delta log
1316 1321 node = None
1317 1322
1318 1323 r = len(self)
1319 1324 end = 0
1320 1325 if r:
1321 1326 end = self.end(r - 1)
1322 1327 ifh = self.opener(self.indexfile, "a+")
1323 1328 isize = r * self._io.size
1324 1329 if self._inline:
1325 1330 transaction.add(self.indexfile, end + isize, r)
1326 1331 dfh = None
1327 1332 else:
1328 1333 transaction.add(self.indexfile, isize, r)
1329 1334 transaction.add(self.datafile, end)
1330 1335 dfh = self.opener(self.datafile, "a")
1331 1336
1332 1337 try:
1333 1338 # loop through our set of deltas
1334 1339 chain = None
1335 1340 while 1:
1336 1341 chunkdata = bundle.parsechunk()
1337 1342 if not chunkdata:
1338 1343 break
1339 1344 node = chunkdata['node']
1340 1345 p1 = chunkdata['p1']
1341 1346 p2 = chunkdata['p2']
1342 1347 cs = chunkdata['cs']
1343 1348 delta = chunkdata['data']
1344 1349
1345 1350 link = linkmapper(cs)
1346 1351 if (node in self.nodemap and
1347 1352 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1348 1353 # this can happen if two branches make the same change
1349 1354 chain = node
1350 1355 continue
1351 1356
1352 1357 for p in (p1, p2):
1353 1358 if not p in self.nodemap:
1354 1359 if self._shallow:
1355 1360 # add null entries for missing parents
1356 1361 # XXX FIXME
1357 1362 #if base == nullrev:
1358 1363 # base = len(self)
1359 1364 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1360 1365 # 0, 0, base, nullrev, nullrev, nullrev, p)
1361 1366 #self.index.insert(-1, e)
1362 1367 #self.nodemap[p] = r
1363 1368 #entry = self._io.packentry(e, self.node,
1364 1369 # self.version, r)
1365 1370 #ifh.write(entry)
1366 1371 #t, r = r, r + 1
1367 1372 raise LookupError(p, self.indexfile,
1368 1373 _('unknown parent'))
1369 1374 else:
1370 1375 raise LookupError(p, self.indexfile,
1371 1376 _('unknown parent'))
1372 1377
1373 1378 if not chain:
1374 1379 # retrieve the parent revision of the delta chain
1375 1380 chain = p1
1376 1381 if not chain in self.nodemap:
1377 1382 raise LookupError(chain, self.indexfile, _('unknown base'))
1378 1383
1379 1384 chainrev = self.rev(chain)
1380 1385 chain = self._addrevision(node, None, transaction, link,
1381 1386 p1, p2, (chainrev, delta), ifh, dfh)
1382 1387 if not dfh and not self._inline:
1383 1388 # addrevision switched from inline to conventional
1384 1389 # reopen the index
1385 1390 dfh = self.opener(self.datafile, "a")
1386 1391 ifh = self.opener(self.indexfile, "a")
1387 1392 finally:
1388 1393 if dfh:
1389 1394 dfh.close()
1390 1395 ifh.close()
1391 1396
1392 1397 return node
1393 1398
1394 1399 def strip(self, minlink, transaction):
1395 1400 """truncate the revlog on the first revision with a linkrev >= minlink
1396 1401
1397 1402 This function is called when we're stripping revision minlink and
1398 1403 its descendants from the repository.
1399 1404
1400 1405 We have to remove all revisions with linkrev >= minlink, because
1401 1406 the equivalent changelog revisions will be renumbered after the
1402 1407 strip.
1403 1408
1404 1409 So we truncate the revlog on the first of these revisions, and
1405 1410 trust that the caller has saved the revisions that shouldn't be
1406 1411 removed and that it'll re-add them after this truncation.
1407 1412 """
1408 1413 if len(self) == 0:
1409 1414 return
1410 1415
1411 1416 if isinstance(self.index, lazyindex):
1412 1417 self._loadindexmap()
1413 1418
1414 1419 for rev in self:
1415 1420 if self.index[rev][4] >= minlink:
1416 1421 break
1417 1422 else:
1418 1423 return
1419 1424
1420 1425 # first truncate the files on disk
1421 1426 end = self.start(rev)
1422 1427 if not self._inline:
1423 1428 transaction.add(self.datafile, end)
1424 1429 end = rev * self._io.size
1425 1430 else:
1426 1431 end += rev * self._io.size
1427 1432
1428 1433 transaction.add(self.indexfile, end)
1429 1434
1430 1435 # then reset internal state in memory to forget those revisions
1431 1436 self._cache = None
1432 1437 self._chunkclear()
1433 1438 for x in xrange(rev, len(self)):
1434 1439 del self.nodemap[self.node(x)]
1435 1440
1436 1441 del self.index[rev:-1]
1437 1442
1438 1443 def checksize(self):
1439 1444 expected = 0
1440 1445 if len(self):
1441 1446 expected = max(0, self.end(len(self) - 1))
1442 1447
1443 1448 try:
1444 1449 f = self.opener(self.datafile)
1445 1450 f.seek(0, 2)
1446 1451 actual = f.tell()
1447 1452 dd = actual - expected
1448 1453 except IOError, inst:
1449 1454 if inst.errno != errno.ENOENT:
1450 1455 raise
1451 1456 dd = 0
1452 1457
1453 1458 try:
1454 1459 f = self.opener(self.indexfile)
1455 1460 f.seek(0, 2)
1456 1461 actual = f.tell()
1457 1462 s = self._io.size
1458 1463 i = max(0, actual // s)
1459 1464 di = actual - (i * s)
1460 1465 if self._inline:
1461 1466 databytes = 0
1462 1467 for r in self:
1463 1468 databytes += max(0, self.length(r))
1464 1469 dd = 0
1465 1470 di = actual - len(self) * s - databytes
1466 1471 except IOError, inst:
1467 1472 if inst.errno != errno.ENOENT:
1468 1473 raise
1469 1474 di = 0
1470 1475
1471 1476 return (dd, di)
1472 1477
1473 1478 def files(self):
1474 1479 res = [self.indexfile]
1475 1480 if not self._inline:
1476 1481 res.append(self.datafile)
1477 1482 return res