##// END OF EJS Templates
revlog.addgroup(): always use _addrevision() to add new revlog entries...
Benoit Boissinot -
r12624:557988c6 default
parent child Browse files
Show More
@@ -1,1509 +1,1465 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
# local aliases bound once at import time so per-call lookups stay cheap
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags (kept in the high 16 bits of the version word;
# the low 16 bits hold the format number)
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGSHALLOW = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW

# revlog index flags (per-entry, kept in the low 16 bits of the
# offset/flags field of each index tuple)
REVIDX_PARENTDELTA = 1
REVIDX_PUNCHED_FLAG = 2
REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA

# amount of data read unconditionally, should be >= 4
# when not inline: threshold for using lazy index
_prereadsize = 1048576
# max size of revlog with inline data
_maxinline = 131072

# error aliases; note LookupError shadows the builtin of the same name
# within this module
RevlogError = error.RevlogError
LookupError = error.LookupError
def getoffset(q):
    """Return the data-file offset stored in the high bits of *q*."""
    shifted = q >> 16
    return int(shifted)
52 52
def gettype(q):
    """Return the per-entry flags stored in the low 16 bits of *q*."""
    masked = q & 0xFFFF
    return int(masked)
55 55
def offset_type(offset, type):
    # pack a file offset and a 16-bit type/flags field into a single
    # (Python 2) long; the inverse of getoffset()/gettype()
    return long(long(offset) << 16 | type)
58 58
# precomputed sha1 of the null node: the common one-parent case below can
# start from a copy of this digest state instead of rehashing nullid
nullhash = _sha(nullid)

# NOTE: shadows the builtin hash() within this module
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
81 81
def compress(text):
    """Return a (prefix, data) pair for a possibly-compressed text.

    The prefix is 'u' when the text is stored uncompressed, and ''
    when the data is either zlib-compressed or plain text whose first
    byte (empty or NUL) lets decompress() recognize it as literal.
    """
    if not text:
        return ("", text)
    size = len(text)
    packed = None
    if size < 44:
        # tiny texts never win from compression
        pass
    elif size > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so feed it a megabyte at a time
        z = zlib.compressobj()
        pieces = []
        pos = 0
        while pos < size:
            nextpos = pos + 2**20
            pieces.append(z.compress(text[pos:nextpos]))
            pos = nextpos
        pieces.append(z.flush())
        if sum(map(len, pieces)) < size:
            packed = "".join(pieces)
    else:
        packed = _compress(text)
    if packed is None or len(packed) > size:
        # compression did not help; store as-is, tagging with 'u'
        # unless a leading NUL already marks the text as literal
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", packed)
110 110
def decompress(bin):
    """ undo compress(): return the stored text for a revlog blob """
    if not bin:
        return bin
    marker = bin[0]
    if marker == '\0':
        # literal text beginning with NUL was stored untouched
        return bin
    if marker == 'x':
        # a zlib stream (zlib output starts with 'x')
        return _decompress(bin)
    if marker == 'u':
        # explicitly marked uncompressed; strip the marker byte
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % marker)
123 123
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        # the index file object may not support fstat; treat it as empty
        # in that case
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)  # bytes per index entry
        self.datasize = size
        self.l = size // self.s                  # number of entries
        self.index = [None] * self.l             # raw entries, faulted in lazily
        self.map = {nullid: nullrev}             # partial node -> rev map
        self.allmap = 0                          # 1 once the whole node map is loaded
        self.all = 0                             # 1 once every entry is loaded
        self.mapfind_count = 0                   # number of findnode() calls so far

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                # pick just the 20-byte node out of each packed entry
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        """load the raw index entries covering one block of the file

        When 'data' is None the bytes are read from the index file,
        otherwise 'data' is assumed to already hold them.
        """
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) // self.s
        i = blockstart // self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        # skip the trailing run of already-loaded entries
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    # round down to the entry boundary and check the hit
                    # really is the entry's own node field
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        """make sure the index entries around rev 'i' are parsed

        With no arguments the whole index is loaded; with 'i' and 'end'
        the entries from i to end are loaded; with only 'i' a
        1024-entry aligned block around it is loaded.
        """
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True
268 268
class lazyindex(object):
    """a lazy version of the index array"""

    def __init__(self, parser):
        self.p = parser

    def load(self, pos):
        """fault in the raw entry at 'pos' and return it"""
        if pos < 0:
            # support negative indexing like a plain list
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]

    def __len__(self):
        return len(self.p.index)

    def __getitem__(self, pos):
        entry = self.p.index[pos]
        if not entry:
            entry = self.load(pos)
        return _unpack(indexformatng, entry)

    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)

    def __delitem__(self, pos):
        del self.p.index[pos]

    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))

    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))
290 290
class lazymap(object):
    """a lazy version of the node map"""

    def __init__(self, parser):
        self.p = parser

    def load(self, key):
        """search the index file for 'key'; raise KeyError when absent"""
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)

    def __contains__(self, key):
        if key in self.p.map:
            return True
        # not seen yet; pull in the full node map before giving up
        self.p.loadmap()
        return key in self.p.map

    def __iter__(self):
        # the null node is always known but never stored in the index
        yield nullid
        for pos, entry in enumerate(self.p.index):
            if not entry:
                self.p.loadindex(pos)
                entry = self.p.index[pos]
            if isinstance(entry, str):
                entry = _unpack(indexformatng, entry)
            yield entry[7]

    def __getitem__(self, key):
        if key not in self.p.map:
            try:
                self.load(key)
            except KeyError:
                raise KeyError("node " + hex(key))
        return self.p.map[key]

    def __setitem__(self, key, val):
        self.p.map[key] = val

    def __delitem__(self, key):
        del self.p.map[key]
326 326
# revlog v0 index entry: 4 ints (offset, compressed length, base rev,
# link rev) followed by the 20-byte nodes of both parents and of the
# entry itself
indexformatv0 = ">4l20s20s20s"
# byte offset of the entry's own node within a packed v0 record
v0shaoffset = 56

class revlogoldio(object):
    """parser/packer for the original (v0) on-disk index format"""

    def __init__(self):
        # size of one packed v0 index entry in bytes
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        """parse v0 index data into (index, nodemap, chunkcache)

        Entries are converted to the in-memory revlogng tuple layout;
        the chunk cache is always None for v0.
        """
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """pack an in-memory entry tuple back into v0 on-disk form

        'node' is a rev -> node function used to translate the parent
        revision numbers back into binary nodes.
        """
        if gettype(entry[0]):
            # v0 has no flags field, so flagged entries cannot be stored
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)
361 361
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid (a 20-byte sha1; the trailing "12x" pads it to 32)
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    """parser/packer for the revlogng on-disk index format"""

    def __init__(self):
        # size of one packed revlogng index entry in bytes
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        """parse revlogng index data into (index, nodemap, chunkcache)"""
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                # on disk the first entry carries the version word in its
                # offset field (see packentry); reset the offset to 0 but
                # keep the flags
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        """pack an in-memory entry tuple into revlogng on-disk form

        The first entry (rev 0) gets the revlog version stamped over
        the first 4 bytes of its (zero) offset field.
        """
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p
404 404
405 405 class revlog(object):
406 406 """
407 407 the underlying revision storage object
408 408
409 409 A revlog consists of two parts, an index and the revision data.
410 410
411 411 The index is a file with a fixed record size containing
412 412 information on each revision, including its nodeid (hash), the
413 413 nodeids of its parents, the position and offset of its data within
414 414 the data file, and the revision it's based on. Finally, each entry
415 415 contains a linkrev entry that can serve as a pointer to external
416 416 data.
417 417
418 418 The revision data itself is a linear collection of data chunks.
419 419 Each chunk represents a revision and is usually represented as a
420 420 delta against the previous chunk. To bound lookup time, runs of
421 421 deltas are limited to about 2 times the length of the original
422 422 version data. This makes retrieval of a version proportional to
423 423 its size, or O(1) relative to the number of revisions.
424 424
425 425 Both pieces of the revlog are written to in an append-only
426 426 fashion, which means we never need to rewrite a file to insert or
427 427 remove data, and can use some simple techniques to avoid the need
428 428 for locking while reading.
429 429 """
    def __init__(self, opener, indexfile, shallowroot=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None             # last revision built (if any)
        self._chunkcache = (0, '')     # (offset, data) window of raw chunks
        self.nodemap = {nullid: nullrev}
        self.index = []
        self._shallowroot = shallowroot
        self._parentdelta = 0

        # pick a default version, possibly overridden by opener options
        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA
            if v & REVLOGNG and 'parentdelta' in opener.options:
                self._parentdelta = 1

        if shallowroot:
            v |= REVLOGSHALLOW

        # a missing index file simply means an empty revlog
        i = ''
        try:
            f = self.opener(self.indexfile)
            if "nonlazy" in getattr(self.opener, 'options', {}):
                i = f.read()
            else:
                i = f.read(_prereadsize)
            if len(i) > 0:
                # the first 4 bytes on disk hold the actual version word
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._shallow = v & REVLOGSHALLOW
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError):
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkclear()

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid) :
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
502 502
    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    # Each index entry is an 8-tuple:
    #   (offset_flags, comp_len, uncomp_len, base_rev, link_rev,
    #    p1_rev, p2_rev, nodeid)
    # with a sentinel entry for the null revision appended at the end
    # (see __init__), which is why the accessors below adjust by one.

    def tip(self):
        # last real revision; the final index entry is the null sentinel
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        # the data-file offset lives in the high bits of the first field
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        # compressed length of the revision's chunk on disk
        return self.index[rev][1]
    def base(self, rev):
        # revision this entry's delta chain is based on
        return self.index[rev][3]
    def flags(self, rev):
        # per-entry flags live in the low 16 bits of the first field
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # stored size is unknown (< 0); reconstruct the text to measure it
        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize
562 562
563 563 def reachable(self, node, stop=None):
564 564 """return the set of all nodes ancestral to a given node, including
565 565 the node itself, stopping when stop is matched"""
566 566 reachable = set((node,))
567 567 visit = [node]
568 568 if stop:
569 569 stopn = self.rev(stop)
570 570 else:
571 571 stopn = 0
572 572 while visit:
573 573 n = visit.pop(0)
574 574 if n == stop:
575 575 continue
576 576 if n == nullid:
577 577 continue
578 578 for p in self.parents(n):
579 579 if self.rev(p) < stopn:
580 580 continue
581 581 if p not in reachable:
582 582 reachable.add(p)
583 583 visit.append(p)
584 584 return reachable
585 585
586 586 def ancestors(self, *revs):
587 587 """Generate the ancestors of 'revs' in reverse topological order.
588 588
589 589 Yield a sequence of revision numbers starting with the parents
590 590 of each revision in revs, i.e., each revision is *not* considered
591 591 an ancestor of itself. Results are in breadth-first order:
592 592 parents of each rev in revs, then parents of those, etc. Result
593 593 does not include the null revision."""
594 594 visit = list(revs)
595 595 seen = set([nullrev])
596 596 while visit:
597 597 for parent in self.parentrevs(visit.pop(0)):
598 598 if parent not in seen:
599 599 visit.append(parent)
600 600 seen.add(parent)
601 601 yield parent
602 602
603 603 def descendants(self, *revs):
604 604 """Generate the descendants of 'revs' in revision order.
605 605
606 606 Yield a sequence of revision numbers starting with a child of
607 607 some rev in revs, i.e., each revision is *not* considered a
608 608 descendant of itself. Results are ordered by revision number (a
609 609 topological sort)."""
610 610 seen = set(revs)
611 611 for i in xrange(min(revs) + 1, len(self)):
612 612 for x in self.parentrevs(i):
613 613 if x != nullrev and x in seen:
614 614 seen.add(i)
615 615 yield i
616 616 break
617 617
618 618 def findmissing(self, common=None, heads=None):
619 619 """Return the ancestors of heads that are not ancestors of common.
620 620
621 621 More specifically, return a list of nodes N such that every N
622 622 satisfies the following constraints:
623 623
624 624 1. N is an ancestor of some node in 'heads'
625 625 2. N is not an ancestor of any node in 'common'
626 626
627 627 The list is sorted by revision number, meaning it is
628 628 topologically sorted.
629 629
630 630 'heads' and 'common' are both lists of node IDs. If heads is
631 631 not supplied, uses all of the revlog's heads. If common is not
632 632 supplied, uses nullid."""
633 633 if common is None:
634 634 common = [nullid]
635 635 if heads is None:
636 636 heads = self.heads()
637 637
638 638 common = [self.rev(n) for n in common]
639 639 heads = [self.rev(n) for n in heads]
640 640
641 641 # we want the ancestors, but inclusive
642 642 has = set(self.ancestors(*common))
643 643 has.add(nullrev)
644 644 has.update(common)
645 645
646 646 # take all ancestors from heads that aren't in has
647 647 missing = set()
648 648 visit = [r for r in heads if r not in has]
649 649 while visit:
650 650 r = visit.pop(0)
651 651 if r in missing:
652 652 continue
653 653 else:
654 654 missing.add(r)
655 655 for p in self.parentrevs(r):
656 656 if p not in has:
657 657 visit.append(p)
658 658 missing = list(missing)
659 659 missing.sort()
660 660 return [self.node(r) for r in missing]
661 661
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
816 816
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            # fast path: one pass over the index, clearing the head mark
            # of every revision that appears as somebody's parent
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        # general path: walk forward from startrev, tracking which
        # revisions stay reachable and which remain childless
        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]
856 856
857 857 def children(self, node):
858 858 """find the children of a given node"""
859 859 c = []
860 860 p = self.rev(node)
861 861 for r in range(p + 1, len(self)):
862 862 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
863 863 if prevs:
864 864 for pr in prevs:
865 865 if pr == p:
866 866 c.append(self.node(r))
867 867 elif p == nullrev:
868 868 c.append(self.node(r))
869 869 return c
870 870
871 871 def descendant(self, start, end):
872 872 for i in self.descendants(start):
873 873 if i == end:
874 874 return True
875 875 elif i > end:
876 876 break
877 877 return False
878 878
879 879 def ancestor(self, a, b):
880 880 """calculate the least common ancestor of nodes a and b"""
881 881
882 882 # fast path, check if it is a descendant
883 883 a, b = self.rev(a), self.rev(b)
884 884 start, end = sorted((a, b))
885 885 if self.descendant(start, end):
886 886 return self.node(start)
887 887
888 888 def parents(rev):
889 889 return [p for p in self.parentrevs(rev) if p != nullrev]
890 890
891 891 c = ancestor.ancestor(a, b, parents)
892 892 if c is None:
893 893 return nullid
894 894
895 895 return self.node(c)
896 896
    def _match(self, id):
        """resolve an exact revision identifier

        Accepts a revision number, a 20-byte binary node, str(rev) or a
        full 40-character hex node.  Returns the binary node, or falls
        through to None when nothing matches exactly.
        """
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
930 930
def _partialmatch(self, id):
    """Resolve a hex nodeid prefix to a full binary node.

    Returns the unique matching node, None when nothing matches or
    id is not a valid short hex string, and raises LookupError when
    the prefix is ambiguous.
    """
    if len(id) >= 40:
        return None
    try:
        # compare an even number of hex digits as binary first, then
        # filter on the full (possibly odd-length) hex prefix
        half = len(id) // 2
        prefix = bin(id[:half * 2])
        candidates = [n for n in self.nodemap
                      if n[:half] == prefix and hex(n).startswith(id)]
        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        raise LookupError(id, self.indexfile,
                          _('ambiguous identifier'))
    except TypeError:
        # id was not valid hexadecimal
        pass
947 947
def lookup(self, id):
    """locate a node based on:
    - revision number or str(revision number)
    - nodeid or subset of hex nodeid
    """
    # try exact interpretations first, then a hex-prefix match
    exact = self._match(id)
    if exact is not None:
        return exact
    partial = self._partialmatch(id)
    if partial:
        return partial

    raise LookupError(id, self.indexfile, _('no match found'))
961 961
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.
    """
    # hashing the candidate text with the stored parents must
    # reproduce the node id exactly when the contents match
    parent1, parent2 = self.parents(node)
    return node != hash(text, parent1, parent2)
969 969
def _addchunk(self, offset, data):
    """Merge a freshly read chunk into the one-entry chunk cache.

    When data directly follows the cached bytes and the combined
    size stays below _prereadsize, the cache is extended; otherwise
    the new chunk simply replaces it.
    """
    cachestart, cached = self._chunkcache
    contiguous = cachestart + len(cached) == offset
    if contiguous and len(cached) + len(data) < _prereadsize:
        self._chunkcache = cachestart, cached + data
    else:
        self._chunkcache = offset, data
977 977
978 978 def _loadchunk(self, offset, length):
979 979 if self._inline:
980 980 df = self.opener(self.indexfile)
981 981 else:
982 982 df = self.opener(self.datafile)
983 983
984 984 readahead = max(65536, length)
985 985 df.seek(offset)
986 986 d = df.read(readahead)
987 987 self._addchunk(offset, d)
988 988 if readahead > length:
989 989 return d[:length]
990 990 return d
991 991
992 992 def _getchunk(self, offset, length):
993 993 o, d = self._chunkcache
994 994 l = len(d)
995 995
996 996 # is it in the cache?
997 997 cachestart = offset - o
998 998 cacheend = cachestart + length
999 999 if cachestart >= 0 and cacheend <= l:
1000 1000 if cachestart == 0 and cacheend == l:
1001 1001 return d # avoid a copy
1002 1002 return d[cachestart:cacheend]
1003 1003
1004 1004 return self._loadchunk(offset, length)
1005 1005
1006 1006 def _chunkraw(self, startrev, endrev):
1007 1007 start = self.start(startrev)
1008 1008 length = self.end(endrev) - start
1009 1009 if self._inline:
1010 1010 start += (startrev + 1) * self._io.size
1011 1011 return self._getchunk(start, length)
1012 1012
def _chunk(self, rev):
    """Return the decompressed chunk of a single revision."""
    raw = self._chunkraw(rev, rev)
    return decompress(raw)
1015 1015
1016 1016 def _chunkclear(self):
1017 1017 self._chunkcache = (0, '')
1018 1018
def deltaparent(self, rev):
    """return previous revision or parentrev according to flags"""
    # parent-delta revisions store their delta against p1 rather
    # than against the revision that precedes them in the log
    if not (self.flags(rev) & REVIDX_PARENTDELTA):
        return rev - 1
    return self.parentrevs(rev)[0]
1025 1025
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions"""
    # when rev2's stored delta is already against rev1, hand back
    # the stored chunk instead of recomputing a diff
    stored = self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1
    if stored:
        return self._chunk(rev2)

    return mdiff.textdiff(self.revision(self.node(rev1)),
                          self.revision(self.node(rev2)))
1033 1033
def revision(self, node):
    """return an uncompressed revision of a given node"""
    cachedrev = None
    if node == nullid:
        return ""
    if self._cache:
        # _cache is (node, rev, fulltext); a direct hit needs no work
        if self._cache[0] == node:
            return self._cache[2]
        cachedrev = self._cache[1]

    # look up what we need to read
    text = None
    rev = self.rev(node)
    base = self.base(rev)

    # check rev flags
    if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
        raise RevlogError(_('incompatible revision flag %x') %
                          (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

    # build delta chain: walk backwards from rev until we hit the
    # chain base or a revision whose full text is cached
    self._loadindex(base, rev + 1)
    chain = []
    index = self.index # for performance
    iterrev = rev
    e = index[iterrev]
    while iterrev != base and iterrev != cachedrev:
        chain.append(iterrev)
        if e[0] & REVIDX_PARENTDELTA:
            # delta is stored against p1 (index field 5)
            iterrev = e[5]
        else:
            iterrev -= 1
        e = index[iterrev]
    chain.reverse()
    base = iterrev

    if iterrev == cachedrev:
        # cache hit
        text = self._cache[2]

    # drop cache to save memory
    self._cache = None

    # preload all needed chunks with a single contiguous read
    self._chunkraw(base, rev)
    if text is None:
        text = self._chunk(base)

    # apply the deltas in order on top of the base text
    bins = [self._chunk(r) for r in chain]
    text = mdiff.patches(text, bins)
    p1, p2 = self.parents(node)
    # punched revisions are allowed to fail the integrity check
    if (node != hash(text, p1, p2) and
        not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
        raise RevlogError(_("integrity check failed on %s:%d")
                          % (self.indexfile, rev))

    self._cache = (node, rev, text)
    return text
1091 1091
def checkinlinesize(self, tr, fp=None):
    """Migrate an inline revlog to separate index and data files once
    its data grows past _maxinline.

    tr - the current transaction, used to make the split rollbackable
    fp - an optional open index file handle to flush and close first
    """
    if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
        return

    trinfo = tr.find(self.indexfile)
    if trinfo is None:
        raise RevlogError(_("%s not found in the transaction")
                          % self.indexfile)

    trindex = trinfo[2]
    dataoff = self.start(trindex)

    tr.add(self.datafile, dataoff)

    if fp:
        fp.flush()
        fp.close()

    # copy every revision's raw data into the new data file
    df = self.opener(self.datafile, 'w')
    try:
        for r in self:
            df.write(self._chunkraw(r, r))
    finally:
        df.close()

    # rewrite the index without the inline flag or embedded data
    fp = self.opener(self.indexfile, 'w', atomictemp=True)
    self.version &= ~(REVLOGNGINLINEDATA)
    self._inline = False
    for i in self:
        e = self._io.packentry(self.index[i], self.node, self.version, i)
        fp.write(e)

    # if we don't call rename, the temp file will never replace the
    # real index
    fp.rename()

    tr.replace(self.indexfile, trindex * self._io.size)
    self._chunkclear()
1130 1130
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    """
    node = hash(text, p1, p2)
    # an existing, non-punched entry means this revision is already
    # stored; adding it again would be redundant
    if node in self.nodemap:
        if not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG:
            return node

    if self._inline:
        dfh = None
    else:
        dfh = self.opener(self.datafile, "a")
    ifh = self.opener(self.indexfile, "a+")
    try:
        return self._addrevision(node, text, transaction, link, p1, p2,
                                 cachedelta, ifh, dfh)
    finally:
        if dfh:
            dfh.close()
        ifh.close()
1156 1156
def _addrevision(self, node, text, transaction, link, p1, p2,
                 cachedelta, ifh, dfh):
    """Internal helper for addrevision: write one revision through the
    already-open index (ifh) and data (dfh) file handles.

    Either text or cachedelta (a (baserev, delta) pair) must allow the
    full revision text to be reconstructed.  Returns node.
    """

    def buildtext(cachedelta):
        # materialize the full text from the cached delta, verifying
        # the resulting hash against the expected node
        if text is not None:
            return text
        # flush any pending writes here so we can read it in revision
        if dfh:
            dfh.flush()
        ifh.flush()
        basetext = self.revision(self.node(cachedelta[0]))
        patchedtext = mdiff.patch(basetext, cachedelta[1])
        chk = hash(patchedtext, p1, p2)
        if chk != node:
            raise RevlogError(_("consistency error in delta"))
        return patchedtext

    curr = len(self)
    prev = curr - 1
    base = curr
    offset = self.end(prev)
    flags = 0
    d = None

    # choose what revision the stored delta will be based on
    if self._parentdelta:
        deltarev, deltanode = self.rev(p1), p1
        flags = REVIDX_PARENTDELTA
    else:
        deltarev, deltanode = prev, self.node(prev)

    # should we try to build a delta?
    if deltarev != nullrev:
        # can we use the cached delta?
        if cachedelta:
            cacherev, d = cachedelta
            if cacherev != deltarev:
                # cached delta has the wrong base; rebuild from text
                text = buildtext(cachedelta)
                d = None
        if d is None:
            ptext = self.revision(deltanode)
            d = mdiff.textdiff(ptext, text)
        data = compress(d)
        l = len(data[1]) + len(data[0])
        base = self.base(deltarev)
        # total on-disk size of the delta chain including this delta
        dist = l + offset - self.start(base)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    # or the base revision is punched
    if text is None:
        textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(text)
    if (d is None or dist > textlen * 2 or
        (self.flags(base) & REVIDX_PUNCHED_FLAG)):
        # store the full compressed text instead of a delta
        text = buildtext(cachedelta)
        data = compress(text)
        l = len(data[1]) + len(data[0])
        base = curr

    e = (offset_type(offset, flags), l, textlen,
         base, link, self.rev(p1), self.rev(p2), node)
    self.index.insert(-1, e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    if not self._inline:
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        dfh.flush()
        ifh.write(entry)
    else:
        # inline: data follows its index entry in the index file
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        # may split the revlog out of inline mode if it grew too big
        self.checkinlinesize(transaction, ifh)

    if type(text) == str: # only accept immutable objects
        self._cache = (node, curr, text)
    return node
1243 1243
def group(self, nodelist, lookup, infocollect=None, fullrev=False):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of changeset revs, return a set of deltas and
    metadata corresponding to nodes. The first delta is
    first parent(nodelist[0]) -> nodelist[0], the receiver is
    guaranteed to have this parent as it has all history before
    these changesets. In the case firstparent is nullrev the
    changegroup starts with a full revision.
    fullrev forces the insertion of the full revision, necessary
    in the case of shallow clones where the first parent might
    not exist at the reciever.
    """

    revs = [self.rev(n) for n in nodelist]

    # if we don't have any revisions touched by these changesets, bail
    if not revs:
        yield changegroup.closechunk()
        return

    # add the parent of the first rev
    p = self.parentrevs(revs[0])[0]
    revs.insert(0, p)
    if p == nullrev:
        fullrev = True

    # build deltas
    for d in xrange(len(revs) - 1):
        a, b = revs[d], revs[d + 1]
        nb = self.node(b)

        if infocollect is not None:
            infocollect(nb)

        p = self.parents(nb)
        # chunk metadata: node, both parents, and the linknode
        meta = nb + p[0] + p[1] + lookup(nb)
        if fullrev:
            # emit the full text disguised as a trivial delta
            d = self.revision(nb)
            meta += mdiff.trivialdiffheader(len(d))
            fullrev = False
        else:
            d = self.revdiff(a, b)
        yield changegroup.chunkheader(len(meta) + len(d))
        yield meta
        yield d

    yield changegroup.closechunk()
1293 1293
def addgroup(self, bundle, linkmapper, transaction):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    Returns the node of the last chunk added, or None when the
    bundle was empty.  All entries are written through
    _addrevision() so delta-vs-fulltext decisions are centralized.
    """

    # track the base of the current delta log
    node = None

    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self.opener(self.indexfile, "a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self.opener(self.datafile, "a")

    try:
        # loop through our set of deltas
        chain = None
        while 1:
            chunkdata = bundle.parsechunk()
            if not chunkdata:
                break
            node = chunkdata['node']
            p1 = chunkdata['p1']
            p2 = chunkdata['p2']
            cs = chunkdata['cs']
            delta = chunkdata['data']

            link = linkmapper(cs)
            if (node in self.nodemap and
                (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
                # this can happen if two branches make the same change
                chain = node
                continue

            for p in (p1, p2):
                if not p in self.nodemap:
                    if self._shallow:
                        # add null entries for missing parents
                        # XXX FIXME
                        #if base == nullrev:
                        #    base = len(self)
                        #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
                        #     0, 0, base, nullrev, nullrev, nullrev, p)
                        #self.index.insert(-1, e)
                        #self.nodemap[p] = r
                        #entry = self._io.packentry(e, self.node,
                        #                           self.version, r)
                        #ifh.write(entry)
                        #t, r = r, r + 1
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))
                    else:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise LookupError(chain, self.indexfile,
                                      _('unknown base'))

            # delegate storage of the chunk to _addrevision, handing
            # it the delta against the previous chain node
            chainrev = self.rev(chain)
            chain = self._addrevision(node, None, transaction, link,
                                      p1, p2, (chainrev, delta), ifh, dfh)
            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                dfh = self.opener(self.datafile, "a")
                ifh = self.opener(self.indexfile, "a")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return node
1425 1381
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll readd them after this truncation.
    """
    if len(self) == 0:
        return

    if isinstance(self.index, lazyindex):
        self._loadindexmap()

    # find the first revision whose linkrev (index field 4) reaches
    # minlink; everything from there on is discarded
    for rev in self:
        if self.index[rev][4] >= minlink:
            break
    else:
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        end = rev * self._io.size
    else:
        # inline data lives in the index, interleaved with entries
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    del self.index[rev:-1]
1469 1425
def checksize(self):
    """Check that the on-disk file sizes match the index contents.

    Returns a pair (dd, di) of size discrepancies for the data file
    and index file respectively; (0, 0) means everything lines up.
    A missing file counts as no discrepancy.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        f = self.opener(self.datafile)
        f.seek(0, 2)
        actual = f.tell()
        dd = actual - expected
    except IOError, inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self.indexfile)
        f.seek(0, 2)
        actual = f.tell()
        s = self._io.size
        i = max(0, actual // s)
        # di is the number of trailing bytes not forming a whole entry
        di = actual - (i * s)
        if self._inline:
            # inline: the index also embeds all revision data
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError, inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
1504 1460
def files(self):
    """Return the on-disk files backing this revlog: the index, plus
    the separate data file when the revlog is not inline."""
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
General Comments 0
You need to be logged in to leave comments. Login now