revlog: support using an existing file handle when reading revlogs...
Gregory Szorc
r26377:dfef0d3b default
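
The patch threads an optional open file handle ("df") through the revlog read path (_loadchunk, _getchunk, _chunkraw, _chunk, _chunks, and revision) so that bulk readers can reuse one descriptor instead of reopening the data file for every uncached read. A minimal sketch of the intended pattern (hypothetical caller; assumes "rl" is an existing non-inline revlog):

    # Open the data file once and reuse the handle across many reads.
    # Per the _loadchunk docstring, the handle's seek position is not
    # restored, so don't share it with unrelated readers.
    df = rl.opener(rl.datafile)
    try:
        texts = [rl.revision(rev, _df=df) for rev in rl.revs()]
    finally:
        df.close()
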
@@ -1,1660 +1,1676 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
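# The first field of a RevlogNG index entry packs a 48-bit file offset
# and a 16-bit flag field into a single integer; the three helpers above
# round-trip it. Illustrative use:
#
#   q = offset_type(1048576, REVIDX_ISCENSORED)
#   assert getoffset(q) == 1048576
#   assert gettype(q) == REVIDX_ISCENSORED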
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent nodes is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
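# Example: for a root revision both parents are nullid, so the digest
# reduces to sha1(nullid + nullid + text):
#
#   import hashlib
#   assert hash('data', nullid, nullid) == \
#       hashlib.sha1(nullid + nullid + 'data').digest()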
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error as e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
97 97
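# decompress() dispatches on the one-byte marker emitted by
# revlog.compress() further down: '\0' and 'u' mean the text was stored
# uncompressed (the latter with the marker stripped), while 'x' is the
# first byte of a zlib stream. For instance:
#
#   assert decompress('uhello') == 'hello'
#   assert decompress(zlib.compress('hello')) == 'hello'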
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107
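# Each v0 record is thus struct.calcsize(indexformatv0) == 76 bytes:
# four 4-byte big-endian integers followed by three 20-byte nodeids.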
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 versionformat = ">I"
153 153
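# A RevlogNG record is struct.calcsize(indexformatng) == 64 bytes: an
# 8-byte offset/flags word, six 4-byte integers, and a 32-byte node
# field (20 significant bytes plus 12 bytes of zero padding). For rev 0
# the first 4 bytes double as the version header; see
# revlogio.packentry below, which overwrites them using versionformat.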
154 154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 155 # signed integer)
156 156 _maxentrysize = 0x7fffffff
157 157
158 158 class revlogio(object):
159 159 def __init__(self):
160 160 self.size = struct.calcsize(indexformatng)
161 161
162 162 def parseindex(self, data, inline):
163 163 # call the C implementation to parse the index data
164 164 index, cache = parsers.parse_index2(data, inline)
165 165 return index, getattr(index, 'nodemap', None), cache
166 166
167 167 def packentry(self, entry, node, version, rev):
168 168 p = _pack(indexformatng, *entry)
169 169 if rev == 0:
170 170 p = _pack(versionformat, version) + p[4:]
171 171 return p
172 172
173 173 class revlog(object):
174 174 """
175 175 the underlying revision storage object
176 176
177 177 A revlog consists of two parts, an index and the revision data.
178 178
179 179 The index is a file with a fixed record size containing
180 180 information on each revision, including its nodeid (hash), the
181 181 nodeids of its parents, the position and offset of its data within
182 182 the data file, and the revision it's based on. Finally, each entry
183 183 contains a linkrev entry that can serve as a pointer to external
184 184 data.
185 185
186 186 The revision data itself is a linear collection of data chunks.
187 187 Each chunk represents a revision and is usually represented as a
188 188 delta against the previous chunk. To bound lookup time, runs of
189 189 deltas are limited to about 2 times the length of the original
190 190 version data. This makes retrieval of a version proportional to
191 191 its size, or O(1) relative to the number of revisions.
192 192
193 193 Both pieces of the revlog are written to in an append-only
194 194 fashion, which means we never need to rewrite a file to insert or
195 195 remove data, and can use some simple techniques to avoid the need
196 196 for locking while reading.
197 197 """
198 198 def __init__(self, opener, indexfile):
199 199 """
200 200 create a revlog object
201 201
202 202 opener is a function that abstracts the file opening operation
203 203 and can be used to implement COW semantics or the like.
204 204 """
205 205 self.indexfile = indexfile
206 206 self.datafile = indexfile[:-2] + ".d"
207 207 self.opener = opener
208 208 self._cache = None
209 209 self._basecache = None
210 210 self._chunkcache = (0, '')
211 211 self._chunkcachesize = 65536
212 212 self._maxchainlen = None
213 213 self._aggressivemergedeltas = False
214 214 self.index = []
215 215 self._pcache = {}
216 216 self._nodecache = {nullid: nullrev}
217 217 self._nodepos = None
218 218
219 219 v = REVLOG_DEFAULT_VERSION
220 220 opts = getattr(opener, 'options', None)
221 221 if opts is not None:
222 222 if 'revlogv1' in opts:
223 223 if 'generaldelta' in opts:
224 224 v |= REVLOGGENERALDELTA
225 225 else:
226 226 v = 0
227 227 if 'chunkcachesize' in opts:
228 228 self._chunkcachesize = opts['chunkcachesize']
229 229 if 'maxchainlen' in opts:
230 230 self._maxchainlen = opts['maxchainlen']
231 231 if 'aggressivemergedeltas' in opts:
232 232 self._aggressivemergedeltas = opts['aggressivemergedeltas']
233 233
234 234 if self._chunkcachesize <= 0:
235 235 raise RevlogError(_('revlog chunk cache size %r is not greater '
236 236 'than 0') % self._chunkcachesize)
237 237 elif self._chunkcachesize & (self._chunkcachesize - 1):
238 238 raise RevlogError(_('revlog chunk cache size %r is not a power '
239 239 'of 2') % self._chunkcachesize)
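# ((x & (x - 1)) == 0 is the standard power-of-two test: subtracting
# 1 flips the lowest set bit and all bits below it, so the AND is zero
# only when exactly one bit is set, e.g. 65536 & 65535 == 0 but
# 65537 & 65536 != 0.)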
240 240
241 241 indexdata = ''
242 242 self._initempty = True
243 243 try:
244 244 f = self.opener(self.indexfile)
245 245 indexdata = f.read()
246 246 f.close()
247 247 if len(indexdata) > 0:
248 248 v = struct.unpack(versionformat, indexdata[:4])[0]
249 249 self._initempty = False
250 250 except IOError as inst:
251 251 if inst.errno != errno.ENOENT:
252 252 raise
253 253
254 254 self.version = v
255 255 self._inline = v & REVLOGNGINLINEDATA
256 256 self._generaldelta = v & REVLOGGENERALDELTA
257 257 flags = v & ~0xFFFF
258 258 fmt = v & 0xFFFF
259 259 if fmt == REVLOGV0 and flags:
260 260 raise RevlogError(_("index %s unknown flags %#04x for format v0")
261 261 % (self.indexfile, flags >> 16))
262 262 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
263 263 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
264 264 % (self.indexfile, flags >> 16))
265 265 elif fmt > REVLOGNG:
266 266 raise RevlogError(_("index %s unknown format %d")
267 267 % (self.indexfile, fmt))
268 268
269 269 self._io = revlogio()
270 270 if self.version == REVLOGV0:
271 271 self._io = revlogoldio()
272 272 try:
273 273 d = self._io.parseindex(indexdata, self._inline)
274 274 except (ValueError, IndexError):
275 275 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
276 276 self.index, nodemap, self._chunkcache = d
277 277 if nodemap is not None:
278 278 self.nodemap = self._nodecache = nodemap
279 279 if not self._chunkcache:
280 280 self._chunkclear()
281 281 # revnum -> (chain-length, sum-delta-length)
282 282 self._chaininfocache = {}
283 283
284 284 def tip(self):
285 285 return self.node(len(self.index) - 2)
286 286 def __contains__(self, rev):
287 287 return 0 <= rev < len(self)
288 288 def __len__(self):
289 289 return len(self.index) - 1
290 290 def __iter__(self):
291 291 return iter(xrange(len(self)))
292 292 def revs(self, start=0, stop=None):
293 293 """iterate over all rev in this revlog (from start to stop)"""
294 294 step = 1
295 295 if stop is not None:
296 296 if start > stop:
297 297 step = -1
298 298 stop += step
299 299 else:
300 300 stop = len(self)
301 301 return xrange(start, stop, step)
302 302
303 303 @util.propertycache
304 304 def nodemap(self):
305 305 self.rev(self.node(0))
306 306 return self._nodecache
307 307
308 308 def hasnode(self, node):
309 309 try:
310 310 self.rev(node)
311 311 return True
312 312 except KeyError:
313 313 return False
314 314
315 315 def clearcaches(self):
316 316 try:
317 317 self._nodecache.clearcaches()
318 318 except AttributeError:
319 319 self._nodecache = {nullid: nullrev}
320 320 self._nodepos = None
321 321
322 322 def rev(self, node):
323 323 try:
324 324 return self._nodecache[node]
325 325 except TypeError:
326 326 raise
327 327 except RevlogError:
328 328 # parsers.c radix tree lookup failed
329 329 raise LookupError(node, self.indexfile, _('no node'))
330 330 except KeyError:
331 331 # pure python cache lookup failed
332 332 n = self._nodecache
333 333 i = self.index
334 334 p = self._nodepos
335 335 if p is None:
336 336 p = len(i) - 2
337 337 for r in xrange(p, -1, -1):
338 338 v = i[r][7]
339 339 n[v] = r
340 340 if v == node:
341 341 self._nodepos = r - 1
342 342 return r
343 343 raise LookupError(node, self.indexfile, _('no node'))
344 344
345 345 def node(self, rev):
346 346 return self.index[rev][7]
347 347 def linkrev(self, rev):
348 348 return self.index[rev][4]
349 349 def parents(self, node):
350 350 i = self.index
351 351 d = i[self.rev(node)]
352 352 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
353 353 def parentrevs(self, rev):
354 354 return self.index[rev][5:7]
355 355 def start(self, rev):
356 356 return int(self.index[rev][0] >> 16)
357 357 def end(self, rev):
358 358 return self.start(rev) + self.length(rev)
359 359 def length(self, rev):
360 360 return self.index[rev][1]
361 361 def chainbase(self, rev):
362 362 index = self.index
363 363 base = index[rev][3]
364 364 while base != rev:
365 365 rev = base
366 366 base = index[rev][3]
367 367 return base
368 368 def chainlen(self, rev):
369 369 return self._chaininfo(rev)[0]
370 370
371 371 def _chaininfo(self, rev):
372 372 chaininfocache = self._chaininfocache
373 373 if rev in chaininfocache:
374 374 return chaininfocache[rev]
375 375 index = self.index
376 376 generaldelta = self._generaldelta
377 377 iterrev = rev
378 378 e = index[iterrev]
379 379 clen = 0
380 380 compresseddeltalen = 0
381 381 while iterrev != e[3]:
382 382 clen += 1
383 383 compresseddeltalen += e[1]
384 384 if generaldelta:
385 385 iterrev = e[3]
386 386 else:
387 387 iterrev -= 1
388 388 if iterrev in chaininfocache:
389 389 t = chaininfocache[iterrev]
390 390 clen += t[0]
391 391 compresseddeltalen += t[1]
392 392 break
393 393 e = index[iterrev]
394 394 else:
395 395 # Add text length of base since decompressing that also takes
396 396 # work. For cache hits the length is already included.
397 397 compresseddeltalen += e[1]
398 398 r = (clen, compresseddeltalen)
399 399 chaininfocache[rev] = r
400 400 return r
401 401
402 402 def flags(self, rev):
403 403 return self.index[rev][0] & 0xFFFF
404 404 def rawsize(self, rev):
405 405 """return the length of the uncompressed text for a given revision"""
406 406 l = self.index[rev][2]
407 407 if l >= 0:
408 408 return l
409 409
410 410 t = self.revision(self.node(rev))
411 411 return len(t)
412 412 size = rawsize
413 413
414 414 def ancestors(self, revs, stoprev=0, inclusive=False):
415 415 """Generate the ancestors of 'revs' in reverse topological order.
416 416 Does not generate revs lower than stoprev.
417 417
418 418 See the documentation for ancestor.lazyancestors for more details."""
419 419
420 420 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
421 421 inclusive=inclusive)
422 422
423 423 def descendants(self, revs):
424 424 """Generate the descendants of 'revs' in revision order.
425 425
426 426 Yield a sequence of revision numbers starting with a child of
427 427 some rev in revs, i.e., each revision is *not* considered a
428 428 descendant of itself. Results are ordered by revision number (a
429 429 topological sort)."""
430 430 first = min(revs)
431 431 if first == nullrev:
432 432 for i in self:
433 433 yield i
434 434 return
435 435
436 436 seen = set(revs)
437 437 for i in self.revs(start=first + 1):
438 438 for x in self.parentrevs(i):
439 439 if x != nullrev and x in seen:
440 440 seen.add(i)
441 441 yield i
442 442 break
443 443
444 444 def findcommonmissing(self, common=None, heads=None):
445 445 """Return a tuple of the ancestors of common and the ancestors of heads
446 446 that are not ancestors of common. In revset terminology, we return the
447 447 tuple:
448 448
449 449 ::common, (::heads) - (::common)
450 450
451 451 The list is sorted by revision number, meaning it is
452 452 topologically sorted.
453 453
454 454 'heads' and 'common' are both lists of node IDs. If heads is
455 455 not supplied, uses all of the revlog's heads. If common is not
456 456 supplied, uses nullid."""
457 457 if common is None:
458 458 common = [nullid]
459 459 if heads is None:
460 460 heads = self.heads()
461 461
462 462 common = [self.rev(n) for n in common]
463 463 heads = [self.rev(n) for n in heads]
464 464
465 465 # we want the ancestors, but inclusive
466 466 class lazyset(object):
467 467 def __init__(self, lazyvalues):
468 468 self.addedvalues = set()
469 469 self.lazyvalues = lazyvalues
470 470
471 471 def __contains__(self, value):
472 472 return value in self.addedvalues or value in self.lazyvalues
473 473
474 474 def __iter__(self):
475 475 added = self.addedvalues
476 476 for r in added:
477 477 yield r
478 478 for r in self.lazyvalues:
479 479 if r not in added:
480 480 yield r
481 481
482 482 def add(self, value):
483 483 self.addedvalues.add(value)
484 484
485 485 def update(self, values):
486 486 self.addedvalues.update(values)
487 487
488 488 has = lazyset(self.ancestors(common))
489 489 has.add(nullrev)
490 490 has.update(common)
491 491
492 492 # take all ancestors from heads that aren't in has
493 493 missing = set()
494 494 visit = collections.deque(r for r in heads if r not in has)
495 495 while visit:
496 496 r = visit.popleft()
497 497 if r in missing:
498 498 continue
499 499 else:
500 500 missing.add(r)
501 501 for p in self.parentrevs(r):
502 502 if p not in has:
503 503 visit.append(p)
504 504 missing = list(missing)
505 505 missing.sort()
506 506 return has, [self.node(r) for r in missing]
507 507
508 508 def incrementalmissingrevs(self, common=None):
509 509 """Return an object that can be used to incrementally compute the
510 510 revision numbers of the ancestors of arbitrary sets that are not
511 511 ancestors of common. This is an ancestor.incrementalmissingancestors
512 512 object.
513 513
514 514 'common' is a list of revision numbers. If common is not supplied, uses
515 515 nullrev.
516 516 """
517 517 if common is None:
518 518 common = [nullrev]
519 519
520 520 return ancestor.incrementalmissingancestors(self.parentrevs, common)
521 521
522 522 def findmissingrevs(self, common=None, heads=None):
523 523 """Return the revision numbers of the ancestors of heads that
524 524 are not ancestors of common.
525 525
526 526 More specifically, return a list of revision numbers corresponding to
527 527 nodes N such that every N satisfies the following constraints:
528 528
529 529 1. N is an ancestor of some node in 'heads'
530 530 2. N is not an ancestor of any node in 'common'
531 531
532 532 The list is sorted by revision number, meaning it is
533 533 topologically sorted.
534 534
535 535 'heads' and 'common' are both lists of revision numbers. If heads is
536 536 not supplied, uses all of the revlog's heads. If common is not
537 537 supplied, uses nullid."""
538 538 if common is None:
539 539 common = [nullrev]
540 540 if heads is None:
541 541 heads = self.headrevs()
542 542
543 543 inc = self.incrementalmissingrevs(common=common)
544 544 return inc.missingancestors(heads)
545 545
546 546 def findmissing(self, common=None, heads=None):
547 547 """Return the ancestors of heads that are not ancestors of common.
548 548
549 549 More specifically, return a list of nodes N such that every N
550 550 satisfies the following constraints:
551 551
552 552 1. N is an ancestor of some node in 'heads'
553 553 2. N is not an ancestor of any node in 'common'
554 554
555 555 The list is sorted by revision number, meaning it is
556 556 topologically sorted.
557 557
558 558 'heads' and 'common' are both lists of node IDs. If heads is
559 559 not supplied, uses all of the revlog's heads. If common is not
560 560 supplied, uses nullid."""
561 561 if common is None:
562 562 common = [nullid]
563 563 if heads is None:
564 564 heads = self.heads()
565 565
566 566 common = [self.rev(n) for n in common]
567 567 heads = [self.rev(n) for n in heads]
568 568
569 569 inc = self.incrementalmissingrevs(common=common)
570 570 return [self.node(r) for r in inc.missingancestors(heads)]
571 571
572 572 def nodesbetween(self, roots=None, heads=None):
573 573 """Return a topological path from 'roots' to 'heads'.
574 574
575 575 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
576 576 topologically sorted list of all nodes N that satisfy both of
577 577 these constraints:
578 578
579 579 1. N is a descendant of some node in 'roots'
580 580 2. N is an ancestor of some node in 'heads'
581 581
582 582 Every node is considered to be both a descendant and an ancestor
583 583 of itself, so every reachable node in 'roots' and 'heads' will be
584 584 included in 'nodes'.
585 585
586 586 'outroots' is the list of reachable nodes in 'roots', i.e., the
587 587 subset of 'roots' that is returned in 'nodes'. Likewise,
588 588 'outheads' is the subset of 'heads' that is also in 'nodes'.
589 589
590 590 'roots' and 'heads' are both lists of node IDs. If 'roots' is
591 591 unspecified, uses nullid as the only root. If 'heads' is
592 592 unspecified, uses list of all of the revlog's heads."""
593 593 nonodes = ([], [], [])
594 594 if roots is not None:
595 595 roots = list(roots)
596 596 if not roots:
597 597 return nonodes
598 598 lowestrev = min([self.rev(n) for n in roots])
599 599 else:
600 600 roots = [nullid] # Everybody's a descendant of nullid
601 601 lowestrev = nullrev
602 602 if (lowestrev == nullrev) and (heads is None):
603 603 # We want _all_ the nodes!
604 604 return ([self.node(r) for r in self], [nullid], list(self.heads()))
605 605 if heads is None:
606 606 # All nodes are ancestors, so the latest ancestor is the last
607 607 # node.
608 608 highestrev = len(self) - 1
609 609 # Set ancestors to None to signal that every node is an ancestor.
610 610 ancestors = None
611 611 # Set heads to an empty dictionary for later discovery of heads
612 612 heads = {}
613 613 else:
614 614 heads = list(heads)
615 615 if not heads:
616 616 return nonodes
617 617 ancestors = set()
618 618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 619 # Also, later we will be using it to filter out the heads we can't
620 620 # find from roots.
621 621 heads = dict.fromkeys(heads, False)
622 622 # Start at the top and keep marking parents until we're done.
623 623 nodestotag = set(heads)
624 624 # Remember where the top was so we can use it as a limit later.
625 625 highestrev = max([self.rev(n) for n in nodestotag])
626 626 while nodestotag:
627 627 # grab a node to tag
628 628 n = nodestotag.pop()
629 629 # Never tag nullid
630 630 if n == nullid:
631 631 continue
632 632 # A node's revision number represents its place in a
633 633 # topologically sorted list of nodes.
634 634 r = self.rev(n)
635 635 if r >= lowestrev:
636 636 if n not in ancestors:
637 637 # If we are possibly a descendant of one of the roots
638 638 # and we haven't already been marked as an ancestor
639 639 ancestors.add(n) # Mark as ancestor
640 640 # Add non-nullid parents to list of nodes to tag.
641 641 nodestotag.update([p for p in self.parents(n) if
642 642 p != nullid])
643 643 elif n in heads: # We've seen it before, is it a fake head?
644 644 # So it is, real heads should not be the ancestors of
645 645 # any other heads.
646 646 heads.pop(n)
647 647 if not ancestors:
648 648 return nonodes
649 649 # Now that we have our set of ancestors, we want to remove any
650 650 # roots that are not ancestors.
651 651
652 652 # If one of the roots was nullid, everything is included anyway.
653 653 if lowestrev > nullrev:
654 654 # But, since we weren't, let's recompute the lowest rev to not
655 655 # include roots that aren't ancestors.
656 656
657 657 # Filter out roots that aren't ancestors of heads
658 658 roots = [n for n in roots if n in ancestors]
659 659 # Recompute the lowest revision
660 660 if roots:
661 661 lowestrev = min([self.rev(n) for n in roots])
662 662 else:
663 663 # No more roots? Return empty list
664 664 return nonodes
665 665 else:
666 666 # We are descending from nullid, and don't need to care about
667 667 # any other roots.
668 668 lowestrev = nullrev
669 669 roots = [nullid]
670 670 # Transform our roots list into a set.
671 671 descendants = set(roots)
672 672 # Also, keep the original roots so we can filter out roots that aren't
673 673 # 'real' roots (i.e. are descended from other roots).
674 674 roots = descendants.copy()
675 675 # Our topologically sorted list of output nodes.
676 676 orderedout = []
677 677 # Don't start at nullid since we don't want nullid in our output list,
678 678 # and if nullid shows up in descendants, empty parents will look like
679 679 # they're descendants.
680 680 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
681 681 n = self.node(r)
682 682 isdescendant = False
683 683 if lowestrev == nullrev: # Everybody is a descendant of nullid
684 684 isdescendant = True
685 685 elif n in descendants:
686 686 # n is already a descendant
687 687 isdescendant = True
688 688 # This check only needs to be done here because all the roots
689 689 # will start being marked as descendants before the loop.
690 690 if n in roots:
691 691 # If n was a root, check if it's a 'real' root.
692 692 p = tuple(self.parents(n))
693 693 # If any of its parents are descendants, it's not a root.
694 694 if (p[0] in descendants) or (p[1] in descendants):
695 695 roots.remove(n)
696 696 else:
697 697 p = tuple(self.parents(n))
698 698 # A node is a descendant if either of its parents is a
699 699 # descendant. (We seeded the descendants list with the roots
700 700 # up there, remember?)
701 701 if (p[0] in descendants) or (p[1] in descendants):
702 702 descendants.add(n)
703 703 isdescendant = True
704 704 if isdescendant and ((ancestors is None) or (n in ancestors)):
705 705 # Only include nodes that are both descendants and ancestors.
706 706 orderedout.append(n)
707 707 if (ancestors is not None) and (n in heads):
708 708 # We're trying to figure out which heads are reachable
709 709 # from roots.
710 710 # Mark this head as having been reached
711 711 heads[n] = True
712 712 elif ancestors is None:
713 713 # Otherwise, we're trying to discover the heads.
714 714 # Assume this is a head because if it isn't, the next step
715 715 # will eventually remove it.
716 716 heads[n] = True
717 717 # But, obviously its parents aren't.
718 718 for p in self.parents(n):
719 719 heads.pop(p, None)
720 720 heads = [n for n, flag in heads.iteritems() if flag]
721 721 roots = list(roots)
722 722 assert orderedout
723 723 assert roots
724 724 assert heads
725 725 return (orderedout, roots, heads)
726 726
727 727 def headrevs(self):
728 728 try:
729 729 return self.index.headrevs()
730 730 except AttributeError:
731 731 return self._headrevs()
732 732
733 733 def computephases(self, roots):
734 734 return self.index.computephasesmapsets(roots)
735 735
736 736 def _headrevs(self):
737 737 count = len(self)
738 738 if not count:
739 739 return [nullrev]
740 740 # we won't iterate over filtered revs, so nobody is a head at start
741 741 ishead = [0] * (count + 1)
742 742 index = self.index
743 743 for r in self:
744 744 ishead[r] = 1 # I may be a head
745 745 e = index[r]
746 746 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
747 747 return [r for r, val in enumerate(ishead) if val]
748 748
749 749 def heads(self, start=None, stop=None):
750 750 """return the list of all nodes that have no children
751 751
752 752 if start is specified, only heads that are descendants of
753 753 start will be returned
754 754 if stop is specified, it will consider all the revs from stop
755 755 as if they had no children
756 756 """
757 757 if start is None and stop is None:
758 758 if not len(self):
759 759 return [nullid]
760 760 return [self.node(r) for r in self.headrevs()]
761 761
762 762 if start is None:
763 763 start = nullid
764 764 if stop is None:
765 765 stop = []
766 766 stoprevs = set([self.rev(n) for n in stop])
767 767 startrev = self.rev(start)
768 768 reachable = set((startrev,))
769 769 heads = set((startrev,))
770 770
771 771 parentrevs = self.parentrevs
772 772 for r in self.revs(start=startrev + 1):
773 773 for p in parentrevs(r):
774 774 if p in reachable:
775 775 if r not in stoprevs:
776 776 reachable.add(r)
777 777 heads.add(r)
778 778 if p in heads and p not in stoprevs:
779 779 heads.remove(p)
780 780
781 781 return [self.node(r) for r in heads]
782 782
783 783 def children(self, node):
784 784 """find the children of a given node"""
785 785 c = []
786 786 p = self.rev(node)
787 787 for r in self.revs(start=p + 1):
788 788 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
789 789 if prevs:
790 790 for pr in prevs:
791 791 if pr == p:
792 792 c.append(self.node(r))
793 793 elif p == nullrev:
794 794 c.append(self.node(r))
795 795 return c
796 796
797 797 def descendant(self, start, end):
798 798 if start == nullrev:
799 799 return True
800 800 for i in self.descendants([start]):
801 801 if i == end:
802 802 return True
803 803 elif i > end:
804 804 break
805 805 return False
806 806
807 807 def commonancestorsheads(self, a, b):
808 808 """calculate all the heads of the common ancestors of nodes a and b"""
809 809 a, b = self.rev(a), self.rev(b)
810 810 try:
811 811 ancs = self.index.commonancestorsheads(a, b)
812 812 except (AttributeError, OverflowError): # C implementation failed
813 813 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
814 814 return map(self.node, ancs)
815 815
816 816 def isancestor(self, a, b):
817 817 """return True if node a is an ancestor of node b
818 818
819 819 The implementation of this is trivial but the use of
820 820 commonancestorsheads is not."""
821 821 return a in self.commonancestorsheads(a, b)
822 822
823 823 def ancestor(self, a, b):
824 824 """calculate the "best" common ancestor of nodes a and b"""
825 825
826 826 a, b = self.rev(a), self.rev(b)
827 827 try:
828 828 ancs = self.index.ancestors(a, b)
829 829 except (AttributeError, OverflowError):
830 830 ancs = ancestor.ancestors(self.parentrevs, a, b)
831 831 if ancs:
832 832 # choose a consistent winner when there's a tie
833 833 return min(map(self.node, ancs))
834 834 return nullid
835 835
836 836 def _match(self, id):
837 837 if isinstance(id, int):
838 838 # rev
839 839 return self.node(id)
840 840 if len(id) == 20:
841 841 # possibly a binary node
842 842 # odds of a binary node being all hex in ASCII are 1 in 10**25
843 843 try:
844 844 node = id
845 845 self.rev(node) # quick search the index
846 846 return node
847 847 except LookupError:
848 848 pass # may be partial hex id
849 849 try:
850 850 # str(rev)
851 851 rev = int(id)
852 852 if str(rev) != id:
853 853 raise ValueError
854 854 if rev < 0:
855 855 rev = len(self) + rev
856 856 if rev < 0 or rev >= len(self):
857 857 raise ValueError
858 858 return self.node(rev)
859 859 except (ValueError, OverflowError):
860 860 pass
861 861 if len(id) == 40:
862 862 try:
863 863 # a full hex nodeid?
864 864 node = bin(id)
865 865 self.rev(node)
866 866 return node
867 867 except (TypeError, LookupError):
868 868 pass
869 869
870 870 def _partialmatch(self, id):
871 871 try:
872 872 n = self.index.partialmatch(id)
873 873 if n and self.hasnode(n):
874 874 return n
875 875 return None
876 876 except RevlogError:
877 877 # parsers.c radix tree lookup gave multiple matches
878 878 # fall through to slow path that filters hidden revisions
879 879 pass
880 880 except (AttributeError, ValueError):
881 881 # we are pure python, or key was too short to search radix tree
882 882 pass
883 883
884 884 if id in self._pcache:
885 885 return self._pcache[id]
886 886
887 887 if len(id) < 40:
888 888 try:
889 889 # hex(node)[:...]
890 890 l = len(id) // 2 # grab an even number of digits
891 891 prefix = bin(id[:l * 2])
892 892 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
893 893 nl = [n for n in nl if hex(n).startswith(id) and
894 894 self.hasnode(n)]
895 895 if len(nl) > 0:
896 896 if len(nl) == 1:
897 897 self._pcache[id] = nl[0]
898 898 return nl[0]
899 899 raise LookupError(id, self.indexfile,
900 900 _('ambiguous identifier'))
901 901 return None
902 902 except TypeError:
903 903 pass
904 904
905 905 def lookup(self, id):
906 906 """locate a node based on:
907 907 - revision number or str(revision number)
908 908 - nodeid or subset of hex nodeid
909 909 """
910 910 n = self._match(id)
911 911 if n is not None:
912 912 return n
913 913 n = self._partialmatch(id)
914 914 if n:
915 915 return n
916 916
917 917 raise LookupError(id, self.indexfile, _('no match found'))
918 918
919 919 def cmp(self, node, text):
920 920 """compare text with a given file revision
921 921
922 922 returns True if text is different from what is stored.
923 923 """
924 924 p1, p2 = self.parents(node)
925 925 return hash(text, p1, p2) != node
926 926
927 927 def _addchunk(self, offset, data):
928 928 o, d = self._chunkcache
929 929 # try to add to existing cache
930 930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
931 931 self._chunkcache = o, d + data
932 932 else:
933 933 self._chunkcache = offset, data
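# E.g. a cached (0, <64k of data>) extended by a contiguous read of the
# next 64k at offset 65536 becomes (0, <128k of data>); a
# non-contiguous read simply replaces the cache, and growth is capped
# by the 1MB _chunksize to bound memory use.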
934 934
935 def _loadchunk(self, offset, length):
936 if self._inline:
937 df = self.opener(self.indexfile)
935 def _loadchunk(self, offset, length, df=None):
936 """Load a chunk/segment from the revlog.
937
938 Accepts absolute offset, length to read, and an optional existing
939 file handle to read from.
940
941 If an existing file handle is passed, it will be seeked and the
942 original seek position will NOT be restored.
943 """
944 if df is not None:
945 closehandle = False
938 946 else:
939 df = self.opener(self.datafile)
947 if self._inline:
948 df = self.opener(self.indexfile)
949 else:
950 df = self.opener(self.datafile)
951 closehandle = True
940 952
941 953 # Cache data both forward and backward around the requested
942 954 # data, in a fixed size window. This helps speed up operations
943 955 # involving reading the revlog backwards.
944 956 cachesize = self._chunkcachesize
945 957 realoffset = offset & ~(cachesize - 1)
946 958 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
947 959 - realoffset)
948 960 df.seek(realoffset)
949 961 d = df.read(reallength)
950 df.close()
962 if closehandle:
963 df.close()
951 964 self._addchunk(realoffset, d)
952 965 if offset != realoffset or reallength != length:
953 966 return util.buffer(d, offset - realoffset, length)
954 967 return d
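# Worked example of the windowing above with the default 64k
# _chunkcachesize: offset=70000, length=100 gives
# realoffset = 70000 & ~65535 = 65536 and
# reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536, so a
# single aligned 64k read serves this request and neighboring ones
# from the cache.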
955 968
956 def _getchunk(self, offset, length):
969 def _getchunk(self, offset, length, df=None):
957 970 o, d = self._chunkcache
958 971 l = len(d)
959 972
960 973 # is it in the cache?
961 974 cachestart = offset - o
962 975 cacheend = cachestart + length
963 976 if cachestart >= 0 and cacheend <= l:
964 977 if cachestart == 0 and cacheend == l:
965 978 return d # avoid a copy
966 979 return util.buffer(d, cachestart, cacheend - cachestart)
967 980
968 return self._loadchunk(offset, length)
981 return self._loadchunk(offset, length, df=df)
969 982
970 def _chunkraw(self, startrev, endrev):
983 def _chunkraw(self, startrev, endrev, df=None):
971 984 start = self.start(startrev)
972 985 end = self.end(endrev)
973 986 if self._inline:
974 987 start += (startrev + 1) * self._io.size
975 988 end += (endrev + 1) * self._io.size
976 989 length = end - start
977 return self._getchunk(start, length)
990 return self._getchunk(start, length, df=df)
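# For inline revlogs the data chunks are interleaved with the 64-byte
# index records, so rev r's chunk really begins at
# start(r) + (r + 1) * self._io.size; e.g. rev 0's data starts 64
# bytes into the .i file, right after its own index entry.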
978 991
979 def _chunk(self, rev):
980 return decompress(self._chunkraw(rev, rev))
992 def _chunk(self, rev, df=None):
993 return decompress(self._chunkraw(rev, rev, df=df))
981 994
982 def _chunks(self, revs):
995 def _chunks(self, revs, df=None):
983 996 '''faster version of [self._chunk(rev) for rev in revs]
984 997
985 998 Assumes that revs is in ascending order.'''
986 999 if not revs:
987 1000 return []
988 1001 start = self.start
989 1002 length = self.length
990 1003 inline = self._inline
991 1004 iosize = self._io.size
992 1005 buffer = util.buffer
993 1006
994 1007 l = []
995 1008 ladd = l.append
996 1009
997 1010 # preload the cache
998 1011 try:
999 1012 while True:
1000 1013 # ensure that the cache doesn't change out from under us
1001 1014 _cache = self._chunkcache
1002 self._chunkraw(revs[0], revs[-1])
1015 self._chunkraw(revs[0], revs[-1], df=df)
1003 1016 if _cache == self._chunkcache:
1004 1017 break
1005 1018 offset, data = _cache
1006 1019 except OverflowError:
1007 1020 # issue4215 - we can't cache a run of chunks greater than
1008 1021 # 2G on Windows
1009 return [self._chunk(rev) for rev in revs]
1022 return [self._chunk(rev, df=df) for rev in revs]
1010 1023
1011 1024 for rev in revs:
1012 1025 chunkstart = start(rev)
1013 1026 if inline:
1014 1027 chunkstart += (rev + 1) * iosize
1015 1028 chunklength = length(rev)
1016 1029 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1017 1030
1018 1031 return l
1019 1032
1020 1033 def _chunkclear(self):
1021 1034 self._chunkcache = (0, '')
1022 1035
1023 1036 def deltaparent(self, rev):
1024 1037 """return deltaparent of the given revision"""
1025 1038 base = self.index[rev][3]
1026 1039 if base == rev:
1027 1040 return nullrev
1028 1041 elif self._generaldelta:
1029 1042 return base
1030 1043 else:
1031 1044 return rev - 1
1032 1045
1033 1046 def revdiff(self, rev1, rev2):
1034 1047 """return or calculate a delta between two revisions"""
1035 1048 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1036 1049 return str(self._chunk(rev2))
1037 1050
1038 1051 return mdiff.textdiff(self.revision(rev1),
1039 1052 self.revision(rev2))
1040 1053
1041 def revision(self, nodeorrev):
1054 def revision(self, nodeorrev, _df=None):
1042 1055 """return an uncompressed revision of a given node or revision
1043 1056 number.
1057
1058 _df is an existing file handle to read from. It is meant to only be
1059 used internally.
1044 1060 """
1045 1061 if isinstance(nodeorrev, int):
1046 1062 rev = nodeorrev
1047 1063 node = self.node(rev)
1048 1064 else:
1049 1065 node = nodeorrev
1050 1066 rev = None
1051 1067
1052 1068 cachedrev = None
1053 1069 if node == nullid:
1054 1070 return ""
1055 1071 if self._cache:
1056 1072 if self._cache[0] == node:
1057 1073 return self._cache[2]
1058 1074 cachedrev = self._cache[1]
1059 1075
1060 1076 # look up what we need to read
1061 1077 text = None
1062 1078 if rev is None:
1063 1079 rev = self.rev(node)
1064 1080
1065 1081 # check rev flags
1066 1082 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1067 1083 raise RevlogError(_('incompatible revision flag %x') %
1068 1084 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1069 1085
1070 1086 # build delta chain
1071 1087 chain = []
1072 1088 index = self.index # for performance
1073 1089 generaldelta = self._generaldelta
1074 1090 iterrev = rev
1075 1091 e = index[iterrev]
1076 1092 while iterrev != e[3] and iterrev != cachedrev:
1077 1093 chain.append(iterrev)
1078 1094 if generaldelta:
1079 1095 iterrev = e[3]
1080 1096 else:
1081 1097 iterrev -= 1
1082 1098 e = index[iterrev]
1083 1099
1084 1100 if iterrev == cachedrev:
1085 1101 # cache hit
1086 1102 text = self._cache[2]
1087 1103 else:
1088 1104 chain.append(iterrev)
1089 1105 chain.reverse()
1090 1106
1091 1107 # drop cache to save memory
1092 1108 self._cache = None
1093 1109
1094 bins = self._chunks(chain)
1110 bins = self._chunks(chain, df=_df)
1095 1111 if text is None:
1096 1112 text = str(bins[0])
1097 1113 bins = bins[1:]
1098 1114
1099 1115 text = mdiff.patches(text, bins)
1100 1116
1101 1117 text = self._checkhash(text, node, rev)
1102 1118
1103 1119 self._cache = (node, rev, text)
1104 1120 return text
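# Ignoring caches and flags, the reconstruction above reduces to:
#
#   bins = self._chunks(chain)   # [basetext, delta, delta, ...]
#   text = mdiff.patches(str(bins[0]), bins[1:])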
1105 1121
1106 1122 def hash(self, text, p1, p2):
1107 1123 """Compute a node hash.
1108 1124
1109 1125 Available as a function so that subclasses can replace the hash
1110 1126 as needed.
1111 1127 """
1112 1128 return hash(text, p1, p2)
1113 1129
1114 1130 def _checkhash(self, text, node, rev):
1115 1131 p1, p2 = self.parents(node)
1116 1132 self.checkhash(text, p1, p2, node, rev)
1117 1133 return text
1118 1134
1119 1135 def checkhash(self, text, p1, p2, node, rev=None):
1120 1136 if node != self.hash(text, p1, p2):
1121 1137 revornode = rev
1122 1138 if revornode is None:
1123 1139 revornode = templatefilters.short(hex(node))
1124 1140 raise RevlogError(_("integrity check failed on %s:%s")
1125 1141 % (self.indexfile, revornode))
1126 1142
1127 1143 def checkinlinesize(self, tr, fp=None):
1128 1144 """Check if the revlog is too big for inline and convert if so.
1129 1145
1130 1146 This should be called after revisions are added to the revlog. If the
1131 1147 revlog has grown too large to be an inline revlog, it will convert it
1132 1148 to use multiple index and data files.
1133 1149 """
1134 1150 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1135 1151 return
1136 1152
1137 1153 trinfo = tr.find(self.indexfile)
1138 1154 if trinfo is None:
1139 1155 raise RevlogError(_("%s not found in the transaction")
1140 1156 % self.indexfile)
1141 1157
1142 1158 trindex = trinfo[2]
1143 1159 if trindex is not None:
1144 1160 dataoff = self.start(trindex)
1145 1161 else:
1146 1162 # revlog was stripped at start of transaction, use all leftover data
1147 1163 trindex = len(self) - 1
1148 1164 dataoff = self.end(-2)
1149 1165
1150 1166 tr.add(self.datafile, dataoff)
1151 1167
1152 1168 if fp:
1153 1169 fp.flush()
1154 1170 fp.close()
1155 1171
1156 1172 df = self.opener(self.datafile, 'w')
1157 1173 try:
1158 1174 for r in self:
1159 1175 df.write(self._chunkraw(r, r))
1160 1176 finally:
1161 1177 df.close()
1162 1178
1163 1179 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1164 1180 self.version &= ~(REVLOGNGINLINEDATA)
1165 1181 self._inline = False
1166 1182 for i in self:
1167 1183 e = self._io.packentry(self.index[i], self.node, self.version, i)
1168 1184 fp.write(e)
1169 1185
1170 1186 # if we don't call close, the temp file will never replace the
1171 1187 # real index
1172 1188 fp.close()
1173 1189
1174 1190 tr.replace(self.indexfile, trindex * self._io.size)
1175 1191 self._chunkclear()
1176 1192
1177 1193 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1178 1194 node=None):
1179 1195 """add a revision to the log
1180 1196
1181 1197 text - the revision data to add
1182 1198 transaction - the transaction object used for rollback
1183 1199 link - the linkrev data to add
1184 1200 p1, p2 - the parent nodeids of the revision
1185 1201 cachedelta - an optional precomputed delta
1186 1202 node - nodeid of revision; typically node is not specified, and it is
1187 1203 computed by default as hash(text, p1, p2), however subclasses might
1188 1204 use different hashing method (and override checkhash() in such case)
1189 1205 """
1190 1206 if link == nullrev:
1191 1207 raise RevlogError(_("attempted to add linkrev -1 to %s")
1192 1208 % self.indexfile)
1193 1209
1194 1210 if len(text) > _maxentrysize:
1195 1211 raise RevlogError(
1196 1212 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1197 1213 % (self.indexfile, len(text)))
1198 1214
1199 1215 node = node or self.hash(text, p1, p2)
1200 1216 if node in self.nodemap:
1201 1217 return node
1202 1218
1203 1219 dfh = None
1204 1220 if not self._inline:
1205 1221 dfh = self.opener(self.datafile, "a")
1206 1222 ifh = self.opener(self.indexfile, "a+")
1207 1223 try:
1208 1224 return self._addrevision(node, text, transaction, link, p1, p2,
1209 1225 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1210 1226 finally:
1211 1227 if dfh:
1212 1228 dfh.close()
1213 1229 ifh.close()
1214 1230
1215 1231 def compress(self, text):
1216 1232 """ generate a possibly-compressed representation of text """
1217 1233 if not text:
1218 1234 return ("", text)
1219 1235 l = len(text)
1220 1236 bin = None
1221 1237 if l < 44:
1222 1238 pass
1223 1239 elif l > 1000000:
1224 1240 # zlib makes an internal copy, thus doubling memory usage for
1225 1241 # large files, so let's do this in pieces
1226 1242 z = zlib.compressobj()
1227 1243 p = []
1228 1244 pos = 0
1229 1245 while pos < l:
1230 1246 pos2 = pos + 2**20
1231 1247 p.append(z.compress(text[pos:pos2]))
1232 1248 pos = pos2
1233 1249 p.append(z.flush())
1234 1250 if sum(map(len, p)) < l:
1235 1251 bin = "".join(p)
1236 1252 else:
1237 1253 bin = _compress(text)
1238 1254 if bin is None or len(bin) > l:
1239 1255 if text[0] == '\0':
1240 1256 return ("", text)
1241 1257 return ('u', text)
1242 1258 return ("", bin)
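# compress() returns a (marker, payload) pair whose concatenation
# feeds decompress() at the top of this file. Illustrative cases:
#
#   compress('')      == ('', '')        # empty passes through
#   compress('\0abc') == ('', '\0abc')   # short text starting with NUL
#   compress('abc')   == ('u', 'abc')    # short text needs the marker
#
# Texts of 44+ bytes are zlib-compressed, and the compressed form is
# kept only if it is no larger than the original.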
1243 1259
1244 1260 def _isgooddelta(self, d, textlen):
1245 1261 """Returns True if the given delta is good. Good means that it is within
1246 1262 the disk span, disk size, and chain length bounds that we know to be
1247 1263 performant."""
1248 1264 if d is None:
1249 1265 return False
1250 1266
1251 1267 # - 'dist' is the distance from the base revision -- bounding it limits
1252 1268 # the amount of I/O we need to do.
1253 1269 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1254 1270 # to apply -- bounding it limits the amount of CPU we consume.
1255 1271 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1256 1272 if (dist > textlen * 4 or l > textlen or
1257 1273 compresseddeltalen > textlen * 2 or
1258 1274 (self._maxchainlen and chainlen > self._maxchainlen)):
1259 1275 return False
1260 1276
1261 1277 return True
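# Concretely, for a 1000-byte text a delta is rejected if the read
# span from its chain base exceeds 4000 bytes, the delta itself
# exceeds 1000 bytes, the chain's summed delta size exceeds 2000
# bytes, or the chain is longer than the configured maxchainlen.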
1262 1278
1263 1279 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1264 1280 cachedelta, ifh, dfh, alwayscache=False):
1265 1281 """internal function to add revisions to the log
1266 1282
1267 1283 see addrevision for argument descriptions.
1268 1284 invariants:
1269 1285 - text is optional (can be None); if not set, cachedelta must be set.
1270 1286 if both are set, they must correspond to each other.
1271 1287 """
1272 1288 btext = [text]
1273 1289 def buildtext():
1274 1290 if btext[0] is not None:
1275 1291 return btext[0]
1276 1292 # flush any pending writes here so we can read it in revision
1277 1293 if dfh:
1278 1294 dfh.flush()
1279 1295 ifh.flush()
1280 1296 baserev = cachedelta[0]
1281 1297 delta = cachedelta[1]
1282 1298 # special case deltas which replace entire base; no need to decode
1283 1299 # base revision. this neatly avoids censored bases, which throw when
1284 1300 # they're decoded.
1285 1301 hlen = struct.calcsize(">lll")
1286 1302 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1287 1303 len(delta) - hlen):
1288 1304 btext[0] = delta[hlen:]
1289 1305 else:
1290 1306 basetext = self.revision(self.node(baserev))
1291 1307 btext[0] = mdiff.patch(basetext, delta)
1292 1308 try:
1293 1309 self.checkhash(btext[0], p1, p2, node)
1294 1310 if flags & REVIDX_ISCENSORED:
1295 1311 raise RevlogError(_('node %s is not censored') % node)
1296 1312 except CensoredNodeError:
1297 1313 # must pass the censored index flag to add censored revisions
1298 1314 if not flags & REVIDX_ISCENSORED:
1299 1315 raise
1300 1316 return btext[0]
1301 1317
1302 1318 def builddelta(rev):
1303 1319 # can we use the cached delta?
1304 1320 if cachedelta and cachedelta[0] == rev:
1305 1321 delta = cachedelta[1]
1306 1322 else:
1307 1323 t = buildtext()
1308 1324 if self.iscensored(rev):
1309 1325 # deltas based on a censored revision must replace the
1310 1326 # full content in one patch, so delta works everywhere
1311 1327 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1312 1328 delta = header + t
1313 1329 else:
1314 1330 ptext = self.revision(self.node(rev))
1315 1331 delta = mdiff.textdiff(ptext, t)
1316 1332 data = self.compress(delta)
1317 1333 l = len(data[1]) + len(data[0])
1318 1334 if basecache[0] == rev:
1319 1335 chainbase = basecache[1]
1320 1336 else:
1321 1337 chainbase = self.chainbase(rev)
1322 1338 dist = l + offset - self.start(chainbase)
1323 1339 if self._generaldelta:
1324 1340 base = rev
1325 1341 else:
1326 1342 base = chainbase
1327 1343 chainlen, compresseddeltalen = self._chaininfo(rev)
1328 1344 chainlen += 1
1329 1345 compresseddeltalen += l
1330 1346 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1331 1347
1332 1348 curr = len(self)
1333 1349 prev = curr - 1
1334 1350 base = chainbase = curr
1335 1351 chainlen = None
1336 1352 offset = self.end(prev)
1337 1353 d = None
1338 1354 if self._basecache is None:
1339 1355 self._basecache = (prev, self.chainbase(prev))
1340 1356 basecache = self._basecache
1341 1357 p1r, p2r = self.rev(p1), self.rev(p2)
1342 1358
1343 1359 # full versions are inserted when the needed deltas
1344 1360 # become comparable to the uncompressed text
1345 1361 if text is None:
1346 1362 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1347 1363 cachedelta[1])
1348 1364 else:
1349 1365 textlen = len(text)
1350 1366
1351 1367 # should we try to build a delta?
1352 1368 if prev != nullrev:
1353 1369 if self._generaldelta:
1354 1370 if p2r != nullrev and self._aggressivemergedeltas:
1355 1371 d = builddelta(p1r)
1356 1372 d2 = builddelta(p2r)
1357 1373 p1good = self._isgooddelta(d, textlen)
1358 1374 p2good = self._isgooddelta(d2, textlen)
1359 1375 if p1good and p2good:
1360 1376 # If both are good deltas, choose the smallest
1361 1377 if d2[1] < d[1]:
1362 1378 d = d2
1363 1379 elif p2good:
1364 1380 # If only p2 is good, use it
1365 1381 d = d2
1366 1382 elif p1good:
1367 1383 pass
1368 1384 else:
1369 1385 # Neither is good, try against prev to hopefully save us
1370 1386 # a fulltext.
1371 1387 d = builddelta(prev)
1372 1388 else:
1373 1389 # Pick whichever parent is closer to us (to minimize the
1374 1390 # chance of having to build a fulltext). Since
1375 1391 # nullrev == -1, any non-merge commit will always pick p1r.
1376 1392 drev = p2r if p2r > p1r else p1r
1377 1393 d = builddelta(drev)
1378 1394 # If the chosen delta will result in us making a full text,
1379 1395 # give it one last try against prev.
1380 1396 if drev != prev and not self._isgooddelta(d, textlen):
1381 1397 d = builddelta(prev)
1382 1398 else:
1383 1399 d = builddelta(prev)
1384 1400 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1385 1401
1386 1402 if not self._isgooddelta(d, textlen):
1387 1403 text = buildtext()
1388 1404 data = self.compress(text)
1389 1405 l = len(data[1]) + len(data[0])
1390 1406 base = chainbase = curr
1391 1407
1392 1408 e = (offset_type(offset, flags), l, textlen,
1393 1409 base, link, p1r, p2r, node)
1394 1410 self.index.insert(-1, e)
1395 1411 self.nodemap[node] = curr
1396 1412
1397 1413 entry = self._io.packentry(e, self.node, self.version, curr)
1398 1414 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1399 1415
1400 1416 if alwayscache and text is None:
1401 1417 text = buildtext()
1402 1418
1403 1419 if type(text) == str: # only accept immutable objects
1404 1420 self._cache = (node, curr, text)
1405 1421 self._basecache = (curr, chainbase)
1406 1422 return node
1407 1423
1408 1424 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1409 1425 curr = len(self) - 1
1410 1426 if not self._inline:
1411 1427 transaction.add(self.datafile, offset)
1412 1428 transaction.add(self.indexfile, curr * len(entry))
1413 1429 if data[0]:
1414 1430 dfh.write(data[0])
1415 1431 dfh.write(data[1])
1416 1432 dfh.flush()
1417 1433 ifh.write(entry)
1418 1434 else:
1419 1435 offset += curr * self._io.size
1420 1436 transaction.add(self.indexfile, offset, curr)
1421 1437 ifh.write(entry)
1422 1438 ifh.write(data[0])
1423 1439 ifh.write(data[1])
1424 1440 self.checkinlinesize(transaction, ifh)
1425 1441
1426 1442 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1427 1443 """
1428 1444 add a delta group
1429 1445
1430 1446 given a set of deltas, add them to the revision log. the
1431 1447 first delta is against its parent, which should be in our
1432 1448 log, the rest are against the previous delta.
1433 1449
1434 1450 If ``addrevisioncb`` is defined, it will be called with arguments of
1435 1451 this revlog and the node that was added.
1436 1452 """
1437 1453
1438 1454 # track the base of the current delta log
1439 1455 content = []
1440 1456 node = None
1441 1457
1442 1458 r = len(self)
1443 1459 end = 0
1444 1460 if r:
1445 1461 end = self.end(r - 1)
1446 1462 ifh = self.opener(self.indexfile, "a+")
1447 1463 isize = r * self._io.size
1448 1464 if self._inline:
1449 1465 transaction.add(self.indexfile, end + isize, r)
1450 1466 dfh = None
1451 1467 else:
1452 1468 transaction.add(self.indexfile, isize, r)
1453 1469 transaction.add(self.datafile, end)
1454 1470 dfh = self.opener(self.datafile, "a")
1455 1471 def flush():
1456 1472 if dfh:
1457 1473 dfh.flush()
1458 1474 ifh.flush()
1459 1475 try:
1460 1476 # loop through our set of deltas
1461 1477 chain = None
1462 1478 while True:
1463 1479 chunkdata = bundle.deltachunk(chain)
1464 1480 if not chunkdata:
1465 1481 break
1466 1482 node = chunkdata['node']
1467 1483 p1 = chunkdata['p1']
1468 1484 p2 = chunkdata['p2']
1469 1485 cs = chunkdata['cs']
1470 1486 deltabase = chunkdata['deltabase']
1471 1487 delta = chunkdata['delta']
1472 1488
1473 1489 content.append(node)
1474 1490
1475 1491 link = linkmapper(cs)
1476 1492 if node in self.nodemap:
1477 1493 # this can happen if two branches make the same change
1478 1494 chain = node
1479 1495 continue
1480 1496
1481 1497 for p in (p1, p2):
1482 1498 if p not in self.nodemap:
1483 1499 raise LookupError(p, self.indexfile,
1484 1500 _('unknown parent'))
1485 1501
1486 1502 if deltabase not in self.nodemap:
1487 1503 raise LookupError(deltabase, self.indexfile,
1488 1504 _('unknown delta base'))
1489 1505
1490 1506 baserev = self.rev(deltabase)
1491 1507
1492 1508 if baserev != nullrev and self.iscensored(baserev):
1493 1509 # if base is censored, delta must be full replacement in a
1494 1510 # single patch operation
1495 1511 hlen = struct.calcsize(">lll")
1496 1512 oldlen = self.rawsize(baserev)
1497 1513 newlen = len(delta) - hlen
1498 1514 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1499 1515 raise error.CensoredBaseError(self.indexfile,
1500 1516 self.node(baserev))
1501 1517
1502 1518 flags = REVIDX_DEFAULT_FLAGS
1503 1519 if self._peek_iscensored(baserev, delta, flush):
1504 1520 flags |= REVIDX_ISCENSORED
1505 1521
1506 1522 # We assume consumers of addrevisioncb will want to retrieve
1507 1523 # the added revision, which will require a call to
1508 1524 # revision(). revision() will fast path if there is a cache
1509 1525 # hit. So, we tell _addrevision() to always cache in this case.
1510 1526 chain = self._addrevision(node, None, transaction, link,
1511 1527 p1, p2, flags, (baserev, delta),
1512 1528 ifh, dfh,
1513 1529 alwayscache=bool(addrevisioncb))
1514 1530
1515 1531 if addrevisioncb:
1516 1532 addrevisioncb(self, chain)
1517 1533
1518 1534 if not dfh and not self._inline:
1519 1535 # addrevision switched from inline to conventional
1520 1536 # reopen the index
1521 1537 ifh.close()
1522 1538 dfh = self.opener(self.datafile, "a")
1523 1539 ifh = self.opener(self.indexfile, "a")
1524 1540 finally:
1525 1541 if dfh:
1526 1542 dfh.close()
1527 1543 ifh.close()
1528 1544
1529 1545 return content
1530 1546
1531 1547 def iscensored(self, rev):
1532 1548 """Check if a file revision is censored."""
1533 1549 return False
1534 1550
1535 1551 def _peek_iscensored(self, baserev, delta, flush):
1536 1552 """Quickly check if a delta produces a censored revision."""
1537 1553 return False
1538 1554
1539 1555 def getstrippoint(self, minlink):
1540 1556 """find the minimum rev that must be stripped to strip the linkrev
1541 1557
1542 1558 Returns a tuple containing the minimum rev and a set of all revs that
1543 1559 have linkrevs that will be broken by this strip.
1544 1560 """
1545 1561 brokenrevs = set()
1546 1562 strippoint = len(self)
1547 1563
1548 1564 heads = {}
1549 1565 futurelargelinkrevs = set()
1550 1566 for head in self.headrevs():
1551 1567 headlinkrev = self.linkrev(head)
1552 1568 heads[head] = headlinkrev
1553 1569 if headlinkrev >= minlink:
1554 1570 futurelargelinkrevs.add(headlinkrev)
1555 1571
1556 1572 # This algorithm involves walking down the rev graph, starting at the
1557 1573 # heads. Since the revs are topologically sorted according to linkrev,
1558 1574 # once all head linkrevs are below the minlink, we know there are
1559 1575 # no more revs that could have a linkrev greater than minlink.
1560 1576 # So we can stop walking.
1561 1577 while futurelargelinkrevs:
1562 1578 strippoint -= 1
1563 1579 linkrev = heads.pop(strippoint)
1564 1580
1565 1581 if linkrev < minlink:
1566 1582 brokenrevs.add(strippoint)
1567 1583 else:
1568 1584 futurelargelinkrevs.remove(linkrev)
1569 1585
1570 1586 for p in self.parentrevs(strippoint):
1571 1587 if p != nullrev:
1572 1588 plinkrev = self.linkrev(p)
1573 1589 heads[p] = plinkrev
1574 1590 if plinkrev >= minlink:
1575 1591 futurelargelinkrevs.add(plinkrev)
1576 1592
1577 1593 return strippoint, brokenrevs
1578 1594
1579 1595 def strip(self, minlink, transaction):
1580 1596 """truncate the revlog on the first revision with a linkrev >= minlink
1581 1597
1582 1598 This function is called when we're stripping revision minlink and
1583 1599 its descendants from the repository.
1584 1600
1585 1601 We have to remove all revisions with linkrev >= minlink, because
1586 1602 the equivalent changelog revisions will be renumbered after the
1587 1603 strip.
1588 1604
1589 1605 So we truncate the revlog on the first of these revisions, and
1590 1606 trust that the caller has saved the revisions that shouldn't be
1591 1607 removed and that it'll re-add them after this truncation.
1592 1608 """
1593 1609 if len(self) == 0:
1594 1610 return
1595 1611
1596 1612 rev, _ = self.getstrippoint(minlink)
1597 1613 if rev == len(self):
1598 1614 return
1599 1615
1600 1616 # first truncate the files on disk
1601 1617 end = self.start(rev)
1602 1618 if not self._inline:
1603 1619 transaction.add(self.datafile, end)
1604 1620 end = rev * self._io.size
1605 1621 else:
1606 1622 end += rev * self._io.size
1607 1623
1608 1624 transaction.add(self.indexfile, end)
1609 1625
1610 1626 # then reset internal state in memory to forget those revisions
1611 1627 self._cache = None
1612 1628 self._chaininfocache = {}
1613 1629 self._chunkclear()
1614 1630 for x in xrange(rev, len(self)):
1615 1631 del self.nodemap[self.node(x)]
1616 1632
1617 1633 del self.index[rev:-1]
1618 1634
1619 1635 def checksize(self):
1620 1636 expected = 0
1621 1637 if len(self):
1622 1638 expected = max(0, self.end(len(self) - 1))
1623 1639
1624 1640 try:
1625 1641 f = self.opener(self.datafile)
1626 1642 f.seek(0, 2)
1627 1643 actual = f.tell()
1628 1644 f.close()
1629 1645 dd = actual - expected
1630 1646 except IOError as inst:
1631 1647 if inst.errno != errno.ENOENT:
1632 1648 raise
1633 1649 dd = 0
1634 1650
1635 1651 try:
1636 1652 f = self.opener(self.indexfile)
1637 1653 f.seek(0, 2)
1638 1654 actual = f.tell()
1639 1655 f.close()
1640 1656 s = self._io.size
1641 1657 i = max(0, actual // s)
1642 1658 di = actual - (i * s)
1643 1659 if self._inline:
1644 1660 databytes = 0
1645 1661 for r in self:
1646 1662 databytes += max(0, self.length(r))
1647 1663 dd = 0
1648 1664 di = actual - len(self) * s - databytes
1649 1665 except IOError as inst:
1650 1666 if inst.errno != errno.ENOENT:
1651 1667 raise
1652 1668 di = 0
1653 1669
1654 1670 return (dd, di)
1655 1671
1656 1672 def files(self):
1657 1673 res = [self.indexfile]
1658 1674 if not self._inline:
1659 1675 res.append(self.datafile)
1660 1676 return res