##// END OF EJS Templates
revlog: don't flush data file after every added revision...
Gregory Szorc -
r26380:56a640b0 default
parent child Browse files
Show More
@@ -1,1680 +1,1679 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent node is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error as e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
97 97
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 versionformat = ">I"
153 153
154 154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 155 # signed integer)
156 156 _maxentrysize = 0x7fffffff
157 157
158 158 class revlogio(object):
159 159 def __init__(self):
160 160 self.size = struct.calcsize(indexformatng)
161 161
162 162 def parseindex(self, data, inline):
163 163 # call the C implementation to parse the index data
164 164 index, cache = parsers.parse_index2(data, inline)
165 165 return index, getattr(index, 'nodemap', None), cache
166 166
167 167 def packentry(self, entry, node, version, rev):
168 168 p = _pack(indexformatng, *entry)
169 169 if rev == 0:
170 170 p = _pack(versionformat, version) + p[4:]
171 171 return p
172 172
173 173 class revlog(object):
174 174 """
175 175 the underlying revision storage object
176 176
177 177 A revlog consists of two parts, an index and the revision data.
178 178
179 179 The index is a file with a fixed record size containing
180 180 information on each revision, including its nodeid (hash), the
181 181 nodeids of its parents, the position and offset of its data within
182 182 the data file, and the revision it's based on. Finally, each entry
183 183 contains a linkrev entry that can serve as a pointer to external
184 184 data.
185 185
186 186 The revision data itself is a linear collection of data chunks.
187 187 Each chunk represents a revision and is usually represented as a
188 188 delta against the previous chunk. To bound lookup time, runs of
189 189 deltas are limited to about 2 times the length of the original
190 190 version data. This makes retrieval of a version proportional to
191 191 its size, or O(1) relative to the number of revisions.
192 192
193 193 Both pieces of the revlog are written to in an append-only
194 194 fashion, which means we never need to rewrite a file to insert or
195 195 remove data, and can use some simple techniques to avoid the need
196 196 for locking while reading.
197 197 """
198 198 def __init__(self, opener, indexfile):
199 199 """
200 200 create a revlog object
201 201
202 202 opener is a function that abstracts the file opening operation
203 203 and can be used to implement COW semantics or the like.
204 204 """
205 205 self.indexfile = indexfile
206 206 self.datafile = indexfile[:-2] + ".d"
207 207 self.opener = opener
208 208 self._cache = None
209 209 self._basecache = None
210 210 self._chunkcache = (0, '')
211 211 self._chunkcachesize = 65536
212 212 self._maxchainlen = None
213 213 self._aggressivemergedeltas = False
214 214 self.index = []
215 215 self._pcache = {}
216 216 self._nodecache = {nullid: nullrev}
217 217 self._nodepos = None
218 218
219 219 v = REVLOG_DEFAULT_VERSION
220 220 opts = getattr(opener, 'options', None)
221 221 if opts is not None:
222 222 if 'revlogv1' in opts:
223 223 if 'generaldelta' in opts:
224 224 v |= REVLOGGENERALDELTA
225 225 else:
226 226 v = 0
227 227 if 'chunkcachesize' in opts:
228 228 self._chunkcachesize = opts['chunkcachesize']
229 229 if 'maxchainlen' in opts:
230 230 self._maxchainlen = opts['maxchainlen']
231 231 if 'aggressivemergedeltas' in opts:
232 232 self._aggressivemergedeltas = opts['aggressivemergedeltas']
233 233
234 234 if self._chunkcachesize <= 0:
235 235 raise RevlogError(_('revlog chunk cache size %r is not greater '
236 236 'than 0') % self._chunkcachesize)
237 237 elif self._chunkcachesize & (self._chunkcachesize - 1):
238 238 raise RevlogError(_('revlog chunk cache size %r is not a power '
239 239 'of 2') % self._chunkcachesize)
240 240
241 241 indexdata = ''
242 242 self._initempty = True
243 243 try:
244 244 f = self.opener(self.indexfile)
245 245 indexdata = f.read()
246 246 f.close()
247 247 if len(indexdata) > 0:
248 248 v = struct.unpack(versionformat, indexdata[:4])[0]
249 249 self._initempty = False
250 250 except IOError as inst:
251 251 if inst.errno != errno.ENOENT:
252 252 raise
253 253
254 254 self.version = v
255 255 self._inline = v & REVLOGNGINLINEDATA
256 256 self._generaldelta = v & REVLOGGENERALDELTA
257 257 flags = v & ~0xFFFF
258 258 fmt = v & 0xFFFF
259 259 if fmt == REVLOGV0 and flags:
260 260 raise RevlogError(_("index %s unknown flags %#04x for format v0")
261 261 % (self.indexfile, flags >> 16))
262 262 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
263 263 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
264 264 % (self.indexfile, flags >> 16))
265 265 elif fmt > REVLOGNG:
266 266 raise RevlogError(_("index %s unknown format %d")
267 267 % (self.indexfile, fmt))
268 268
269 269 self._io = revlogio()
270 270 if self.version == REVLOGV0:
271 271 self._io = revlogoldio()
272 272 try:
273 273 d = self._io.parseindex(indexdata, self._inline)
274 274 except (ValueError, IndexError):
275 275 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
276 276 self.index, nodemap, self._chunkcache = d
277 277 if nodemap is not None:
278 278 self.nodemap = self._nodecache = nodemap
279 279 if not self._chunkcache:
280 280 self._chunkclear()
281 281 # revnum -> (chain-length, sum-delta-length)
282 282 self._chaininfocache = {}
283 283
284 284 def tip(self):
285 285 return self.node(len(self.index) - 2)
286 286 def __contains__(self, rev):
287 287 return 0 <= rev < len(self)
288 288 def __len__(self):
289 289 return len(self.index) - 1
290 290 def __iter__(self):
291 291 return iter(xrange(len(self)))
292 292 def revs(self, start=0, stop=None):
293 293 """iterate over all rev in this revlog (from start to stop)"""
294 294 step = 1
295 295 if stop is not None:
296 296 if start > stop:
297 297 step = -1
298 298 stop += step
299 299 else:
300 300 stop = len(self)
301 301 return xrange(start, stop, step)
302 302
303 303 @util.propertycache
304 304 def nodemap(self):
305 305 self.rev(self.node(0))
306 306 return self._nodecache
307 307
308 308 def hasnode(self, node):
309 309 try:
310 310 self.rev(node)
311 311 return True
312 312 except KeyError:
313 313 return False
314 314
315 315 def clearcaches(self):
316 316 try:
317 317 self._nodecache.clearcaches()
318 318 except AttributeError:
319 319 self._nodecache = {nullid: nullrev}
320 320 self._nodepos = None
321 321
322 322 def rev(self, node):
323 323 try:
324 324 return self._nodecache[node]
325 325 except TypeError:
326 326 raise
327 327 except RevlogError:
328 328 # parsers.c radix tree lookup failed
329 329 raise LookupError(node, self.indexfile, _('no node'))
330 330 except KeyError:
331 331 # pure python cache lookup failed
332 332 n = self._nodecache
333 333 i = self.index
334 334 p = self._nodepos
335 335 if p is None:
336 336 p = len(i) - 2
337 337 for r in xrange(p, -1, -1):
338 338 v = i[r][7]
339 339 n[v] = r
340 340 if v == node:
341 341 self._nodepos = r - 1
342 342 return r
343 343 raise LookupError(node, self.indexfile, _('no node'))
344 344
345 345 def node(self, rev):
346 346 return self.index[rev][7]
347 347 def linkrev(self, rev):
348 348 return self.index[rev][4]
349 349 def parents(self, node):
350 350 i = self.index
351 351 d = i[self.rev(node)]
352 352 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
353 353 def parentrevs(self, rev):
354 354 return self.index[rev][5:7]
355 355 def start(self, rev):
356 356 return int(self.index[rev][0] >> 16)
357 357 def end(self, rev):
358 358 return self.start(rev) + self.length(rev)
359 359 def length(self, rev):
360 360 return self.index[rev][1]
361 361 def chainbase(self, rev):
362 362 index = self.index
363 363 base = index[rev][3]
364 364 while base != rev:
365 365 rev = base
366 366 base = index[rev][3]
367 367 return base
368 368 def chainlen(self, rev):
369 369 return self._chaininfo(rev)[0]
370 370
371 371 def _chaininfo(self, rev):
372 372 chaininfocache = self._chaininfocache
373 373 if rev in chaininfocache:
374 374 return chaininfocache[rev]
375 375 index = self.index
376 376 generaldelta = self._generaldelta
377 377 iterrev = rev
378 378 e = index[iterrev]
379 379 clen = 0
380 380 compresseddeltalen = 0
381 381 while iterrev != e[3]:
382 382 clen += 1
383 383 compresseddeltalen += e[1]
384 384 if generaldelta:
385 385 iterrev = e[3]
386 386 else:
387 387 iterrev -= 1
388 388 if iterrev in chaininfocache:
389 389 t = chaininfocache[iterrev]
390 390 clen += t[0]
391 391 compresseddeltalen += t[1]
392 392 break
393 393 e = index[iterrev]
394 394 else:
395 395 # Add text length of base since decompressing that also takes
396 396 # work. For cache hits the length is already included.
397 397 compresseddeltalen += e[1]
398 398 r = (clen, compresseddeltalen)
399 399 chaininfocache[rev] = r
400 400 return r
401 401
402 402 def flags(self, rev):
403 403 return self.index[rev][0] & 0xFFFF
404 404 def rawsize(self, rev):
405 405 """return the length of the uncompressed text for a given revision"""
406 406 l = self.index[rev][2]
407 407 if l >= 0:
408 408 return l
409 409
410 410 t = self.revision(self.node(rev))
411 411 return len(t)
412 412 size = rawsize
413 413
414 414 def ancestors(self, revs, stoprev=0, inclusive=False):
415 415 """Generate the ancestors of 'revs' in reverse topological order.
416 416 Does not generate revs lower than stoprev.
417 417
418 418 See the documentation for ancestor.lazyancestors for more details."""
419 419
420 420 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
421 421 inclusive=inclusive)
422 422
423 423 def descendants(self, revs):
424 424 """Generate the descendants of 'revs' in revision order.
425 425
426 426 Yield a sequence of revision numbers starting with a child of
427 427 some rev in revs, i.e., each revision is *not* considered a
428 428 descendant of itself. Results are ordered by revision number (a
429 429 topological sort)."""
430 430 first = min(revs)
431 431 if first == nullrev:
432 432 for i in self:
433 433 yield i
434 434 return
435 435
436 436 seen = set(revs)
437 437 for i in self.revs(start=first + 1):
438 438 for x in self.parentrevs(i):
439 439 if x != nullrev and x in seen:
440 440 seen.add(i)
441 441 yield i
442 442 break
443 443
444 444 def findcommonmissing(self, common=None, heads=None):
445 445 """Return a tuple of the ancestors of common and the ancestors of heads
446 446 that are not ancestors of common. In revset terminology, we return the
447 447 tuple:
448 448
449 449 ::common, (::heads) - (::common)
450 450
451 451 The list is sorted by revision number, meaning it is
452 452 topologically sorted.
453 453
454 454 'heads' and 'common' are both lists of node IDs. If heads is
455 455 not supplied, uses all of the revlog's heads. If common is not
456 456 supplied, uses nullid."""
457 457 if common is None:
458 458 common = [nullid]
459 459 if heads is None:
460 460 heads = self.heads()
461 461
462 462 common = [self.rev(n) for n in common]
463 463 heads = [self.rev(n) for n in heads]
464 464
465 465 # we want the ancestors, but inclusive
466 466 class lazyset(object):
467 467 def __init__(self, lazyvalues):
468 468 self.addedvalues = set()
469 469 self.lazyvalues = lazyvalues
470 470
471 471 def __contains__(self, value):
472 472 return value in self.addedvalues or value in self.lazyvalues
473 473
474 474 def __iter__(self):
475 475 added = self.addedvalues
476 476 for r in added:
477 477 yield r
478 478 for r in self.lazyvalues:
479 479 if not r in added:
480 480 yield r
481 481
482 482 def add(self, value):
483 483 self.addedvalues.add(value)
484 484
485 485 def update(self, values):
486 486 self.addedvalues.update(values)
487 487
488 488 has = lazyset(self.ancestors(common))
489 489 has.add(nullrev)
490 490 has.update(common)
491 491
492 492 # take all ancestors from heads that aren't in has
493 493 missing = set()
494 494 visit = collections.deque(r for r in heads if r not in has)
495 495 while visit:
496 496 r = visit.popleft()
497 497 if r in missing:
498 498 continue
499 499 else:
500 500 missing.add(r)
501 501 for p in self.parentrevs(r):
502 502 if p not in has:
503 503 visit.append(p)
504 504 missing = list(missing)
505 505 missing.sort()
506 506 return has, [self.node(r) for r in missing]
507 507
508 508 def incrementalmissingrevs(self, common=None):
509 509 """Return an object that can be used to incrementally compute the
510 510 revision numbers of the ancestors of arbitrary sets that are not
511 511 ancestors of common. This is an ancestor.incrementalmissingancestors
512 512 object.
513 513
514 514 'common' is a list of revision numbers. If common is not supplied, uses
515 515 nullrev.
516 516 """
517 517 if common is None:
518 518 common = [nullrev]
519 519
520 520 return ancestor.incrementalmissingancestors(self.parentrevs, common)
521 521
522 522 def findmissingrevs(self, common=None, heads=None):
523 523 """Return the revision numbers of the ancestors of heads that
524 524 are not ancestors of common.
525 525
526 526 More specifically, return a list of revision numbers corresponding to
527 527 nodes N such that every N satisfies the following constraints:
528 528
529 529 1. N is an ancestor of some node in 'heads'
530 530 2. N is not an ancestor of any node in 'common'
531 531
532 532 The list is sorted by revision number, meaning it is
533 533 topologically sorted.
534 534
535 535 'heads' and 'common' are both lists of revision numbers. If heads is
536 536 not supplied, uses all of the revlog's heads. If common is not
537 537 supplied, uses nullid."""
538 538 if common is None:
539 539 common = [nullrev]
540 540 if heads is None:
541 541 heads = self.headrevs()
542 542
543 543 inc = self.incrementalmissingrevs(common=common)
544 544 return inc.missingancestors(heads)
545 545
546 546 def findmissing(self, common=None, heads=None):
547 547 """Return the ancestors of heads that are not ancestors of common.
548 548
549 549 More specifically, return a list of nodes N such that every N
550 550 satisfies the following constraints:
551 551
552 552 1. N is an ancestor of some node in 'heads'
553 553 2. N is not an ancestor of any node in 'common'
554 554
555 555 The list is sorted by revision number, meaning it is
556 556 topologically sorted.
557 557
558 558 'heads' and 'common' are both lists of node IDs. If heads is
559 559 not supplied, uses all of the revlog's heads. If common is not
560 560 supplied, uses nullid."""
561 561 if common is None:
562 562 common = [nullid]
563 563 if heads is None:
564 564 heads = self.heads()
565 565
566 566 common = [self.rev(n) for n in common]
567 567 heads = [self.rev(n) for n in heads]
568 568
569 569 inc = self.incrementalmissingrevs(common=common)
570 570 return [self.node(r) for r in inc.missingancestors(heads)]
571 571
572 572 def nodesbetween(self, roots=None, heads=None):
573 573 """Return a topological path from 'roots' to 'heads'.
574 574
575 575 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
576 576 topologically sorted list of all nodes N that satisfy both of
577 577 these constraints:
578 578
579 579 1. N is a descendant of some node in 'roots'
580 580 2. N is an ancestor of some node in 'heads'
581 581
582 582 Every node is considered to be both a descendant and an ancestor
583 583 of itself, so every reachable node in 'roots' and 'heads' will be
584 584 included in 'nodes'.
585 585
586 586 'outroots' is the list of reachable nodes in 'roots', i.e., the
587 587 subset of 'roots' that is returned in 'nodes'. Likewise,
588 588 'outheads' is the subset of 'heads' that is also in 'nodes'.
589 589
590 590 'roots' and 'heads' are both lists of node IDs. If 'roots' is
591 591 unspecified, uses nullid as the only root. If 'heads' is
592 592 unspecified, uses list of all of the revlog's heads."""
593 593 nonodes = ([], [], [])
594 594 if roots is not None:
595 595 roots = list(roots)
596 596 if not roots:
597 597 return nonodes
598 598 lowestrev = min([self.rev(n) for n in roots])
599 599 else:
600 600 roots = [nullid] # Everybody's a descendant of nullid
601 601 lowestrev = nullrev
602 602 if (lowestrev == nullrev) and (heads is None):
603 603 # We want _all_ the nodes!
604 604 return ([self.node(r) for r in self], [nullid], list(self.heads()))
605 605 if heads is None:
606 606 # All nodes are ancestors, so the latest ancestor is the last
607 607 # node.
608 608 highestrev = len(self) - 1
609 609 # Set ancestors to None to signal that every node is an ancestor.
610 610 ancestors = None
611 611 # Set heads to an empty dictionary for later discovery of heads
612 612 heads = {}
613 613 else:
614 614 heads = list(heads)
615 615 if not heads:
616 616 return nonodes
617 617 ancestors = set()
618 618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 619 # Also, later we will be using it to filter out the heads we can't
620 620 # find from roots.
621 621 heads = dict.fromkeys(heads, False)
622 622 # Start at the top and keep marking parents until we're done.
623 623 nodestotag = set(heads)
624 624 # Remember where the top was so we can use it as a limit later.
625 625 highestrev = max([self.rev(n) for n in nodestotag])
626 626 while nodestotag:
627 627 # grab a node to tag
628 628 n = nodestotag.pop()
629 629 # Never tag nullid
630 630 if n == nullid:
631 631 continue
632 632 # A node's revision number represents its place in a
633 633 # topologically sorted list of nodes.
634 634 r = self.rev(n)
635 635 if r >= lowestrev:
636 636 if n not in ancestors:
637 637 # If we are possibly a descendant of one of the roots
638 638 # and we haven't already been marked as an ancestor
639 639 ancestors.add(n) # Mark as ancestor
640 640 # Add non-nullid parents to list of nodes to tag.
641 641 nodestotag.update([p for p in self.parents(n) if
642 642 p != nullid])
643 643 elif n in heads: # We've seen it before, is it a fake head?
644 644 # So it is, real heads should not be the ancestors of
645 645 # any other heads.
646 646 heads.pop(n)
647 647 if not ancestors:
648 648 return nonodes
649 649 # Now that we have our set of ancestors, we want to remove any
650 650 # roots that are not ancestors.
651 651
652 652 # If one of the roots was nullid, everything is included anyway.
653 653 if lowestrev > nullrev:
654 654 # But, since we weren't, let's recompute the lowest rev to not
655 655 # include roots that aren't ancestors.
656 656
657 657 # Filter out roots that aren't ancestors of heads
658 658 roots = [n for n in roots if n in ancestors]
659 659 # Recompute the lowest revision
660 660 if roots:
661 661 lowestrev = min([self.rev(n) for n in roots])
662 662 else:
663 663 # No more roots? Return empty list
664 664 return nonodes
665 665 else:
666 666 # We are descending from nullid, and don't need to care about
667 667 # any other roots.
668 668 lowestrev = nullrev
669 669 roots = [nullid]
670 670 # Transform our roots list into a set.
671 671 descendants = set(roots)
672 672 # Also, keep the original roots so we can filter out roots that aren't
673 673 # 'real' roots (i.e. are descended from other roots).
674 674 roots = descendants.copy()
675 675 # Our topologically sorted list of output nodes.
676 676 orderedout = []
677 677 # Don't start at nullid since we don't want nullid in our output list,
678 678 # and if nullid shows up in descendants, empty parents will look like
679 679 # they're descendants.
680 680 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
681 681 n = self.node(r)
682 682 isdescendant = False
683 683 if lowestrev == nullrev: # Everybody is a descendant of nullid
684 684 isdescendant = True
685 685 elif n in descendants:
686 686 # n is already a descendant
687 687 isdescendant = True
688 688 # This check only needs to be done here because all the roots
689 689 # will start being marked is descendants before the loop.
690 690 if n in roots:
691 691 # If n was a root, check if it's a 'real' root.
692 692 p = tuple(self.parents(n))
693 693 # If any of its parents are descendants, it's not a root.
694 694 if (p[0] in descendants) or (p[1] in descendants):
695 695 roots.remove(n)
696 696 else:
697 697 p = tuple(self.parents(n))
698 698 # A node is a descendant if either of its parents are
699 699 # descendants. (We seeded the dependents list with the roots
700 700 # up there, remember?)
701 701 if (p[0] in descendants) or (p[1] in descendants):
702 702 descendants.add(n)
703 703 isdescendant = True
704 704 if isdescendant and ((ancestors is None) or (n in ancestors)):
705 705 # Only include nodes that are both descendants and ancestors.
706 706 orderedout.append(n)
707 707 if (ancestors is not None) and (n in heads):
708 708 # We're trying to figure out which heads are reachable
709 709 # from roots.
710 710 # Mark this head as having been reached
711 711 heads[n] = True
712 712 elif ancestors is None:
713 713 # Otherwise, we're trying to discover the heads.
714 714 # Assume this is a head because if it isn't, the next step
715 715 # will eventually remove it.
716 716 heads[n] = True
717 717 # But, obviously its parents aren't.
718 718 for p in self.parents(n):
719 719 heads.pop(p, None)
720 720 heads = [n for n, flag in heads.iteritems() if flag]
721 721 roots = list(roots)
722 722 assert orderedout
723 723 assert roots
724 724 assert heads
725 725 return (orderedout, roots, heads)
726 726
727 727 def headrevs(self):
728 728 try:
729 729 return self.index.headrevs()
730 730 except AttributeError:
731 731 return self._headrevs()
732 732
733 733 def computephases(self, roots):
734 734 return self.index.computephasesmapsets(roots)
735 735
736 736 def _headrevs(self):
737 737 count = len(self)
738 738 if not count:
739 739 return [nullrev]
740 740 # we won't iter over filtered rev so nobody is a head at start
741 741 ishead = [0] * (count + 1)
742 742 index = self.index
743 743 for r in self:
744 744 ishead[r] = 1 # I may be an head
745 745 e = index[r]
746 746 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
747 747 return [r for r, val in enumerate(ishead) if val]
748 748
749 749 def heads(self, start=None, stop=None):
750 750 """return the list of all nodes that have no children
751 751
752 752 if start is specified, only heads that are descendants of
753 753 start will be returned
754 754 if stop is specified, it will consider all the revs from stop
755 755 as if they had no children
756 756 """
757 757 if start is None and stop is None:
758 758 if not len(self):
759 759 return [nullid]
760 760 return [self.node(r) for r in self.headrevs()]
761 761
762 762 if start is None:
763 763 start = nullid
764 764 if stop is None:
765 765 stop = []
766 766 stoprevs = set([self.rev(n) for n in stop])
767 767 startrev = self.rev(start)
768 768 reachable = set((startrev,))
769 769 heads = set((startrev,))
770 770
771 771 parentrevs = self.parentrevs
772 772 for r in self.revs(start=startrev + 1):
773 773 for p in parentrevs(r):
774 774 if p in reachable:
775 775 if r not in stoprevs:
776 776 reachable.add(r)
777 777 heads.add(r)
778 778 if p in heads and p not in stoprevs:
779 779 heads.remove(p)
780 780
781 781 return [self.node(r) for r in heads]
782 782
783 783 def children(self, node):
784 784 """find the children of a given node"""
785 785 c = []
786 786 p = self.rev(node)
787 787 for r in self.revs(start=p + 1):
788 788 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
789 789 if prevs:
790 790 for pr in prevs:
791 791 if pr == p:
792 792 c.append(self.node(r))
793 793 elif p == nullrev:
794 794 c.append(self.node(r))
795 795 return c
796 796
797 797 def descendant(self, start, end):
798 798 if start == nullrev:
799 799 return True
800 800 for i in self.descendants([start]):
801 801 if i == end:
802 802 return True
803 803 elif i > end:
804 804 break
805 805 return False
806 806
807 807 def commonancestorsheads(self, a, b):
808 808 """calculate all the heads of the common ancestors of nodes a and b"""
809 809 a, b = self.rev(a), self.rev(b)
810 810 try:
811 811 ancs = self.index.commonancestorsheads(a, b)
812 812 except (AttributeError, OverflowError): # C implementation failed
813 813 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
814 814 return map(self.node, ancs)
815 815
816 816 def isancestor(self, a, b):
817 817 """return True if node a is an ancestor of node b
818 818
819 819 The implementation of this is trivial but the use of
820 820 commonancestorsheads is not."""
821 821 return a in self.commonancestorsheads(a, b)
822 822
823 823 def ancestor(self, a, b):
824 824 """calculate the "best" common ancestor of nodes a and b"""
825 825
826 826 a, b = self.rev(a), self.rev(b)
827 827 try:
828 828 ancs = self.index.ancestors(a, b)
829 829 except (AttributeError, OverflowError):
830 830 ancs = ancestor.ancestors(self.parentrevs, a, b)
831 831 if ancs:
832 832 # choose a consistent winner when there's a tie
833 833 return min(map(self.node, ancs))
834 834 return nullid
835 835
836 836 def _match(self, id):
837 837 if isinstance(id, int):
838 838 # rev
839 839 return self.node(id)
840 840 if len(id) == 20:
841 841 # possibly a binary node
842 842 # odds of a binary node being all hex in ASCII are 1 in 10**25
843 843 try:
844 844 node = id
845 845 self.rev(node) # quick search the index
846 846 return node
847 847 except LookupError:
848 848 pass # may be partial hex id
849 849 try:
850 850 # str(rev)
851 851 rev = int(id)
852 852 if str(rev) != id:
853 853 raise ValueError
854 854 if rev < 0:
855 855 rev = len(self) + rev
856 856 if rev < 0 or rev >= len(self):
857 857 raise ValueError
858 858 return self.node(rev)
859 859 except (ValueError, OverflowError):
860 860 pass
861 861 if len(id) == 40:
862 862 try:
863 863 # a full hex nodeid?
864 864 node = bin(id)
865 865 self.rev(node)
866 866 return node
867 867 except (TypeError, LookupError):
868 868 pass
869 869
870 870 def _partialmatch(self, id):
871 871 try:
872 872 n = self.index.partialmatch(id)
873 873 if n and self.hasnode(n):
874 874 return n
875 875 return None
876 876 except RevlogError:
877 877 # parsers.c radix tree lookup gave multiple matches
878 878 # fall through to slow path that filters hidden revisions
879 879 pass
880 880 except (AttributeError, ValueError):
881 881 # we are pure python, or key was too short to search radix tree
882 882 pass
883 883
884 884 if id in self._pcache:
885 885 return self._pcache[id]
886 886
887 887 if len(id) < 40:
888 888 try:
889 889 # hex(node)[:...]
890 890 l = len(id) // 2 # grab an even number of digits
891 891 prefix = bin(id[:l * 2])
892 892 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
893 893 nl = [n for n in nl if hex(n).startswith(id) and
894 894 self.hasnode(n)]
895 895 if len(nl) > 0:
896 896 if len(nl) == 1:
897 897 self._pcache[id] = nl[0]
898 898 return nl[0]
899 899 raise LookupError(id, self.indexfile,
900 900 _('ambiguous identifier'))
901 901 return None
902 902 except TypeError:
903 903 pass
904 904
905 905 def lookup(self, id):
906 906 """locate a node based on:
907 907 - revision number or str(revision number)
908 908 - nodeid or subset of hex nodeid
909 909 """
910 910 n = self._match(id)
911 911 if n is not None:
912 912 return n
913 913 n = self._partialmatch(id)
914 914 if n:
915 915 return n
916 916
917 917 raise LookupError(id, self.indexfile, _('no match found'))
918 918
919 919 def cmp(self, node, text):
920 920 """compare text with a given file revision
921 921
922 922 returns True if text is different than what is stored.
923 923 """
924 924 p1, p2 = self.parents(node)
925 925 return hash(text, p1, p2) != node
926 926
927 927 def _addchunk(self, offset, data):
928 928 o, d = self._chunkcache
929 929 # try to add to existing cache
930 930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
931 931 self._chunkcache = o, d + data
932 932 else:
933 933 self._chunkcache = offset, data
934 934
935 935 def _loadchunk(self, offset, length, df=None):
936 936 """Load a chunk/segment from the revlog.
937 937
938 938 Accepts absolute offset, length to read, and an optional existing
939 939 file handle to read from.
940 940
941 941 If an existing file handle is passed, it will be seeked and the
942 942 original seek position will NOT be restored.
943 943 """
944 944 if df is not None:
945 945 closehandle = False
946 946 else:
947 947 if self._inline:
948 948 df = self.opener(self.indexfile)
949 949 else:
950 950 df = self.opener(self.datafile)
951 951 closehandle = True
952 952
953 953 # Cache data both forward and backward around the requested
954 954 # data, in a fixed size window. This helps speed up operations
955 955 # involving reading the revlog backwards.
956 956 cachesize = self._chunkcachesize
957 957 realoffset = offset & ~(cachesize - 1)
958 958 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
959 959 - realoffset)
960 960 df.seek(realoffset)
961 961 d = df.read(reallength)
962 962 if closehandle:
963 963 df.close()
964 964 self._addchunk(realoffset, d)
965 965 if offset != realoffset or reallength != length:
966 966 return util.buffer(d, offset - realoffset, length)
967 967 return d
968 968
969 969 def _getchunk(self, offset, length, df=None):
970 970 o, d = self._chunkcache
971 971 l = len(d)
972 972
973 973 # is it in the cache?
974 974 cachestart = offset - o
975 975 cacheend = cachestart + length
976 976 if cachestart >= 0 and cacheend <= l:
977 977 if cachestart == 0 and cacheend == l:
978 978 return d # avoid a copy
979 979 return util.buffer(d, cachestart, cacheend - cachestart)
980 980
981 981 return self._loadchunk(offset, length, df=df)
982 982
983 983 def _chunkraw(self, startrev, endrev, df=None):
984 984 start = self.start(startrev)
985 985 end = self.end(endrev)
986 986 if self._inline:
987 987 start += (startrev + 1) * self._io.size
988 988 end += (endrev + 1) * self._io.size
989 989 length = end - start
990 990 return self._getchunk(start, length, df=df)
991 991
992 992 def _chunk(self, rev, df=None):
993 993 return decompress(self._chunkraw(rev, rev, df=df))
994 994
995 995 def _chunks(self, revs, df=None):
996 996 '''faster version of [self._chunk(rev) for rev in revs]
997 997
998 998 Assumes that revs is in ascending order.'''
999 999 if not revs:
1000 1000 return []
1001 1001 start = self.start
1002 1002 length = self.length
1003 1003 inline = self._inline
1004 1004 iosize = self._io.size
1005 1005 buffer = util.buffer
1006 1006
1007 1007 l = []
1008 1008 ladd = l.append
1009 1009
1010 1010 # preload the cache
1011 1011 try:
1012 1012 while True:
1013 1013 # ensure that the cache doesn't change out from under us
1014 1014 _cache = self._chunkcache
1015 1015 self._chunkraw(revs[0], revs[-1], df=df)
1016 1016 if _cache == self._chunkcache:
1017 1017 break
1018 1018 offset, data = _cache
1019 1019 except OverflowError:
1020 1020 # issue4215 - we can't cache a run of chunks greater than
1021 1021 # 2G on Windows
1022 1022 return [self._chunk(rev, df=df) for rev in revs]
1023 1023
1024 1024 for rev in revs:
1025 1025 chunkstart = start(rev)
1026 1026 if inline:
1027 1027 chunkstart += (rev + 1) * iosize
1028 1028 chunklength = length(rev)
1029 1029 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1030 1030
1031 1031 return l
1032 1032
1033 1033 def _chunkclear(self):
1034 1034 self._chunkcache = (0, '')
1035 1035
1036 1036 def deltaparent(self, rev):
1037 1037 """return deltaparent of the given revision"""
1038 1038 base = self.index[rev][3]
1039 1039 if base == rev:
1040 1040 return nullrev
1041 1041 elif self._generaldelta:
1042 1042 return base
1043 1043 else:
1044 1044 return rev - 1
1045 1045
1046 1046 def revdiff(self, rev1, rev2):
1047 1047 """return or calculate a delta between two revisions"""
1048 1048 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1049 1049 return str(self._chunk(rev2))
1050 1050
1051 1051 return mdiff.textdiff(self.revision(rev1),
1052 1052 self.revision(rev2))
1053 1053
1054 1054 def revision(self, nodeorrev, _df=None):
1055 1055 """return an uncompressed revision of a given node or revision
1056 1056 number.
1057 1057
1058 1058 _df is an existing file handle to read from. It is meant to only be
1059 1059 used internally.
1060 1060 """
1061 1061 if isinstance(nodeorrev, int):
1062 1062 rev = nodeorrev
1063 1063 node = self.node(rev)
1064 1064 else:
1065 1065 node = nodeorrev
1066 1066 rev = None
1067 1067
1068 1068 cachedrev = None
1069 1069 if node == nullid:
1070 1070 return ""
1071 1071 if self._cache:
1072 1072 if self._cache[0] == node:
1073 1073 return self._cache[2]
1074 1074 cachedrev = self._cache[1]
1075 1075
1076 1076 # look up what we need to read
1077 1077 text = None
1078 1078 if rev is None:
1079 1079 rev = self.rev(node)
1080 1080
1081 1081 # check rev flags
1082 1082 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1083 1083 raise RevlogError(_('incompatible revision flag %x') %
1084 1084 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1085 1085
1086 1086 # build delta chain
1087 1087 chain = []
1088 1088 index = self.index # for performance
1089 1089 generaldelta = self._generaldelta
1090 1090 iterrev = rev
1091 1091 e = index[iterrev]
1092 1092 while iterrev != e[3] and iterrev != cachedrev:
1093 1093 chain.append(iterrev)
1094 1094 if generaldelta:
1095 1095 iterrev = e[3]
1096 1096 else:
1097 1097 iterrev -= 1
1098 1098 e = index[iterrev]
1099 1099
1100 1100 if iterrev == cachedrev:
1101 1101 # cache hit
1102 1102 text = self._cache[2]
1103 1103 else:
1104 1104 chain.append(iterrev)
1105 1105 chain.reverse()
1106 1106
1107 1107 # drop cache to save memory
1108 1108 self._cache = None
1109 1109
1110 1110 bins = self._chunks(chain, df=_df)
1111 1111 if text is None:
1112 1112 text = str(bins[0])
1113 1113 bins = bins[1:]
1114 1114
1115 1115 text = mdiff.patches(text, bins)
1116 1116
1117 1117 text = self._checkhash(text, node, rev)
1118 1118
1119 1119 self._cache = (node, rev, text)
1120 1120 return text
1121 1121
1122 1122 def hash(self, text, p1, p2):
1123 1123 """Compute a node hash.
1124 1124
1125 1125 Available as a function so that subclasses can replace the hash
1126 1126 as needed.
1127 1127 """
1128 1128 return hash(text, p1, p2)
1129 1129
1130 1130 def _checkhash(self, text, node, rev):
1131 1131 p1, p2 = self.parents(node)
1132 1132 self.checkhash(text, p1, p2, node, rev)
1133 1133 return text
1134 1134
1135 1135 def checkhash(self, text, p1, p2, node, rev=None):
1136 1136 if node != self.hash(text, p1, p2):
1137 1137 revornode = rev
1138 1138 if revornode is None:
1139 1139 revornode = templatefilters.short(hex(node))
1140 1140 raise RevlogError(_("integrity check failed on %s:%s")
1141 1141 % (self.indexfile, revornode))
1142 1142
1143 1143 def checkinlinesize(self, tr, fp=None):
1144 1144 """Check if the revlog is too big for inline and convert if so.
1145 1145
1146 1146 This should be called after revisions are added to the revlog. If the
1147 1147 revlog has grown too large to be an inline revlog, it will convert it
1148 1148 to use multiple index and data files.
1149 1149 """
1150 1150 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1151 1151 return
1152 1152
1153 1153 trinfo = tr.find(self.indexfile)
1154 1154 if trinfo is None:
1155 1155 raise RevlogError(_("%s not found in the transaction")
1156 1156 % self.indexfile)
1157 1157
1158 1158 trindex = trinfo[2]
1159 1159 if trindex is not None:
1160 1160 dataoff = self.start(trindex)
1161 1161 else:
1162 1162 # revlog was stripped at start of transaction, use all leftover data
1163 1163 trindex = len(self) - 1
1164 1164 dataoff = self.end(-2)
1165 1165
1166 1166 tr.add(self.datafile, dataoff)
1167 1167
1168 1168 if fp:
1169 1169 fp.flush()
1170 1170 fp.close()
1171 1171
1172 1172 df = self.opener(self.datafile, 'w')
1173 1173 try:
1174 1174 for r in self:
1175 1175 df.write(self._chunkraw(r, r))
1176 1176 finally:
1177 1177 df.close()
1178 1178
1179 1179 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1180 1180 self.version &= ~(REVLOGNGINLINEDATA)
1181 1181 self._inline = False
1182 1182 for i in self:
1183 1183 e = self._io.packentry(self.index[i], self.node, self.version, i)
1184 1184 fp.write(e)
1185 1185
1186 1186 # if we don't call close, the temp file will never replace the
1187 1187 # real index
1188 1188 fp.close()
1189 1189
1190 1190 tr.replace(self.indexfile, trindex * self._io.size)
1191 1191 self._chunkclear()
1192 1192
1193 1193 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1194 1194 node=None):
1195 1195 """add a revision to the log
1196 1196
1197 1197 text - the revision data to add
1198 1198 transaction - the transaction object used for rollback
1199 1199 link - the linkrev data to add
1200 1200 p1, p2 - the parent nodeids of the revision
1201 1201 cachedelta - an optional precomputed delta
1202 1202 node - nodeid of revision; typically node is not specified, and it is
1203 1203 computed by default as hash(text, p1, p2), however subclasses might
1204 1204 use different hashing method (and override checkhash() in such case)
1205 1205 """
1206 1206 if link == nullrev:
1207 1207 raise RevlogError(_("attempted to add linkrev -1 to %s")
1208 1208 % self.indexfile)
1209 1209
1210 1210 if len(text) > _maxentrysize:
1211 1211 raise RevlogError(
1212 1212 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1213 1213 % (self.indexfile, len(text)))
1214 1214
1215 1215 node = node or self.hash(text, p1, p2)
1216 1216 if node in self.nodemap:
1217 1217 return node
1218 1218
1219 1219 dfh = None
1220 1220 if not self._inline:
1221 1221 dfh = self.opener(self.datafile, "a+")
1222 1222 ifh = self.opener(self.indexfile, "a+")
1223 1223 try:
1224 1224 return self._addrevision(node, text, transaction, link, p1, p2,
1225 1225 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1226 1226 finally:
1227 1227 if dfh:
1228 1228 dfh.close()
1229 1229 ifh.close()
1230 1230
1231 1231 def compress(self, text):
1232 1232 """ generate a possibly-compressed representation of text """
1233 1233 if not text:
1234 1234 return ("", text)
1235 1235 l = len(text)
1236 1236 bin = None
1237 1237 if l < 44:
1238 1238 pass
1239 1239 elif l > 1000000:
1240 1240 # zlib makes an internal copy, thus doubling memory usage for
1241 1241 # large files, so lets do this in pieces
1242 1242 z = zlib.compressobj()
1243 1243 p = []
1244 1244 pos = 0
1245 1245 while pos < l:
1246 1246 pos2 = pos + 2**20
1247 1247 p.append(z.compress(text[pos:pos2]))
1248 1248 pos = pos2
1249 1249 p.append(z.flush())
1250 1250 if sum(map(len, p)) < l:
1251 1251 bin = "".join(p)
1252 1252 else:
1253 1253 bin = _compress(text)
1254 1254 if bin is None or len(bin) > l:
1255 1255 if text[0] == '\0':
1256 1256 return ("", text)
1257 1257 return ('u', text)
1258 1258 return ("", bin)
1259 1259
1260 1260 def _isgooddelta(self, d, textlen):
1261 1261 """Returns True if the given delta is good. Good means that it is within
1262 1262 the disk span, disk size, and chain length bounds that we know to be
1263 1263 performant."""
1264 1264 if d is None:
1265 1265 return False
1266 1266
1267 1267 # - 'dist' is the distance from the base revision -- bounding it limits
1268 1268 # the amount of I/O we need to do.
1269 1269 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1270 1270 # to apply -- bounding it limits the amount of CPU we consume.
1271 1271 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1272 1272 if (dist > textlen * 4 or l > textlen or
1273 1273 compresseddeltalen > textlen * 2 or
1274 1274 (self._maxchainlen and chainlen > self._maxchainlen)):
1275 1275 return False
1276 1276
1277 1277 return True
1278 1278
1279 1279 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1280 1280 cachedelta, ifh, dfh, alwayscache=False):
1281 1281 """internal function to add revisions to the log
1282 1282
1283 1283 see addrevision for argument descriptions.
1284 1284 invariants:
1285 1285 - text is optional (can be None); if not set, cachedelta must be set.
1286 1286 if both are set, they must correspond to each other.
1287 1287 """
1288 1288 btext = [text]
1289 1289 def buildtext():
1290 1290 if btext[0] is not None:
1291 1291 return btext[0]
1292 1292 baserev = cachedelta[0]
1293 1293 delta = cachedelta[1]
1294 1294 # special case deltas which replace entire base; no need to decode
1295 1295 # base revision. this neatly avoids censored bases, which throw when
1296 1296 # they're decoded.
1297 1297 hlen = struct.calcsize(">lll")
1298 1298 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1299 1299 len(delta) - hlen):
1300 1300 btext[0] = delta[hlen:]
1301 1301 else:
1302 1302 if self._inline:
1303 1303 fh = ifh
1304 1304 else:
1305 1305 fh = dfh
1306 1306 basetext = self.revision(self.node(baserev), _df=fh)
1307 1307 btext[0] = mdiff.patch(basetext, delta)
1308 1308 try:
1309 1309 self.checkhash(btext[0], p1, p2, node)
1310 1310 if flags & REVIDX_ISCENSORED:
1311 1311 raise RevlogError(_('node %s is not censored') % node)
1312 1312 except CensoredNodeError:
1313 1313 # must pass the censored index flag to add censored revisions
1314 1314 if not flags & REVIDX_ISCENSORED:
1315 1315 raise
1316 1316 return btext[0]
1317 1317
1318 1318 def builddelta(rev):
1319 1319 # can we use the cached delta?
1320 1320 if cachedelta and cachedelta[0] == rev:
1321 1321 delta = cachedelta[1]
1322 1322 else:
1323 1323 t = buildtext()
1324 1324 if self.iscensored(rev):
1325 1325 # deltas based on a censored revision must replace the
1326 1326 # full content in one patch, so delta works everywhere
1327 1327 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1328 1328 delta = header + t
1329 1329 else:
1330 1330 if self._inline:
1331 1331 fh = ifh
1332 1332 else:
1333 1333 fh = dfh
1334 1334 ptext = self.revision(self.node(rev), _df=fh)
1335 1335 delta = mdiff.textdiff(ptext, t)
1336 1336 data = self.compress(delta)
1337 1337 l = len(data[1]) + len(data[0])
1338 1338 if basecache[0] == rev:
1339 1339 chainbase = basecache[1]
1340 1340 else:
1341 1341 chainbase = self.chainbase(rev)
1342 1342 dist = l + offset - self.start(chainbase)
1343 1343 if self._generaldelta:
1344 1344 base = rev
1345 1345 else:
1346 1346 base = chainbase
1347 1347 chainlen, compresseddeltalen = self._chaininfo(rev)
1348 1348 chainlen += 1
1349 1349 compresseddeltalen += l
1350 1350 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1351 1351
1352 1352 curr = len(self)
1353 1353 prev = curr - 1
1354 1354 base = chainbase = curr
1355 1355 chainlen = None
1356 1356 offset = self.end(prev)
1357 1357 d = None
1358 1358 if self._basecache is None:
1359 1359 self._basecache = (prev, self.chainbase(prev))
1360 1360 basecache = self._basecache
1361 1361 p1r, p2r = self.rev(p1), self.rev(p2)
1362 1362
1363 1363 # full versions are inserted when the needed deltas
1364 1364 # become comparable to the uncompressed text
1365 1365 if text is None:
1366 1366 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1367 1367 cachedelta[1])
1368 1368 else:
1369 1369 textlen = len(text)
1370 1370
1371 1371 # should we try to build a delta?
1372 1372 if prev != nullrev:
1373 1373 if self._generaldelta:
1374 1374 if p2r != nullrev and self._aggressivemergedeltas:
1375 1375 d = builddelta(p1r)
1376 1376 d2 = builddelta(p2r)
1377 1377 p1good = self._isgooddelta(d, textlen)
1378 1378 p2good = self._isgooddelta(d2, textlen)
1379 1379 if p1good and p2good:
1380 1380 # If both are good deltas, choose the smallest
1381 1381 if d2[1] < d[1]:
1382 1382 d = d2
1383 1383 elif p2good:
1384 1384 # If only p2 is good, use it
1385 1385 d = d2
1386 1386 elif p1good:
1387 1387 pass
1388 1388 else:
1389 1389 # Neither is good, try against prev to hopefully save us
1390 1390 # a fulltext.
1391 1391 d = builddelta(prev)
1392 1392 else:
1393 1393 # Pick whichever parent is closer to us (to minimize the
1394 1394 # chance of having to build a fulltext). Since
1395 1395 # nullrev == -1, any non-merge commit will always pick p1r.
1396 1396 drev = p2r if p2r > p1r else p1r
1397 1397 d = builddelta(drev)
1398 1398 # If the chosen delta will result in us making a full text,
1399 1399 # give it one last try against prev.
1400 1400 if drev != prev and not self._isgooddelta(d, textlen):
1401 1401 d = builddelta(prev)
1402 1402 else:
1403 1403 d = builddelta(prev)
1404 1404 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1405 1405
1406 1406 if not self._isgooddelta(d, textlen):
1407 1407 text = buildtext()
1408 1408 data = self.compress(text)
1409 1409 l = len(data[1]) + len(data[0])
1410 1410 base = chainbase = curr
1411 1411
1412 1412 e = (offset_type(offset, flags), l, textlen,
1413 1413 base, link, p1r, p2r, node)
1414 1414 self.index.insert(-1, e)
1415 1415 self.nodemap[node] = curr
1416 1416
1417 1417 entry = self._io.packentry(e, self.node, self.version, curr)
1418 1418 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1419 1419
1420 1420 if alwayscache and text is None:
1421 1421 text = buildtext()
1422 1422
1423 1423 if type(text) == str: # only accept immutable objects
1424 1424 self._cache = (node, curr, text)
1425 1425 self._basecache = (curr, chainbase)
1426 1426 return node
1427 1427
1428 1428 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1429 1429 curr = len(self) - 1
1430 1430 if not self._inline:
1431 1431 transaction.add(self.datafile, offset)
1432 1432 transaction.add(self.indexfile, curr * len(entry))
1433 1433 if data[0]:
1434 1434 dfh.write(data[0])
1435 1435 dfh.write(data[1])
1436 dfh.flush()
1437 1436 ifh.write(entry)
1438 1437 else:
1439 1438 offset += curr * self._io.size
1440 1439 transaction.add(self.indexfile, offset, curr)
1441 1440 ifh.write(entry)
1442 1441 ifh.write(data[0])
1443 1442 ifh.write(data[1])
1444 1443 self.checkinlinesize(transaction, ifh)
1445 1444
1446 1445 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1447 1446 """
1448 1447 add a delta group
1449 1448
1450 1449 given a set of deltas, add them to the revision log. the
1451 1450 first delta is against its parent, which should be in our
1452 1451 log, the rest are against the previous delta.
1453 1452
1454 1453 If ``addrevisioncb`` is defined, it will be called with arguments of
1455 1454 this revlog and the node that was added.
1456 1455 """
1457 1456
1458 1457 # track the base of the current delta log
1459 1458 content = []
1460 1459 node = None
1461 1460
1462 1461 r = len(self)
1463 1462 end = 0
1464 1463 if r:
1465 1464 end = self.end(r - 1)
1466 1465 ifh = self.opener(self.indexfile, "a+")
1467 1466 isize = r * self._io.size
1468 1467 if self._inline:
1469 1468 transaction.add(self.indexfile, end + isize, r)
1470 1469 dfh = None
1471 1470 else:
1472 1471 transaction.add(self.indexfile, isize, r)
1473 1472 transaction.add(self.datafile, end)
1474 1473 dfh = self.opener(self.datafile, "a+")
1475 1474 def flush():
1476 1475 if dfh:
1477 1476 dfh.flush()
1478 1477 ifh.flush()
1479 1478 try:
1480 1479 # loop through our set of deltas
1481 1480 chain = None
1482 1481 while True:
1483 1482 chunkdata = bundle.deltachunk(chain)
1484 1483 if not chunkdata:
1485 1484 break
1486 1485 node = chunkdata['node']
1487 1486 p1 = chunkdata['p1']
1488 1487 p2 = chunkdata['p2']
1489 1488 cs = chunkdata['cs']
1490 1489 deltabase = chunkdata['deltabase']
1491 1490 delta = chunkdata['delta']
1492 1491
1493 1492 content.append(node)
1494 1493
1495 1494 link = linkmapper(cs)
1496 1495 if node in self.nodemap:
1497 1496 # this can happen if two branches make the same change
1498 1497 chain = node
1499 1498 continue
1500 1499
1501 1500 for p in (p1, p2):
1502 1501 if p not in self.nodemap:
1503 1502 raise LookupError(p, self.indexfile,
1504 1503 _('unknown parent'))
1505 1504
1506 1505 if deltabase not in self.nodemap:
1507 1506 raise LookupError(deltabase, self.indexfile,
1508 1507 _('unknown delta base'))
1509 1508
1510 1509 baserev = self.rev(deltabase)
1511 1510
1512 1511 if baserev != nullrev and self.iscensored(baserev):
1513 1512 # if base is censored, delta must be full replacement in a
1514 1513 # single patch operation
1515 1514 hlen = struct.calcsize(">lll")
1516 1515 oldlen = self.rawsize(baserev)
1517 1516 newlen = len(delta) - hlen
1518 1517 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1519 1518 raise error.CensoredBaseError(self.indexfile,
1520 1519 self.node(baserev))
1521 1520
1522 1521 flags = REVIDX_DEFAULT_FLAGS
1523 1522 if self._peek_iscensored(baserev, delta, flush):
1524 1523 flags |= REVIDX_ISCENSORED
1525 1524
1526 1525 # We assume consumers of addrevisioncb will want to retrieve
1527 1526 # the added revision, which will require a call to
1528 1527 # revision(). revision() will fast path if there is a cache
1529 1528 # hit. So, we tell _addrevision() to always cache in this case.
1530 1529 chain = self._addrevision(node, None, transaction, link,
1531 1530 p1, p2, flags, (baserev, delta),
1532 1531 ifh, dfh,
1533 1532 alwayscache=bool(addrevisioncb))
1534 1533
1535 1534 if addrevisioncb:
1536 1535 addrevisioncb(self, chain)
1537 1536
1538 1537 if not dfh and not self._inline:
1539 1538 # addrevision switched from inline to conventional
1540 1539 # reopen the index
1541 1540 ifh.close()
1542 1541 dfh = self.opener(self.datafile, "a+")
1543 1542 ifh = self.opener(self.indexfile, "a+")
1544 1543 finally:
1545 1544 if dfh:
1546 1545 dfh.close()
1547 1546 ifh.close()
1548 1547
1549 1548 return content
1550 1549
1551 1550 def iscensored(self, rev):
1552 1551 """Check if a file revision is censored."""
1553 1552 return False
1554 1553
1555 1554 def _peek_iscensored(self, baserev, delta, flush):
1556 1555 """Quickly check if a delta produces a censored revision."""
1557 1556 return False
1558 1557
1559 1558 def getstrippoint(self, minlink):
1560 1559 """find the minimum rev that must be stripped to strip the linkrev
1561 1560
1562 1561 Returns a tuple containing the minimum rev and a set of all revs that
1563 1562 have linkrevs that will be broken by this strip.
1564 1563 """
1565 1564 brokenrevs = set()
1566 1565 strippoint = len(self)
1567 1566
1568 1567 heads = {}
1569 1568 futurelargelinkrevs = set()
1570 1569 for head in self.headrevs():
1571 1570 headlinkrev = self.linkrev(head)
1572 1571 heads[head] = headlinkrev
1573 1572 if headlinkrev >= minlink:
1574 1573 futurelargelinkrevs.add(headlinkrev)
1575 1574
1576 1575 # This algorithm involves walking down the rev graph, starting at the
1577 1576 # heads. Since the revs are topologically sorted according to linkrev,
1578 1577 # once all head linkrevs are below the minlink, we know there are
1579 1578 # no more revs that could have a linkrev greater than minlink.
1580 1579 # So we can stop walking.
1581 1580 while futurelargelinkrevs:
1582 1581 strippoint -= 1
1583 1582 linkrev = heads.pop(strippoint)
1584 1583
1585 1584 if linkrev < minlink:
1586 1585 brokenrevs.add(strippoint)
1587 1586 else:
1588 1587 futurelargelinkrevs.remove(linkrev)
1589 1588
1590 1589 for p in self.parentrevs(strippoint):
1591 1590 if p != nullrev:
1592 1591 plinkrev = self.linkrev(p)
1593 1592 heads[p] = plinkrev
1594 1593 if plinkrev >= minlink:
1595 1594 futurelargelinkrevs.add(plinkrev)
1596 1595
1597 1596 return strippoint, brokenrevs
1598 1597
1599 1598 def strip(self, minlink, transaction):
1600 1599 """truncate the revlog on the first revision with a linkrev >= minlink
1601 1600
1602 1601 This function is called when we're stripping revision minlink and
1603 1602 its descendants from the repository.
1604 1603
1605 1604 We have to remove all revisions with linkrev >= minlink, because
1606 1605 the equivalent changelog revisions will be renumbered after the
1607 1606 strip.
1608 1607
1609 1608 So we truncate the revlog on the first of these revisions, and
1610 1609 trust that the caller has saved the revisions that shouldn't be
1611 1610 removed and that it'll re-add them after this truncation.
1612 1611 """
1613 1612 if len(self) == 0:
1614 1613 return
1615 1614
1616 1615 rev, _ = self.getstrippoint(minlink)
1617 1616 if rev == len(self):
1618 1617 return
1619 1618
1620 1619 # first truncate the files on disk
1621 1620 end = self.start(rev)
1622 1621 if not self._inline:
1623 1622 transaction.add(self.datafile, end)
1624 1623 end = rev * self._io.size
1625 1624 else:
1626 1625 end += rev * self._io.size
1627 1626
1628 1627 transaction.add(self.indexfile, end)
1629 1628
1630 1629 # then reset internal state in memory to forget those revisions
1631 1630 self._cache = None
1632 1631 self._chaininfocache = {}
1633 1632 self._chunkclear()
1634 1633 for x in xrange(rev, len(self)):
1635 1634 del self.nodemap[self.node(x)]
1636 1635
1637 1636 del self.index[rev:-1]
1638 1637
1639 1638 def checksize(self):
1640 1639 expected = 0
1641 1640 if len(self):
1642 1641 expected = max(0, self.end(len(self) - 1))
1643 1642
1644 1643 try:
1645 1644 f = self.opener(self.datafile)
1646 1645 f.seek(0, 2)
1647 1646 actual = f.tell()
1648 1647 f.close()
1649 1648 dd = actual - expected
1650 1649 except IOError as inst:
1651 1650 if inst.errno != errno.ENOENT:
1652 1651 raise
1653 1652 dd = 0
1654 1653
1655 1654 try:
1656 1655 f = self.opener(self.indexfile)
1657 1656 f.seek(0, 2)
1658 1657 actual = f.tell()
1659 1658 f.close()
1660 1659 s = self._io.size
1661 1660 i = max(0, actual // s)
1662 1661 di = actual - (i * s)
1663 1662 if self._inline:
1664 1663 databytes = 0
1665 1664 for r in self:
1666 1665 databytes += max(0, self.length(r))
1667 1666 dd = 0
1668 1667 di = actual - len(self) * s - databytes
1669 1668 except IOError as inst:
1670 1669 if inst.errno != errno.ENOENT:
1671 1670 raise
1672 1671 di = 0
1673 1672
1674 1673 return (dd, di)
1675 1674
1676 1675 def files(self):
1677 1676 res = [self.indexfile]
1678 1677 if not self._inline:
1679 1678 res.append(self.datafile)
1680 1679 return res
General Comments 0
You need to be logged in to leave comments. Login now