revlog: make code in builddelta() slightly easier to read...
Gregory Szorc
r30011:d81fe5af default
@@ -1,1789 +1,1790 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 return long(long(offset) << 16 | type)
76 76
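Reviewer note: the first field of each index entry packs the data offset and the 16-bit per-revision flags into one integer; getoffset() and gettype() above are the two halves of offset_type(). A minimal round-trip check (plain ints here; the original wraps the result in long() for Python 2):

    packed = (0x1234 << 16) | 0x0001       # offset_type(0x1234, 0x0001)
    assert packed >> 16 == 0x1234          # getoffset()
    assert packed & 0xFFFF == 0x0001       # gettype()
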
77 77 _nullhash = hashlib.sha1(nullid)
78 78
79 79 def hash(text, p1, p2):
80 80 """generate a hash from the given text and its parent hashes
81 81
82 82 This hash combines both the current file contents and its history
83 83 in a manner that makes it easy to distinguish nodes with the same
84 84 content in the revision graph.
85 85 """
86 86 # As of now, if one of the parent nodes is null, p2 is null
87 87 if p2 == nullid:
88 88 # deep copy of a hash is faster than creating one
89 89 s = _nullhash.copy()
90 90 s.update(p1)
91 91 else:
92 92 # none of the parent nodes are nullid
93 93 l = [p1, p2]
94 94 l.sort()
95 95 s = hashlib.sha1(l[0])
96 96 s.update(l[1])
97 97 s.update(text)
98 98 return s.digest()
99 99
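Reviewer note: a condensed re-implementation of the same digest may make the parent ordering easier to see. Since nullid is twenty zero bytes it always sorts first, so the p2 == nullid fast path and the sorted branch agree (bytes literals below for clarity; the original is Python 2 str):

    import hashlib

    NULLID = b"\0" * 20                    # the value of node.nullid

    def nodehash(text, p1, p2):
        # sorted parents, then the text: identical history and content
        # always produce the same 20-byte node
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()

    root = nodehash(b"hello\n", NULLID, NULLID)   # a root revision
    assert len(root) == 20
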
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 try:
109 109 return _decompress(bin)
110 110 except zlib.error as e:
111 111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 112 if t == 'u':
113 113 return util.buffer(bin, 1)
114 114 raise RevlogError(_("unknown compression type %r") % t)
115 115
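Reviewer note: the dispatch works because the one-byte tag space is disjoint. A zlib stream always begins with 'x' (0x78), 'u' marks data stored verbatim, and '\0' can only begin an uncompressed chunk. A quick check:

    import zlib

    blob = zlib.compress(b"some revision text")
    assert blob[0:1] == b"x"                        # zlib header byte 0x78
    assert zlib.decompress(blob) == b"some revision text"
    assert (b"u" + b"raw data")[1:] == b"raw data"  # 'u': stored verbatim
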
116 116 # index v0:
117 117 # 4 bytes: offset
118 118 # 4 bytes: compressed length
119 119 # 4 bytes: base rev
120 120 # 4 bytes: link rev
121 121 # 20 bytes: parent 1 nodeid
122 122 # 20 bytes: parent 2 nodeid
123 123 # 20 bytes: nodeid
124 124 indexformatv0 = ">4l20s20s20s"
125 125
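Reviewer note: each v0 record is a fixed 76 bytes (four big-endian longs plus three 20-byte nodes), which is what lets parseindex() below walk the file in fixed strides. A pack/unpack sanity check:

    import struct

    indexformatv0 = ">4l20s20s20s"
    assert struct.calcsize(indexformatv0) == 76     # fixed record size

    nullid = b"\0" * 20
    rec = struct.pack(indexformatv0, 0, 11, 0, 0, nullid, nullid, b"\x01" * 20)
    offset, clen, base, link, p1, p2, node = struct.unpack(indexformatv0, rec)
    assert (offset, clen, node) == (0, 11, b"\x01" * 20)
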
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 versionformat = ">I"
171 171
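Reviewer note: a RevlogNG record is 64 bytes. The leading Q is the offset_type() value from above, and packentry() below splices the version header over the first four bytes of revision 0's entry. A sketch:

    import struct

    indexformatng = ">Qiiiiii20s12x"
    assert struct.calcsize(indexformatng) == 64

    entry = struct.pack(indexformatng,
                        (0 << 16) | 0,     # offset_type(0, 0)
                        11, 11,            # compressed / uncompressed length
                        0, 0,              # base rev, link rev
                        -1, -1,            # parent revs (nullrev)
                        b"\x02" * 20)
    first = struct.pack(">I", 1) + entry[4:]   # rev 0 as written on disk
    assert len(first) == 64
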
172 172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 173 # signed integer)
174 174 _maxentrysize = 0x7fffffff
175 175
176 176 class revlogio(object):
177 177 def __init__(self):
178 178 self.size = struct.calcsize(indexformatng)
179 179
180 180 def parseindex(self, data, inline):
181 181 # call the C implementation to parse the index data
182 182 index, cache = parsers.parse_index2(data, inline)
183 183 return index, getattr(index, 'nodemap', None), cache
184 184
185 185 def packentry(self, entry, node, version, rev):
186 186 p = _pack(indexformatng, *entry)
187 187 if rev == 0:
188 188 p = _pack(versionformat, version) + p[4:]
189 189 return p
190 190
191 191 class revlog(object):
192 192 """
193 193 the underlying revision storage object
194 194
195 195 A revlog consists of two parts, an index and the revision data.
196 196
197 197 The index is a file with a fixed record size containing
198 198 information on each revision, including its nodeid (hash), the
199 199 nodeids of its parents, the position and offset of its data within
200 200 the data file, and the revision it's based on. Finally, each entry
201 201 contains a linkrev entry that can serve as a pointer to external
202 202 data.
203 203
204 204 The revision data itself is a linear collection of data chunks.
205 205 Each chunk represents a revision and is usually represented as a
206 206 delta against the previous chunk. To bound lookup time, runs of
207 207 deltas are limited to about 2 times the length of the original
208 208 version data. This makes retrieval of a version proportional to
209 209 its size, or O(1) relative to the number of revisions.
210 210
211 211 Both pieces of the revlog are written to in an append-only
212 212 fashion, which means we never need to rewrite a file to insert or
213 213 remove data, and can use some simple techniques to avoid the need
214 214 for locking while reading.
215 215
216 216 If checkambig, indexfile is opened with checkambig=True at
217 217 writing, to avoid file stat ambiguity.
218 218 """
219 219 def __init__(self, opener, indexfile, checkambig=False):
220 220 """
221 221 create a revlog object
222 222
223 223 opener is a function that abstracts the file opening operation
224 224 and can be used to implement COW semantics or the like.
225 225 """
226 226 self.indexfile = indexfile
227 227 self.datafile = indexfile[:-2] + ".d"
228 228 self.opener = opener
229 229 # When True, indexfile is opened with checkambig=True at writing, to
230 230 # avoid file stat ambiguity.
231 231 self._checkambig = checkambig
232 232 # 3-tuple of (node, rev, text) for a raw revision.
233 233 self._cache = None
234 234 # Maps rev to chain base rev.
235 235 self._chainbasecache = util.lrucachedict(100)
236 236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 237 self._chunkcache = (0, '')
238 238 # How much data to read and cache into the raw revlog data cache.
239 239 self._chunkcachesize = 65536
240 240 self._maxchainlen = None
241 241 self._aggressivemergedeltas = False
242 242 self.index = []
243 243 # Mapping of partial identifiers to full nodes.
244 244 self._pcache = {}
245 245 # Mapping of revision integer to full node.
246 246 self._nodecache = {nullid: nullrev}
247 247 self._nodepos = None
248 248
249 249 v = REVLOG_DEFAULT_VERSION
250 250 opts = getattr(opener, 'options', None)
251 251 if opts is not None:
252 252 if 'revlogv1' in opts:
253 253 if 'generaldelta' in opts:
254 254 v |= REVLOGGENERALDELTA
255 255 else:
256 256 v = 0
257 257 if 'chunkcachesize' in opts:
258 258 self._chunkcachesize = opts['chunkcachesize']
259 259 if 'maxchainlen' in opts:
260 260 self._maxchainlen = opts['maxchainlen']
261 261 if 'aggressivemergedeltas' in opts:
262 262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264 264
265 265 if self._chunkcachesize <= 0:
266 266 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 267 'than 0') % self._chunkcachesize)
268 268 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 269 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 270 'of 2') % self._chunkcachesize)
271 271
272 272 indexdata = ''
273 273 self._initempty = True
274 274 try:
275 275 f = self.opener(self.indexfile)
276 276 indexdata = f.read()
277 277 f.close()
278 278 if len(indexdata) > 0:
279 279 v = struct.unpack(versionformat, indexdata[:4])[0]
280 280 self._initempty = False
281 281 except IOError as inst:
282 282 if inst.errno != errno.ENOENT:
283 283 raise
284 284
285 285 self.version = v
286 286 self._inline = v & REVLOGNGINLINEDATA
287 287 self._generaldelta = v & REVLOGGENERALDELTA
288 288 flags = v & ~0xFFFF
289 289 fmt = v & 0xFFFF
290 290 if fmt == REVLOGV0 and flags:
291 291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 292 % (self.indexfile, flags >> 16))
293 293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 295 % (self.indexfile, flags >> 16))
296 296 elif fmt > REVLOGNG:
297 297 raise RevlogError(_("index %s unknown format %d")
298 298 % (self.indexfile, fmt))
299 299
300 300 self._io = revlogio()
301 301 if self.version == REVLOGV0:
302 302 self._io = revlogoldio()
303 303 try:
304 304 d = self._io.parseindex(indexdata, self._inline)
305 305 except (ValueError, IndexError):
306 306 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
307 307 self.index, nodemap, self._chunkcache = d
308 308 if nodemap is not None:
309 309 self.nodemap = self._nodecache = nodemap
310 310 if not self._chunkcache:
311 311 self._chunkclear()
312 312 # revnum -> (chain-length, sum-delta-length)
313 313 self._chaininfocache = {}
314 314
315 315 def tip(self):
316 316 return self.node(len(self.index) - 2)
317 317 def __contains__(self, rev):
318 318 return 0 <= rev < len(self)
319 319 def __len__(self):
320 320 return len(self.index) - 1
321 321 def __iter__(self):
322 322 return iter(xrange(len(self)))
323 323 def revs(self, start=0, stop=None):
324 324 """iterate over all revs in this revlog (from start to stop)"""
325 325 step = 1
326 326 if stop is not None:
327 327 if start > stop:
328 328 step = -1
329 329 stop += step
330 330 else:
331 331 stop = len(self)
332 332 return xrange(start, stop, step)
333 333
334 334 @util.propertycache
335 335 def nodemap(self):
336 336 self.rev(self.node(0))
337 337 return self._nodecache
338 338
339 339 def hasnode(self, node):
340 340 try:
341 341 self.rev(node)
342 342 return True
343 343 except KeyError:
344 344 return False
345 345
346 346 def clearcaches(self):
347 347 self._cache = None
348 348 self._chainbasecache.clear()
349 349 self._chunkcache = (0, '')
350 350 self._pcache = {}
351 351
352 352 try:
353 353 self._nodecache.clearcaches()
354 354 except AttributeError:
355 355 self._nodecache = {nullid: nullrev}
356 356 self._nodepos = None
357 357
358 358 def rev(self, node):
359 359 try:
360 360 return self._nodecache[node]
361 361 except TypeError:
362 362 raise
363 363 except RevlogError:
364 364 # parsers.c radix tree lookup failed
365 365 raise LookupError(node, self.indexfile, _('no node'))
366 366 except KeyError:
367 367 # pure python cache lookup failed
368 368 n = self._nodecache
369 369 i = self.index
370 370 p = self._nodepos
371 371 if p is None:
372 372 p = len(i) - 2
373 373 for r in xrange(p, -1, -1):
374 374 v = i[r][7]
375 375 n[v] = r
376 376 if v == node:
377 377 self._nodepos = r - 1
378 378 return r
379 379 raise LookupError(node, self.indexfile, _('no node'))
380 380
381 381 def node(self, rev):
382 382 return self.index[rev][7]
383 383 def linkrev(self, rev):
384 384 return self.index[rev][4]
385 385 def parents(self, node):
386 386 i = self.index
387 387 d = i[self.rev(node)]
388 388 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
389 389 def parentrevs(self, rev):
390 390 return self.index[rev][5:7]
391 391 def start(self, rev):
392 392 return int(self.index[rev][0] >> 16)
393 393 def end(self, rev):
394 394 return self.start(rev) + self.length(rev)
395 395 def length(self, rev):
396 396 return self.index[rev][1]
397 397 def chainbase(self, rev):
398 398 base = self._chainbasecache.get(rev)
399 399 if base is not None:
400 400 return base
401 401
402 402 index = self.index
403 403 base = index[rev][3]
404 404 while base != rev:
405 405 rev = base
406 406 base = index[rev][3]
407 407
408 408 self._chainbasecache[rev] = base
409 409 return base
410 410 def chainlen(self, rev):
411 411 return self._chaininfo(rev)[0]
412 412
413 413 def _chaininfo(self, rev):
414 414 chaininfocache = self._chaininfocache
415 415 if rev in chaininfocache:
416 416 return chaininfocache[rev]
417 417 index = self.index
418 418 generaldelta = self._generaldelta
419 419 iterrev = rev
420 420 e = index[iterrev]
421 421 clen = 0
422 422 compresseddeltalen = 0
423 423 while iterrev != e[3]:
424 424 clen += 1
425 425 compresseddeltalen += e[1]
426 426 if generaldelta:
427 427 iterrev = e[3]
428 428 else:
429 429 iterrev -= 1
430 430 if iterrev in chaininfocache:
431 431 t = chaininfocache[iterrev]
432 432 clen += t[0]
433 433 compresseddeltalen += t[1]
434 434 break
435 435 e = index[iterrev]
436 436 else:
437 437 # Add text length of base since decompressing that also takes
438 438 # work. For cache hits the length is already included.
439 439 compresseddeltalen += e[1]
440 440 r = (clen, compresseddeltalen)
441 441 chaininfocache[rev] = r
442 442 return r
443 443
444 444 def _deltachain(self, rev, stoprev=None):
445 445 """Obtain the delta chain for a revision.
446 446
447 447 ``stoprev`` specifies a revision to stop at. If not specified, we
448 448 stop at the base of the chain.
449 449
450 450 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
451 451 revs in ascending order and ``stopped`` is a bool indicating whether
452 452 ``stoprev`` was hit.
453 453 """
454 454 chain = []
455 455
456 456 # Alias to prevent attribute lookup in tight loop.
457 457 index = self.index
458 458 generaldelta = self._generaldelta
459 459
460 460 iterrev = rev
461 461 e = index[iterrev]
462 462 while iterrev != e[3] and iterrev != stoprev:
463 463 chain.append(iterrev)
464 464 if generaldelta:
465 465 iterrev = e[3]
466 466 else:
467 467 iterrev -= 1
468 468 e = index[iterrev]
469 469
470 470 if iterrev == stoprev:
471 471 stopped = True
472 472 else:
473 473 chain.append(iterrev)
474 474 stopped = False
475 475
476 476 chain.reverse()
477 477 return chain, stopped
478 478
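Reviewer note: downstream, revision() feeds this chain to _chunks() and folds the result with mdiff.patches(). A hypothetical toy stand-in for that fold, with callables in place of decompressed delta chunks:

    # Toy: rev 0 is a full text, revs 1-3 are deltas stacked on top.
    fulltexts = {0: "base"}
    deltas = {1: lambda t: t + "+1", 2: lambda t: t + "+2",
              3: lambda t: t + "+3"}

    chain = [0, 1, 2, 3]              # _deltachain(3) -> revs, ascending
    text = fulltexts[chain[0]]
    for rev in chain[1:]:
        text = deltas[rev](text)      # revision() uses mdiff.patches() here
    assert text == "base+1+2+3"
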
479 479 def flags(self, rev):
480 480 return self.index[rev][0] & 0xFFFF
481 481 def rawsize(self, rev):
482 482 """return the length of the uncompressed text for a given revision"""
483 483 l = self.index[rev][2]
484 484 if l >= 0:
485 485 return l
486 486
487 487 t = self.revision(self.node(rev))
488 488 return len(t)
489 489 size = rawsize
490 490
491 491 def ancestors(self, revs, stoprev=0, inclusive=False):
492 492 """Generate the ancestors of 'revs' in reverse topological order.
493 493 Does not generate revs lower than stoprev.
494 494
495 495 See the documentation for ancestor.lazyancestors for more details."""
496 496
497 497 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
498 498 inclusive=inclusive)
499 499
500 500 def descendants(self, revs):
501 501 """Generate the descendants of 'revs' in revision order.
502 502
503 503 Yield a sequence of revision numbers starting with a child of
504 504 some rev in revs, i.e., each revision is *not* considered a
505 505 descendant of itself. Results are ordered by revision number (a
506 506 topological sort)."""
507 507 first = min(revs)
508 508 if first == nullrev:
509 509 for i in self:
510 510 yield i
511 511 return
512 512
513 513 seen = set(revs)
514 514 for i in self.revs(start=first + 1):
515 515 for x in self.parentrevs(i):
516 516 if x != nullrev and x in seen:
517 517 seen.add(i)
518 518 yield i
519 519 break
520 520
521 521 def findcommonmissing(self, common=None, heads=None):
522 522 """Return a tuple of the ancestors of common and the ancestors of heads
523 523 that are not ancestors of common. In revset terminology, we return the
524 524 tuple:
525 525
526 526 ::common, (::heads) - (::common)
527 527
528 528 The list is sorted by revision number, meaning it is
529 529 topologically sorted.
530 530
531 531 'heads' and 'common' are both lists of node IDs. If heads is
532 532 not supplied, uses all of the revlog's heads. If common is not
533 533 supplied, uses nullid."""
534 534 if common is None:
535 535 common = [nullid]
536 536 if heads is None:
537 537 heads = self.heads()
538 538
539 539 common = [self.rev(n) for n in common]
540 540 heads = [self.rev(n) for n in heads]
541 541
542 542 # we want the ancestors, but inclusive
543 543 class lazyset(object):
544 544 def __init__(self, lazyvalues):
545 545 self.addedvalues = set()
546 546 self.lazyvalues = lazyvalues
547 547
548 548 def __contains__(self, value):
549 549 return value in self.addedvalues or value in self.lazyvalues
550 550
551 551 def __iter__(self):
552 552 added = self.addedvalues
553 553 for r in added:
554 554 yield r
555 555 for r in self.lazyvalues:
556 556 if r not in added:
557 557 yield r
558 558
559 559 def add(self, value):
560 560 self.addedvalues.add(value)
561 561
562 562 def update(self, values):
563 563 self.addedvalues.update(values)
564 564
565 565 has = lazyset(self.ancestors(common))
566 566 has.add(nullrev)
567 567 has.update(common)
568 568
569 569 # take all ancestors from heads that aren't in has
570 570 missing = set()
571 571 visit = collections.deque(r for r in heads if r not in has)
572 572 while visit:
573 573 r = visit.popleft()
574 574 if r in missing:
575 575 continue
576 576 else:
577 577 missing.add(r)
578 578 for p in self.parentrevs(r):
579 579 if p not in has:
580 580 visit.append(p)
581 581 missing = list(missing)
582 582 missing.sort()
583 583 return has, [self.node(r) for r in missing]
584 584
585 585 def incrementalmissingrevs(self, common=None):
586 586 """Return an object that can be used to incrementally compute the
587 587 revision numbers of the ancestors of arbitrary sets that are not
588 588 ancestors of common. This is an ancestor.incrementalmissingancestors
589 589 object.
590 590
591 591 'common' is a list of revision numbers. If common is not supplied, uses
592 592 nullrev.
593 593 """
594 594 if common is None:
595 595 common = [nullrev]
596 596
597 597 return ancestor.incrementalmissingancestors(self.parentrevs, common)
598 598
599 599 def findmissingrevs(self, common=None, heads=None):
600 600 """Return the revision numbers of the ancestors of heads that
601 601 are not ancestors of common.
602 602
603 603 More specifically, return a list of revision numbers corresponding to
604 604 nodes N such that every N satisfies the following constraints:
605 605
606 606 1. N is an ancestor of some node in 'heads'
607 607 2. N is not an ancestor of any node in 'common'
608 608
609 609 The list is sorted by revision number, meaning it is
610 610 topologically sorted.
611 611
612 612 'heads' and 'common' are both lists of revision numbers. If heads is
613 613 not supplied, uses all of the revlog's heads. If common is not
614 614 supplied, uses nullid."""
615 615 if common is None:
616 616 common = [nullrev]
617 617 if heads is None:
618 618 heads = self.headrevs()
619 619
620 620 inc = self.incrementalmissingrevs(common=common)
621 621 return inc.missingancestors(heads)
622 622
623 623 def findmissing(self, common=None, heads=None):
624 624 """Return the ancestors of heads that are not ancestors of common.
625 625
626 626 More specifically, return a list of nodes N such that every N
627 627 satisfies the following constraints:
628 628
629 629 1. N is an ancestor of some node in 'heads'
630 630 2. N is not an ancestor of any node in 'common'
631 631
632 632 The list is sorted by revision number, meaning it is
633 633 topologically sorted.
634 634
635 635 'heads' and 'common' are both lists of node IDs. If heads is
636 636 not supplied, uses all of the revlog's heads. If common is not
637 637 supplied, uses nullid."""
638 638 if common is None:
639 639 common = [nullid]
640 640 if heads is None:
641 641 heads = self.heads()
642 642
643 643 common = [self.rev(n) for n in common]
644 644 heads = [self.rev(n) for n in heads]
645 645
646 646 inc = self.incrementalmissingrevs(common=common)
647 647 return [self.node(r) for r in inc.missingancestors(heads)]
648 648
649 649 def nodesbetween(self, roots=None, heads=None):
650 650 """Return a topological path from 'roots' to 'heads'.
651 651
652 652 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
653 653 topologically sorted list of all nodes N that satisfy both of
654 654 these constraints:
655 655
656 656 1. N is a descendant of some node in 'roots'
657 657 2. N is an ancestor of some node in 'heads'
658 658
659 659 Every node is considered to be both a descendant and an ancestor
660 660 of itself, so every reachable node in 'roots' and 'heads' will be
661 661 included in 'nodes'.
662 662
663 663 'outroots' is the list of reachable nodes in 'roots', i.e., the
664 664 subset of 'roots' that is returned in 'nodes'. Likewise,
665 665 'outheads' is the subset of 'heads' that is also in 'nodes'.
666 666
667 667 'roots' and 'heads' are both lists of node IDs. If 'roots' is
668 668 unspecified, uses nullid as the only root. If 'heads' is
669 669 unspecified, uses list of all of the revlog's heads."""
670 670 nonodes = ([], [], [])
671 671 if roots is not None:
672 672 roots = list(roots)
673 673 if not roots:
674 674 return nonodes
675 675 lowestrev = min([self.rev(n) for n in roots])
676 676 else:
677 677 roots = [nullid] # Everybody's a descendant of nullid
678 678 lowestrev = nullrev
679 679 if (lowestrev == nullrev) and (heads is None):
680 680 # We want _all_ the nodes!
681 681 return ([self.node(r) for r in self], [nullid], list(self.heads()))
682 682 if heads is None:
683 683 # All nodes are ancestors, so the latest ancestor is the last
684 684 # node.
685 685 highestrev = len(self) - 1
686 686 # Set ancestors to None to signal that every node is an ancestor.
687 687 ancestors = None
688 688 # Set heads to an empty dictionary for later discovery of heads
689 689 heads = {}
690 690 else:
691 691 heads = list(heads)
692 692 if not heads:
693 693 return nonodes
694 694 ancestors = set()
695 695 # Turn heads into a dictionary so we can remove 'fake' heads.
696 696 # Also, later we will be using it to filter out the heads we can't
697 697 # find from roots.
698 698 heads = dict.fromkeys(heads, False)
699 699 # Start at the top and keep marking parents until we're done.
700 700 nodestotag = set(heads)
701 701 # Remember where the top was so we can use it as a limit later.
702 702 highestrev = max([self.rev(n) for n in nodestotag])
703 703 while nodestotag:
704 704 # grab a node to tag
705 705 n = nodestotag.pop()
706 706 # Never tag nullid
707 707 if n == nullid:
708 708 continue
709 709 # A node's revision number represents its place in a
710 710 # topologically sorted list of nodes.
711 711 r = self.rev(n)
712 712 if r >= lowestrev:
713 713 if n not in ancestors:
714 714 # If we are possibly a descendant of one of the roots
715 715 # and we haven't already been marked as an ancestor
716 716 ancestors.add(n) # Mark as ancestor
717 717 # Add non-nullid parents to list of nodes to tag.
718 718 nodestotag.update([p for p in self.parents(n) if
719 719 p != nullid])
720 720 elif n in heads: # We've seen it before, is it a fake head?
721 721 # So it is, real heads should not be the ancestors of
722 722 # any other heads.
723 723 heads.pop(n)
724 724 if not ancestors:
725 725 return nonodes
726 726 # Now that we have our set of ancestors, we want to remove any
727 727 # roots that are not ancestors.
728 728
729 729 # If one of the roots was nullid, everything is included anyway.
730 730 if lowestrev > nullrev:
731 731 # But, since we weren't, let's recompute the lowest rev to not
732 732 # include roots that aren't ancestors.
733 733
734 734 # Filter out roots that aren't ancestors of heads
735 735 roots = [n for n in roots if n in ancestors]
736 736 # Recompute the lowest revision
737 737 if roots:
738 738 lowestrev = min([self.rev(n) for n in roots])
739 739 else:
740 740 # No more roots? Return empty list
741 741 return nonodes
742 742 else:
743 743 # We are descending from nullid, and don't need to care about
744 744 # any other roots.
745 745 lowestrev = nullrev
746 746 roots = [nullid]
747 747 # Transform our roots list into a set.
748 748 descendants = set(roots)
749 749 # Also, keep the original roots so we can filter out roots that aren't
750 750 # 'real' roots (i.e. are descended from other roots).
751 751 roots = descendants.copy()
752 752 # Our topologically sorted list of output nodes.
753 753 orderedout = []
754 754 # Don't start at nullid since we don't want nullid in our output list,
755 755 # and if nullid shows up in descendants, empty parents will look like
756 756 # they're descendants.
757 757 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
758 758 n = self.node(r)
759 759 isdescendant = False
760 760 if lowestrev == nullrev: # Everybody is a descendant of nullid
761 761 isdescendant = True
762 762 elif n in descendants:
763 763 # n is already a descendant
764 764 isdescendant = True
765 765 # This check only needs to be done here because all the roots
766 766 # will start being marked as descendants before the loop.
767 767 if n in roots:
768 768 # If n was a root, check if it's a 'real' root.
769 769 p = tuple(self.parents(n))
770 770 # If any of its parents are descendants, it's not a root.
771 771 if (p[0] in descendants) or (p[1] in descendants):
772 772 roots.remove(n)
773 773 else:
774 774 p = tuple(self.parents(n))
775 775 # A node is a descendant if either of its parents are
776 776 # descendants. (We seeded the dependents list with the roots
777 777 # up there, remember?)
778 778 if (p[0] in descendants) or (p[1] in descendants):
779 779 descendants.add(n)
780 780 isdescendant = True
781 781 if isdescendant and ((ancestors is None) or (n in ancestors)):
782 782 # Only include nodes that are both descendants and ancestors.
783 783 orderedout.append(n)
784 784 if (ancestors is not None) and (n in heads):
785 785 # We're trying to figure out which heads are reachable
786 786 # from roots.
787 787 # Mark this head as having been reached
788 788 heads[n] = True
789 789 elif ancestors is None:
790 790 # Otherwise, we're trying to discover the heads.
791 791 # Assume this is a head because if it isn't, the next step
792 792 # will eventually remove it.
793 793 heads[n] = True
794 794 # But, obviously its parents aren't.
795 795 for p in self.parents(n):
796 796 heads.pop(p, None)
797 797 heads = [n for n, flag in heads.iteritems() if flag]
798 798 roots = list(roots)
799 799 assert orderedout
800 800 assert roots
801 801 assert heads
802 802 return (orderedout, roots, heads)
803 803
804 804 def headrevs(self):
805 805 try:
806 806 return self.index.headrevs()
807 807 except AttributeError:
808 808 return self._headrevs()
809 809
810 810 def computephases(self, roots):
811 811 return self.index.computephasesmapsets(roots)
812 812
813 813 def _headrevs(self):
814 814 count = len(self)
815 815 if not count:
816 816 return [nullrev]
817 817 # we won't iter over filtered rev so nobody is a head at start
818 818 ishead = [0] * (count + 1)
819 819 index = self.index
820 820 for r in self:
821 821 ishead[r] = 1 # I may be a head
822 822 e = index[r]
823 823 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
824 824 return [r for r, val in enumerate(ishead) if val]
825 825
826 826 def heads(self, start=None, stop=None):
827 827 """return the list of all nodes that have no children
828 828
829 829 if start is specified, only heads that are descendants of
830 830 start will be returned
831 831 if stop is specified, it will consider all the revs from stop
832 832 as if they had no children
833 833 """
834 834 if start is None and stop is None:
835 835 if not len(self):
836 836 return [nullid]
837 837 return [self.node(r) for r in self.headrevs()]
838 838
839 839 if start is None:
840 840 start = nullid
841 841 if stop is None:
842 842 stop = []
843 843 stoprevs = set([self.rev(n) for n in stop])
844 844 startrev = self.rev(start)
845 845 reachable = set((startrev,))
846 846 heads = set((startrev,))
847 847
848 848 parentrevs = self.parentrevs
849 849 for r in self.revs(start=startrev + 1):
850 850 for p in parentrevs(r):
851 851 if p in reachable:
852 852 if r not in stoprevs:
853 853 reachable.add(r)
854 854 heads.add(r)
855 855 if p in heads and p not in stoprevs:
856 856 heads.remove(p)
857 857
858 858 return [self.node(r) for r in heads]
859 859
860 860 def children(self, node):
861 861 """find the children of a given node"""
862 862 c = []
863 863 p = self.rev(node)
864 864 for r in self.revs(start=p + 1):
865 865 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
866 866 if prevs:
867 867 for pr in prevs:
868 868 if pr == p:
869 869 c.append(self.node(r))
870 870 elif p == nullrev:
871 871 c.append(self.node(r))
872 872 return c
873 873
874 874 def descendant(self, start, end):
875 875 if start == nullrev:
876 876 return True
877 877 for i in self.descendants([start]):
878 878 if i == end:
879 879 return True
880 880 elif i > end:
881 881 break
882 882 return False
883 883
884 884 def commonancestorsheads(self, a, b):
885 885 """calculate all the heads of the common ancestors of nodes a and b"""
886 886 a, b = self.rev(a), self.rev(b)
887 887 try:
888 888 ancs = self.index.commonancestorsheads(a, b)
889 889 except (AttributeError, OverflowError): # C implementation failed
890 890 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
891 891 return map(self.node, ancs)
892 892
893 893 def isancestor(self, a, b):
894 894 """return True if node a is an ancestor of node b
895 895
896 896 The implementation of this is trivial but the use of
897 897 commonancestorsheads is not."""
898 898 return a in self.commonancestorsheads(a, b)
899 899
900 900 def ancestor(self, a, b):
901 901 """calculate the "best" common ancestor of nodes a and b"""
902 902
903 903 a, b = self.rev(a), self.rev(b)
904 904 try:
905 905 ancs = self.index.ancestors(a, b)
906 906 except (AttributeError, OverflowError):
907 907 ancs = ancestor.ancestors(self.parentrevs, a, b)
908 908 if ancs:
909 909 # choose a consistent winner when there's a tie
910 910 return min(map(self.node, ancs))
911 911 return nullid
912 912
913 913 def _match(self, id):
914 914 if isinstance(id, int):
915 915 # rev
916 916 return self.node(id)
917 917 if len(id) == 20:
918 918 # possibly a binary node
919 919 # odds of a binary node being all hex in ASCII are 1 in 10**25
920 920 try:
921 921 node = id
922 922 self.rev(node) # quick search the index
923 923 return node
924 924 except LookupError:
925 925 pass # may be partial hex id
926 926 try:
927 927 # str(rev)
928 928 rev = int(id)
929 929 if str(rev) != id:
930 930 raise ValueError
931 931 if rev < 0:
932 932 rev = len(self) + rev
933 933 if rev < 0 or rev >= len(self):
934 934 raise ValueError
935 935 return self.node(rev)
936 936 except (ValueError, OverflowError):
937 937 pass
938 938 if len(id) == 40:
939 939 try:
940 940 # a full hex nodeid?
941 941 node = bin(id)
942 942 self.rev(node)
943 943 return node
944 944 except (TypeError, LookupError):
945 945 pass
946 946
947 947 def _partialmatch(self, id):
948 948 try:
949 949 n = self.index.partialmatch(id)
950 950 if n and self.hasnode(n):
951 951 return n
952 952 return None
953 953 except RevlogError:
954 954 # parsers.c radix tree lookup gave multiple matches
955 955 # fast path: for unfiltered changelog, radix tree is accurate
956 956 if not getattr(self, 'filteredrevs', None):
957 957 raise LookupError(id, self.indexfile,
958 958 _('ambiguous identifier'))
959 959 # fall through to slow path that filters hidden revisions
960 960 except (AttributeError, ValueError):
961 961 # we are pure python, or key was too short to search radix tree
962 962 pass
963 963
964 964 if id in self._pcache:
965 965 return self._pcache[id]
966 966
967 967 if len(id) < 40:
968 968 try:
969 969 # hex(node)[:...]
970 970 l = len(id) // 2 # grab an even number of digits
971 971 prefix = bin(id[:l * 2])
972 972 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
973 973 nl = [n for n in nl if hex(n).startswith(id) and
974 974 self.hasnode(n)]
975 975 if len(nl) > 0:
976 976 if len(nl) == 1:
977 977 self._pcache[id] = nl[0]
978 978 return nl[0]
979 979 raise LookupError(id, self.indexfile,
980 980 _('ambiguous identifier'))
981 981 return None
982 982 except TypeError:
983 983 pass
984 984
985 985 def lookup(self, id):
986 986 """locate a node based on:
987 987 - revision number or str(revision number)
988 988 - nodeid or subset of hex nodeid
989 989 """
990 990 n = self._match(id)
991 991 if n is not None:
992 992 return n
993 993 n = self._partialmatch(id)
994 994 if n:
995 995 return n
996 996
997 997 raise LookupError(id, self.indexfile, _('no match found'))
998 998
999 999 def cmp(self, node, text):
1000 1000 """compare text with a given file revision
1001 1001
1002 1002 returns True if text is different than what is stored.
1003 1003 """
1004 1004 p1, p2 = self.parents(node)
1005 1005 return hash(text, p1, p2) != node
1006 1006
1007 1007 def _addchunk(self, offset, data):
1008 1008 """Add a segment to the revlog cache.
1009 1009
1010 1010 Accepts an absolute offset and the data that is at that location.
1011 1011 """
1012 1012 o, d = self._chunkcache
1013 1013 # try to add to existing cache
1014 1014 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1015 1015 self._chunkcache = o, d + data
1016 1016 else:
1017 1017 self._chunkcache = offset, data
1018 1018
1019 1019 def _loadchunk(self, offset, length, df=None):
1020 1020 """Load a segment of raw data from the revlog.
1021 1021
1022 1022 Accepts an absolute offset, length to read, and an optional existing
1023 1023 file handle to read from.
1024 1024
1025 1025 If an existing file handle is passed, it will be seeked and the
1026 1026 original seek position will NOT be restored.
1027 1027
1028 1028 Returns a str or buffer of raw byte data.
1029 1029 """
1030 1030 if df is not None:
1031 1031 closehandle = False
1032 1032 else:
1033 1033 if self._inline:
1034 1034 df = self.opener(self.indexfile)
1035 1035 else:
1036 1036 df = self.opener(self.datafile)
1037 1037 closehandle = True
1038 1038
1039 1039 # Cache data both forward and backward around the requested
1040 1040 # data, in a fixed size window. This helps speed up operations
1041 1041 # involving reading the revlog backwards.
1042 1042 cachesize = self._chunkcachesize
1043 1043 realoffset = offset & ~(cachesize - 1)
1044 1044 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1045 1045 - realoffset)
1046 1046 df.seek(realoffset)
1047 1047 d = df.read(reallength)
1048 1048 if closehandle:
1049 1049 df.close()
1050 1050 self._addchunk(realoffset, d)
1051 1051 if offset != realoffset or reallength != length:
1052 1052 return util.buffer(d, offset - realoffset, length)
1053 1053 return d
1054 1054
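Reviewer note: because _chunkcachesize is enforced to be a power of two in __init__, the window arithmetic reduces to bit masking. Worked numbers for a read that straddles a 64KB boundary:

    cachesize = 65536                  # default _chunkcachesize
    offset, length = 65000, 1000       # request spans [65000, 66000)

    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert realoffset == 0             # rounded down to a window boundary
    assert reallength == 131072        # two aligned windows cover the request
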
1055 1055 def _getchunk(self, offset, length, df=None):
1056 1056 """Obtain a segment of raw data from the revlog.
1057 1057
1058 1058 Accepts an absolute offset, length of bytes to obtain, and an
1059 1059 optional file handle to the already-opened revlog. If the file
1060 1060 handle is used, its original seek position will not be preserved.
1061 1061
1062 1062 Requests for data may be returned from a cache.
1063 1063
1064 1064 Returns a str or a buffer instance of raw byte data.
1065 1065 """
1066 1066 o, d = self._chunkcache
1067 1067 l = len(d)
1068 1068
1069 1069 # is it in the cache?
1070 1070 cachestart = offset - o
1071 1071 cacheend = cachestart + length
1072 1072 if cachestart >= 0 and cacheend <= l:
1073 1073 if cachestart == 0 and cacheend == l:
1074 1074 return d # avoid a copy
1075 1075 return util.buffer(d, cachestart, cacheend - cachestart)
1076 1076
1077 1077 return self._loadchunk(offset, length, df=df)
1078 1078
1079 1079 def _chunkraw(self, startrev, endrev, df=None):
1080 1080 """Obtain a segment of raw data corresponding to a range of revisions.
1081 1081
1082 1082 Accepts the start and end revisions and an optional already-open
1083 1083 file handle to be used for reading. If the file handle is read, its
1084 1084 seek position will not be preserved.
1085 1085
1086 1086 Requests for data may be satisfied by a cache.
1087 1087
1088 1088 Returns a 2-tuple of (offset, data) for the requested range of
1089 1089 revisions. Offset is the integer offset from the beginning of the
1090 1090 revlog and data is a str or buffer of the raw byte data.
1091 1091
1092 1092 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1093 1093 to determine where each revision's data begins and ends.
1094 1094 """
1095 1095 start = self.start(startrev)
1096 1096 end = self.end(endrev)
1097 1097 if self._inline:
1098 1098 start += (startrev + 1) * self._io.size
1099 1099 end += (endrev + 1) * self._io.size
1100 1100 length = end - start
1101 1101
1102 1102 return start, self._getchunk(start, length, df=df)
1103 1103
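Reviewer note: the (rev + 1) * iosize correction exists because an inline revlog stores index entries and data interleaved in a single file: entry 0, data 0, entry 1, data 1, and so on. A small sketch of the logical-to-physical offset mapping:

    iosize = 64                        # one RevlogNG index record

    def physical(logical_start, rev):
        # rev's data sits after (rev + 1) index entries in the .i file
        return logical_start + (rev + 1) * iosize

    assert physical(0, 0) == 64        # rev 0's data follows its own entry
    assert physical(11, 1) == 139      # rev 1: 11 data bytes + 2 entries
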
1104 1104 def _chunk(self, rev, df=None):
1105 1105 """Obtain a single decompressed chunk for a revision.
1106 1106
1107 1107 Accepts an integer revision and an optional already-open file handle
1108 1108 to be used for reading. If used, the seek position of the file will not
1109 1109 be preserved.
1110 1110
1111 1111 Returns a str holding uncompressed data for the requested revision.
1112 1112 """
1113 1113 return decompress(self._chunkraw(rev, rev, df=df)[1])
1114 1114
1115 1115 def _chunks(self, revs, df=None):
1116 1116 """Obtain decompressed chunks for the specified revisions.
1117 1117
1118 1118 Accepts an iterable of numeric revisions that are assumed to be in
1119 1119 ascending order. Also accepts an optional already-open file handle
1120 1120 to be used for reading. If used, the seek position of the file will
1121 1121 not be preserved.
1122 1122
1123 1123 This function is similar to calling ``self._chunk()`` multiple times,
1124 1124 but is faster.
1125 1125
1126 1126 Returns a list with decompressed data for each requested revision.
1127 1127 """
1128 1128 if not revs:
1129 1129 return []
1130 1130 start = self.start
1131 1131 length = self.length
1132 1132 inline = self._inline
1133 1133 iosize = self._io.size
1134 1134 buffer = util.buffer
1135 1135
1136 1136 l = []
1137 1137 ladd = l.append
1138 1138
1139 1139 try:
1140 1140 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1141 1141 except OverflowError:
1142 1142 # issue4215 - we can't cache a run of chunks greater than
1143 1143 # 2G on Windows
1144 1144 return [self._chunk(rev, df=df) for rev in revs]
1145 1145
1146 1146 for rev in revs:
1147 1147 chunkstart = start(rev)
1148 1148 if inline:
1149 1149 chunkstart += (rev + 1) * iosize
1150 1150 chunklength = length(rev)
1151 1151 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1152 1152
1153 1153 return l
1154 1154
1155 1155 def _chunkclear(self):
1156 1156 """Clear the raw chunk cache."""
1157 1157 self._chunkcache = (0, '')
1158 1158
1159 1159 def deltaparent(self, rev):
1160 1160 """return deltaparent of the given revision"""
1161 1161 base = self.index[rev][3]
1162 1162 if base == rev:
1163 1163 return nullrev
1164 1164 elif self._generaldelta:
1165 1165 return base
1166 1166 else:
1167 1167 return rev - 1
1168 1168
1169 1169 def revdiff(self, rev1, rev2):
1170 1170 """return or calculate a delta between two revisions"""
1171 1171 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1172 1172 return str(self._chunk(rev2))
1173 1173
1174 1174 return mdiff.textdiff(self.revision(rev1),
1175 1175 self.revision(rev2))
1176 1176
1177 1177 def revision(self, nodeorrev, _df=None):
1178 1178 """return an uncompressed revision of a given node or revision
1179 1179 number.
1180 1180
1181 1181 _df is an existing file handle to read from. It is meant to only be
1182 1182 used internally.
1183 1183 """
1184 1184 if isinstance(nodeorrev, int):
1185 1185 rev = nodeorrev
1186 1186 node = self.node(rev)
1187 1187 else:
1188 1188 node = nodeorrev
1189 1189 rev = None
1190 1190
1191 1191 cachedrev = None
1192 1192 if node == nullid:
1193 1193 return ""
1194 1194 if self._cache:
1195 1195 if self._cache[0] == node:
1196 1196 return self._cache[2]
1197 1197 cachedrev = self._cache[1]
1198 1198
1199 1199 # look up what we need to read
1200 1200 text = None
1201 1201 if rev is None:
1202 1202 rev = self.rev(node)
1203 1203
1204 1204 # check rev flags
1205 1205 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1206 1206 raise RevlogError(_('incompatible revision flag %x') %
1207 1207 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1208 1208
1209 1209 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1210 1210 if stopped:
1211 1211 text = self._cache[2]
1212 1212
1213 1213 # drop cache to save memory
1214 1214 self._cache = None
1215 1215
1216 1216 bins = self._chunks(chain, df=_df)
1217 1217 if text is None:
1218 1218 text = str(bins[0])
1219 1219 bins = bins[1:]
1220 1220
1221 1221 text = mdiff.patches(text, bins)
1222 1222
1223 1223 text = self._checkhash(text, node, rev)
1224 1224
1225 1225 self._cache = (node, rev, text)
1226 1226 return text
1227 1227
1228 1228 def hash(self, text, p1, p2):
1229 1229 """Compute a node hash.
1230 1230
1231 1231 Available as a function so that subclasses can replace the hash
1232 1232 as needed.
1233 1233 """
1234 1234 return hash(text, p1, p2)
1235 1235
1236 1236 def _checkhash(self, text, node, rev):
1237 1237 p1, p2 = self.parents(node)
1238 1238 self.checkhash(text, p1, p2, node, rev)
1239 1239 return text
1240 1240
1241 1241 def checkhash(self, text, p1, p2, node, rev=None):
1242 1242 if node != self.hash(text, p1, p2):
1243 1243 revornode = rev
1244 1244 if revornode is None:
1245 1245 revornode = templatefilters.short(hex(node))
1246 1246 raise RevlogError(_("integrity check failed on %s:%s")
1247 1247 % (self.indexfile, revornode))
1248 1248
1249 1249 def checkinlinesize(self, tr, fp=None):
1250 1250 """Check if the revlog is too big for inline and convert if so.
1251 1251
1252 1252 This should be called after revisions are added to the revlog. If the
1253 1253 revlog has grown too large to be an inline revlog, it will convert it
1254 1254 to use multiple index and data files.
1255 1255 """
1256 1256 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1257 1257 return
1258 1258
1259 1259 trinfo = tr.find(self.indexfile)
1260 1260 if trinfo is None:
1261 1261 raise RevlogError(_("%s not found in the transaction")
1262 1262 % self.indexfile)
1263 1263
1264 1264 trindex = trinfo[2]
1265 1265 if trindex is not None:
1266 1266 dataoff = self.start(trindex)
1267 1267 else:
1268 1268 # revlog was stripped at start of transaction, use all leftover data
1269 1269 trindex = len(self) - 1
1270 1270 dataoff = self.end(-2)
1271 1271
1272 1272 tr.add(self.datafile, dataoff)
1273 1273
1274 1274 if fp:
1275 1275 fp.flush()
1276 1276 fp.close()
1277 1277
1278 1278 df = self.opener(self.datafile, 'w')
1279 1279 try:
1280 1280 for r in self:
1281 1281 df.write(self._chunkraw(r, r)[1])
1282 1282 finally:
1283 1283 df.close()
1284 1284
1285 1285 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1286 1286 checkambig=self._checkambig)
1287 1287 self.version &= ~(REVLOGNGINLINEDATA)
1288 1288 self._inline = False
1289 1289 for i in self:
1290 1290 e = self._io.packentry(self.index[i], self.node, self.version, i)
1291 1291 fp.write(e)
1292 1292
1293 1293 # if we don't call close, the temp file will never replace the
1294 1294 # real index
1295 1295 fp.close()
1296 1296
1297 1297 tr.replace(self.indexfile, trindex * self._io.size)
1298 1298 self._chunkclear()
1299 1299
1300 1300 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1301 1301 node=None):
1302 1302 """add a revision to the log
1303 1303
1304 1304 text - the revision data to add
1305 1305 transaction - the transaction object used for rollback
1306 1306 link - the linkrev data to add
1307 1307 p1, p2 - the parent nodeids of the revision
1308 1308 cachedelta - an optional precomputed delta
1309 1309 node - nodeid of revision; typically node is not specified, and it is
1310 1310 computed by default as hash(text, p1, p2), however subclasses might
1311 1311 use a different hashing method (and override checkhash() in such a case)
1312 1312 """
1313 1313 if link == nullrev:
1314 1314 raise RevlogError(_("attempted to add linkrev -1 to %s")
1315 1315 % self.indexfile)
1316 1316
1317 1317 if len(text) > _maxentrysize:
1318 1318 raise RevlogError(
1319 1319 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1320 1320 % (self.indexfile, len(text)))
1321 1321
1322 1322 node = node or self.hash(text, p1, p2)
1323 1323 if node in self.nodemap:
1324 1324 return node
1325 1325
1326 1326 dfh = None
1327 1327 if not self._inline:
1328 1328 dfh = self.opener(self.datafile, "a+")
1329 1329 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1330 1330 try:
1331 1331 return self._addrevision(node, text, transaction, link, p1, p2,
1332 1332 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1333 1333 finally:
1334 1334 if dfh:
1335 1335 dfh.close()
1336 1336 ifh.close()
1337 1337
1338 1338 def compress(self, text):
1339 1339 """ generate a possibly-compressed representation of text """
1340 1340 if not text:
1341 1341 return ("", text)
1342 1342 l = len(text)
1343 1343 bin = None
1344 1344 if l < 44:
1345 1345 pass
1346 1346 elif l > 1000000:
1347 1347 # zlib makes an internal copy, thus doubling memory usage for
1348 1348 # large files, so let's do this in pieces
1349 1349 z = zlib.compressobj()
1350 1350 p = []
1351 1351 pos = 0
1352 1352 while pos < l:
1353 1353 pos2 = pos + 2**20
1354 1354 p.append(z.compress(text[pos:pos2]))
1355 1355 pos = pos2
1356 1356 p.append(z.flush())
1357 1357 if sum(map(len, p)) < l:
1358 1358 bin = "".join(p)
1359 1359 else:
1360 1360 bin = _compress(text)
1361 1361 if bin is None or len(bin) > l:
1362 1362 if text[0] == '\0':
1363 1363 return ("", text)
1364 1364 return ('u', text)
1365 1365 return ("", bin)
1366 1366
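Reviewer note: pairing this with decompress() above, the returned 2-tuple is written out as header + data, and the first byte of that concatenation is exactly the tag decompress() dispatches on. A minimal round-trip sketch (bytes literals; the original is Python 2 str):

    import zlib

    def read_back(header, data):
        blob = header + data               # as written by _writeentry()
        if not blob or blob[0:1] == b"\0":
            return blob                    # empty or NUL-led: stored as-is
        if blob[0:1] == b"u":
            return blob[1:]                # stored uncompressed
        return zlib.decompress(blob)       # 'x': a zlib stream

    text = b"a" * 100                      # repetitive, so zlib wins
    assert read_back(b"", zlib.compress(text)) == text
    assert read_back(b"u", b"incompressible") == b"incompressible"
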
1367 1367 def _isgooddelta(self, d, textlen):
1368 1368 """Returns True if the given delta is good. Good means that it is within
1369 1369 the disk span, disk size, and chain length bounds that we know to be
1370 1370 performant."""
1371 1371 if d is None:
1372 1372 return False
1373 1373
1374 1374 # - 'dist' is the distance from the base revision -- bounding it limits
1375 1375 # the amount of I/O we need to do.
1376 1376 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1377 1377 # to apply -- bounding it limits the amount of CPU we consume.
1378 1378 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1379 1379 if (dist > textlen * 4 or l > textlen or
1380 1380 compresseddeltalen > textlen * 2 or
1381 1381 (self._maxchainlen and chainlen > self._maxchainlen)):
1382 1382 return False
1383 1383
1384 1384 return True
1385 1385
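Reviewer note: hypothetical numbers for a 1000-byte revision, showing a candidate delta that satisfies all three bounds checked above:

    textlen = 1000
    # (dist, l, chainlen, compresseddeltalen) from a builddelta() result
    dist, l, chainlen, compresseddeltalen = 1200, 300, 5, 900
    maxchainlen = None

    good = not (dist > textlen * 4 or                # bounded I/O span
                l > textlen or                       # delta smaller than text
                compresseddeltalen > textlen * 2 or  # bounded apply cost
                (maxchainlen and chainlen > maxchainlen))
    assert good
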
1386 1386 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1387 1387 cachedelta, ifh, dfh, alwayscache=False):
1388 1388 """internal function to add revisions to the log
1389 1389
1390 1390 see addrevision for argument descriptions.
1391 1391 invariants:
1392 1392 - text is optional (can be None); if not set, cachedelta must be set.
1393 1393 if both are set, they must correspond to each other.
1394 1394 """
1395 1395 btext = [text]
1396 1396 def buildtext():
1397 1397 if btext[0] is not None:
1398 1398 return btext[0]
1399 1399 baserev = cachedelta[0]
1400 1400 delta = cachedelta[1]
1401 1401 # special case deltas which replace entire base; no need to decode
1402 1402 # base revision. this neatly avoids censored bases, which throw when
1403 1403 # they're decoded.
1404 1404 hlen = struct.calcsize(">lll")
1405 1405 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1406 1406 len(delta) - hlen):
1407 1407 btext[0] = delta[hlen:]
1408 1408 else:
1409 1409 if self._inline:
1410 1410 fh = ifh
1411 1411 else:
1412 1412 fh = dfh
1413 1413 basetext = self.revision(self.node(baserev), _df=fh)
1414 1414 btext[0] = mdiff.patch(basetext, delta)
1415 1415 try:
1416 1416 self.checkhash(btext[0], p1, p2, node)
1417 1417 if flags & REVIDX_ISCENSORED:
1418 1418 raise RevlogError(_('node %s is not censored') % node)
1419 1419 except CensoredNodeError:
1420 1420 # must pass the censored index flag to add censored revisions
1421 1421 if not flags & REVIDX_ISCENSORED:
1422 1422 raise
1423 1423 return btext[0]
1424 1424
1425 1425 def builddelta(rev):
1426 1426 # can we use the cached delta?
1427 1427 if cachedelta and cachedelta[0] == rev:
1428 1428 delta = cachedelta[1]
1429 1429 else:
1430 1430 t = buildtext()
1431 1431 if self.iscensored(rev):
1432 1432 # deltas based on a censored revision must replace the
1433 1433 # full content in one patch, so delta works everywhere
1434 1434 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1435 1435 delta = header + t
1436 1436 else:
1437 1437 if self._inline:
1438 1438 fh = ifh
1439 1439 else:
1440 1440 fh = dfh
1441 1441 ptext = self.revision(self.node(rev), _df=fh)
1442 1442 delta = mdiff.textdiff(ptext, t)
1443 data = self.compress(delta)
1444 l = len(data[1]) + len(data[0])
1443 header, data = self.compress(delta)
1444 deltalen = len(header) + len(data)
1445 1445 chainbase = self.chainbase(rev)
1446 dist = l + offset - self.start(chainbase)
1446 dist = deltalen + offset - self.start(chainbase)
1447 1447 if self._generaldelta:
1448 1448 base = rev
1449 1449 else:
1450 1450 base = chainbase
1451 1451 chainlen, compresseddeltalen = self._chaininfo(rev)
1452 1452 chainlen += 1
1453 compresseddeltalen += l
1454 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1453 compresseddeltalen += deltalen
1454 return (dist, deltalen, (header, data), base,
1455 chainbase, chainlen, compresseddeltalen)
1455 1456
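Reviewer note: the shape of the return value is unchanged by this cleanup. The third item is still the (header, data) pair from compress(), which _writeentry() later emits as data[0] followed by data[1]; the rename only makes the length bookkeeping explicit:

    header, data = ("u", "raw delta")     # one possible compress() result
    deltalen = len(header) + len(data)    # was: l = len(data[1]) + len(data[0])
    assert deltalen == len("u") + len("raw delta")
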
1456 1457 curr = len(self)
1457 1458 prev = curr - 1
1458 1459 offset = self.end(prev)
1459 1460 delta = None
1460 1461 p1r, p2r = self.rev(p1), self.rev(p2)
1461 1462
1462 1463 # full versions are inserted when the needed deltas
1463 1464 # become comparable to the uncompressed text
1464 1465 if text is None:
1465 1466 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1466 1467 cachedelta[1])
1467 1468 else:
1468 1469 textlen = len(text)
1469 1470
1470 1471 # should we try to build a delta?
1471 1472 if prev != nullrev:
1472 1473 tested = set()
1473 1474 if cachedelta and self._generaldelta and self._lazydeltabase:
1474 1475 # Assume what we received from the server is a good choice
1475 1476 # build delta will reuse the cache
1476 1477 candidatedelta = builddelta(cachedelta[0])
1477 1478 tested.add(cachedelta[0])
1478 1479 if self._isgooddelta(candidatedelta, textlen):
1479 1480 delta = candidatedelta
1480 1481 if delta is None and self._generaldelta:
1481 1482 # exclude already lazy tested base if any
1482 1483 parents = [p for p in (p1r, p2r)
1483 1484 if p != nullrev and p not in tested]
1484 1485 if parents and not self._aggressivemergedeltas:
1485 1486 # Pick whichever parent is closer to us (to minimize the
1486 1487 # chance of having to build a fulltext).
1487 1488 parents = [max(parents)]
1488 1489 tested.update(parents)
1489 1490 pdeltas = []
1490 1491 for p in parents:
1491 1492 pd = builddelta(p)
1492 1493 if self._isgooddelta(pd, textlen):
1493 1494 pdeltas.append(pd)
1494 1495 if pdeltas:
1495 1496 delta = min(pdeltas, key=lambda x: x[1])
1496 1497 if delta is None and prev not in tested:
1497 1498 # other approaches failed; try against prev to hopefully save us a
1498 1499 # fulltext.
1499 1500 candidatedelta = builddelta(prev)
1500 1501 if self._isgooddelta(candidatedelta, textlen):
1501 1502 delta = candidatedelta
1502 1503 if delta is not None:
1503 1504 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1504 1505 else:
1505 1506 text = buildtext()
1506 1507 data = self.compress(text)
1507 1508 l = len(data[1]) + len(data[0])
1508 1509 base = chainbase = curr
1509 1510
1510 1511 e = (offset_type(offset, flags), l, textlen,
1511 1512 base, link, p1r, p2r, node)
1512 1513 self.index.insert(-1, e)
1513 1514 self.nodemap[node] = curr
1514 1515
1515 1516 entry = self._io.packentry(e, self.node, self.version, curr)
1516 1517 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1517 1518
1518 1519 if alwayscache and text is None:
1519 1520 text = buildtext()
1520 1521
1521 1522 if type(text) == str: # only accept immutable objects
1522 1523 self._cache = (node, curr, text)
1523 1524 self._chainbasecache[curr] = chainbase
1524 1525 return node
1525 1526
1526 1527 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1527 1528 # Files opened in a+ mode have inconsistent behavior on various
1528 1529 # platforms. Windows requires that a file positioning call be made
1529 1530 # when the file handle transitions between reads and writes. See
1530 1531 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1531 1532 # platforms, Python or the platform itself can be buggy. Some versions
1532 1533 # of Solaris have been observed to not append at the end of the file
1533 1534 # if the file was seeked to before the end. See issue4943 for more.
1534 1535 #
1535 1536 # We work around this issue by inserting a seek() before writing.
1536 1537 # Note: This is likely not necessary on Python 3.
1537 1538 ifh.seek(0, os.SEEK_END)
1538 1539 if dfh:
1539 1540 dfh.seek(0, os.SEEK_END)
1540 1541
1541 1542 curr = len(self) - 1
1542 1543 if not self._inline:
1543 1544 transaction.add(self.datafile, offset)
1544 1545 transaction.add(self.indexfile, curr * len(entry))
1545 1546 if data[0]:
1546 1547 dfh.write(data[0])
1547 1548 dfh.write(data[1])
1548 1549 ifh.write(entry)
1549 1550 else:
1550 1551 offset += curr * self._io.size
1551 1552 transaction.add(self.indexfile, offset, curr)
1552 1553 ifh.write(entry)
1553 1554 ifh.write(data[0])
1554 1555 ifh.write(data[1])
1555 1556 self.checkinlinesize(transaction, ifh)
1556 1557
1557 1558 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1558 1559 """
1559 1560 add a delta group
1560 1561
1561 1562 given a set of deltas, add them to the revision log. the
1562 1563 first delta is against its parent, which should be in our
1563 1564 log, the rest are against the previous delta.
1564 1565
1565 1566 If ``addrevisioncb`` is defined, it will be called with arguments of
1566 1567 this revlog and the node that was added.
1567 1568 """
1568 1569
1569 1570 # track the base of the current delta log
1570 1571 content = []
1571 1572 node = None
1572 1573
1573 1574 r = len(self)
1574 1575 end = 0
1575 1576 if r:
1576 1577 end = self.end(r - 1)
1577 1578 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1578 1579 isize = r * self._io.size
1579 1580 if self._inline:
1580 1581 transaction.add(self.indexfile, end + isize, r)
1581 1582 dfh = None
1582 1583 else:
1583 1584 transaction.add(self.indexfile, isize, r)
1584 1585 transaction.add(self.datafile, end)
1585 1586 dfh = self.opener(self.datafile, "a+")
1586 1587 def flush():
1587 1588 if dfh:
1588 1589 dfh.flush()
1589 1590 ifh.flush()
1590 1591 try:
1591 1592 # loop through our set of deltas
1592 1593 chain = None
1593 1594 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1594 1595 node = chunkdata['node']
1595 1596 p1 = chunkdata['p1']
1596 1597 p2 = chunkdata['p2']
1597 1598 cs = chunkdata['cs']
1598 1599 deltabase = chunkdata['deltabase']
1599 1600 delta = chunkdata['delta']
1600 1601 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1601 1602
1602 1603 content.append(node)
1603 1604
1604 1605 link = linkmapper(cs)
1605 1606 if node in self.nodemap:
1606 1607 # this can happen if two branches make the same change
1607 1608 chain = node
1608 1609 continue
1609 1610
1610 1611 for p in (p1, p2):
1611 1612 if p not in self.nodemap:
1612 1613 raise LookupError(p, self.indexfile,
1613 1614 _('unknown parent'))
1614 1615
1615 1616 if deltabase not in self.nodemap:
1616 1617 raise LookupError(deltabase, self.indexfile,
1617 1618 _('unknown delta base'))
1618 1619
1619 1620 baserev = self.rev(deltabase)
1620 1621
1621 1622 if baserev != nullrev and self.iscensored(baserev):
1622 1623 # if base is censored, delta must be full replacement in a
1623 1624 # single patch operation
1624 1625 hlen = struct.calcsize(">lll")
1625 1626 oldlen = self.rawsize(baserev)
1626 1627 newlen = len(delta) - hlen
1627 1628 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1628 1629 raise error.CensoredBaseError(self.indexfile,
1629 1630 self.node(baserev))
1630 1631
1631 1632 if not flags and self._peek_iscensored(baserev, delta, flush):
1632 1633 flags |= REVIDX_ISCENSORED
1633 1634
1634 1635 # We assume consumers of addrevisioncb will want to retrieve
1635 1636 # the added revision, which will require a call to
1636 1637 # revision(). revision() will fast path if there is a cache
1637 1638 # hit. So, we tell _addrevision() to always cache in this case.
1638 1639 chain = self._addrevision(node, None, transaction, link,
1639 1640 p1, p2, flags, (baserev, delta),
1640 1641 ifh, dfh,
1641 1642 alwayscache=bool(addrevisioncb))
1642 1643
1643 1644 if addrevisioncb:
1644 1645 addrevisioncb(self, chain)
1645 1646
1646 1647 if not dfh and not self._inline:
1647 1648 # addrevision switched from inline to conventional
1648 1649 # reopen the index
1649 1650 ifh.close()
1650 1651 dfh = self.opener(self.datafile, "a+")
1651 1652 ifh = self.opener(self.indexfile, "a+",
1652 1653 checkambig=self._checkambig)
1653 1654 finally:
1654 1655 if dfh:
1655 1656 dfh.close()
1656 1657 ifh.close()
1657 1658
1658 1659 return content
1659 1660
1660 1661 def iscensored(self, rev):
1661 1662 """Check if a file revision is censored."""
1662 1663 return False
1663 1664
1664 1665 def _peek_iscensored(self, baserev, delta, flush):
1665 1666 """Quickly check if a delta produces a censored revision."""
1666 1667 return False
1667 1668
1668 1669 def getstrippoint(self, minlink):
1669 1670 """find the minimum rev that must be stripped to strip the linkrev
1670 1671
1671 1672 Returns a tuple containing the minimum rev and a set of all revs that
1672 1673 have linkrevs that will be broken by this strip.
1673 1674 """
1674 1675 brokenrevs = set()
1675 1676 strippoint = len(self)
1676 1677
1677 1678 heads = {}
1678 1679 futurelargelinkrevs = set()
1679 1680 for head in self.headrevs():
1680 1681 headlinkrev = self.linkrev(head)
1681 1682 heads[head] = headlinkrev
1682 1683 if headlinkrev >= minlink:
1683 1684 futurelargelinkrevs.add(headlinkrev)
1684 1685
1685 1686 # This algorithm involves walking down the rev graph, starting at the
1686 1687 # heads. Since the revs are topologically sorted according to linkrev,
1687 1688 # once all head linkrevs are below the minlink, we know there are
1688 1689 # no more revs that could have a linkrev greater than minlink.
1689 1690 # So we can stop walking.
1690 1691 while futurelargelinkrevs:
1691 1692 strippoint -= 1
1692 1693 linkrev = heads.pop(strippoint)
1693 1694
1694 1695 if linkrev < minlink:
1695 1696 brokenrevs.add(strippoint)
1696 1697 else:
1697 1698 futurelargelinkrevs.remove(linkrev)
1698 1699
1699 1700 for p in self.parentrevs(strippoint):
1700 1701 if p != nullrev:
1701 1702 plinkrev = self.linkrev(p)
1702 1703 heads[p] = plinkrev
1703 1704 if plinkrev >= minlink:
1704 1705 futurelargelinkrevs.add(plinkrev)
1705 1706
1706 1707 return strippoint, brokenrevs
1707 1708
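Reviewer note: grossly simplified to a linear history where rev i carries linkrev i (a toy, not the real graph walk above), the computation degenerates to finding the first rev whose linkrev reaches minlink:

    linkrevs = [0, 1, 2, 3, 4]     # toy: rev i has linkrev i
    minlink = 3

    strippoint = len(linkrevs)
    while strippoint and linkrevs[strippoint - 1] >= minlink:
        strippoint -= 1
    assert strippoint == 3         # revs 3 and 4 get truncated
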
1708 1709 def strip(self, minlink, transaction):
1709 1710 """truncate the revlog on the first revision with a linkrev >= minlink
1710 1711
1711 1712 This function is called when we're stripping revision minlink and
1712 1713 its descendants from the repository.
1713 1714
1714 1715 We have to remove all revisions with linkrev >= minlink, because
1715 1716 the equivalent changelog revisions will be renumbered after the
1716 1717 strip.
1717 1718
1718 1719 So we truncate the revlog on the first of these revisions, and
1719 1720 trust that the caller has saved the revisions that shouldn't be
1720 1721 removed and that it'll re-add them after this truncation.
1721 1722 """
1722 1723 if len(self) == 0:
1723 1724 return
1724 1725
1725 1726 rev, _ = self.getstrippoint(minlink)
1726 1727 if rev == len(self):
1727 1728 return
1728 1729
1729 1730 # first truncate the files on disk
1730 1731 end = self.start(rev)
1731 1732 if not self._inline:
1732 1733 transaction.add(self.datafile, end)
1733 1734 end = rev * self._io.size
1734 1735 else:
1735 1736 end += rev * self._io.size
1736 1737
1737 1738 transaction.add(self.indexfile, end)
1738 1739
1739 1740 # then reset internal state in memory to forget those revisions
1740 1741 self._cache = None
1741 1742 self._chaininfocache = {}
1742 1743 self._chunkclear()
1743 1744 for x in xrange(rev, len(self)):
1744 1745 del self.nodemap[self.node(x)]
1745 1746
1746 1747 del self.index[rev:-1]
1747 1748
1748 1749 def checksize(self):
1749 1750 expected = 0
1750 1751 if len(self):
1751 1752 expected = max(0, self.end(len(self) - 1))
1752 1753
1753 1754 try:
1754 1755 f = self.opener(self.datafile)
1755 1756 f.seek(0, 2)
1756 1757 actual = f.tell()
1757 1758 f.close()
1758 1759 dd = actual - expected
1759 1760 except IOError as inst:
1760 1761 if inst.errno != errno.ENOENT:
1761 1762 raise
1762 1763 dd = 0
1763 1764
1764 1765 try:
1765 1766 f = self.opener(self.indexfile)
1766 1767 f.seek(0, 2)
1767 1768 actual = f.tell()
1768 1769 f.close()
1769 1770 s = self._io.size
1770 1771 i = max(0, actual // s)
1771 1772 di = actual - (i * s)
1772 1773 if self._inline:
1773 1774 databytes = 0
1774 1775 for r in self:
1775 1776 databytes += max(0, self.length(r))
1776 1777 dd = 0
1777 1778 di = actual - len(self) * s - databytes
1778 1779 except IOError as inst:
1779 1780 if inst.errno != errno.ENOENT:
1780 1781 raise
1781 1782 di = 0
1782 1783
1783 1784 return (dd, di)
1784 1785
1785 1786 def files(self):
1786 1787 res = [self.indexfile]
1787 1788 if not self._inline:
1788 1789 res.append(self.datafile)
1789 1790 return res