revlog: ensure that flags do not overflow 2 bytes...
Cotizo Sima
r30543:03fae904 default
@@ -1,1820 +1,1822 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
76 raise ValueError('unknown revlog index flags')
75 77 return long(long(offset) << 16 | type)
76 78
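This commit's change guards the packing above: the first field of an index entry is a 64-bit value holding a 6-byte offset in its high bits and 2 bytes of flags in its low bits, so any flag bit at or above bit 16 would silently corrupt the offset. A minimal standalone sketch of the round-trip and the new guard (not part of revlog.py; plain int stands in for the file's long):

    REVIDX_KNOWN_FLAGS = 1 << 15          # REVIDX_ISCENSORED, the only known flag

    def offset_type(offset, type):
        if (type & ~REVIDX_KNOWN_FLAGS) != 0:
            raise ValueError('unknown revlog index flags')
        return int(offset) << 16 | type

    packed = offset_type(1024, 1 << 15)
    assert packed >> 16 == 1024           # getoffset()
    assert packed & 0xFFFF == 0x8000      # gettype()
    try:
        offset_type(1024, 1 << 16)        # bit 16 would bleed into the offset
    except ValueError:
        pass                              # rejected, as intended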
77 79 _nullhash = hashlib.sha1(nullid)
78 80
79 81 def hash(text, p1, p2):
80 82 """generate a hash from the given text and its parent hashes
81 83
82 84 This hash combines both the current file contents and its history
83 85 in a manner that makes it easy to distinguish nodes with the same
84 86 content in the revision graph.
85 87 """
86 88     # As of now, if one of the parent nodes is null, p2 is null
87 89 if p2 == nullid:
88 90 # deep copy of a hash is faster than creating one
89 91 s = _nullhash.copy()
90 92 s.update(p1)
91 93 else:
92 94 # none of the parent nodes are nullid
93 95 l = [p1, p2]
94 96 l.sort()
95 97 s = hashlib.sha1(l[0])
96 98 s.update(l[1])
97 99 s.update(text)
98 100 return s.digest()
99 101
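As a sanity check of the scheme above (with this module's hash(), which shadows the builtin, in scope): the digest is sha1 of the two parent nodes in sorted order followed by the text, so it is independent of parent order. A sketch:

    import hashlib
    p1, p2 = 'a' * 20, 'b' * 20
    text = 'file contents'
    expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
    assert hash(text, p1, p2) == hash(text, p2, p1) == expected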
100 102 def decompress(bin):
101 103 """ decompress the given input """
102 104 if not bin:
103 105 return bin
104 106 t = bin[0]
105 107 if t == '\0':
106 108 return bin
107 109 if t == 'x':
108 110 try:
109 111 return _decompress(bin)
110 112 except zlib.error as e:
111 113 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 114 if t == 'u':
113 115 return util.buffer(bin, 1)
114 116 raise RevlogError(_("unknown compression type %r") % t)
115 117
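The first byte of a stored chunk doubles as a type tag: '\0' means the raw data itself starts with NUL and was stored verbatim, 'x' is zlib output (0x78 happens to be zlib's header byte), and 'u' marks data stored uncompressed with an explicit marker. A quick sketch, assuming the decompress() above is in scope:

    import zlib
    assert decompress('') == ''
    assert decompress('\0starts with NUL') == '\0starts with NUL'  # stored verbatim
    assert decompress(zlib.compress('some text')) == 'some text'   # 'x' tag
    assert str(decompress('u' + 'raw bytes')) == 'raw bytes'       # explicit 'u' tag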
116 118 # index v0:
117 119 # 4 bytes: offset
118 120 # 4 bytes: compressed length
119 121 # 4 bytes: base rev
120 122 # 4 bytes: link rev
121 123 # 20 bytes: parent 1 nodeid
122 124 # 20 bytes: parent 2 nodeid
123 125 # 20 bytes: nodeid
124 126 indexformatv0 = ">4l20s20s20s"
125 127
126 128 class revlogoldio(object):
127 129 def __init__(self):
128 130 self.size = struct.calcsize(indexformatv0)
129 131
130 132 def parseindex(self, data, inline):
131 133 s = self.size
132 134 index = []
133 135 nodemap = {nullid: nullrev}
134 136 n = off = 0
135 137 l = len(data)
136 138 while off + s <= l:
137 139 cur = data[off:off + s]
138 140 off += s
139 141 e = _unpack(indexformatv0, cur)
140 142 # transform to revlogv1 format
141 143 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 144 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 145 index.append(e2)
144 146 nodemap[e[6]] = n
145 147 n += 1
146 148
147 149 # add the magic null revision at -1
148 150 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 151
150 152 return index, nodemap, None
151 153
152 154 def packentry(self, entry, node, version, rev):
153 155 if gettype(entry[0]):
154 156 raise RevlogError(_("index entry flags need RevlogNG"))
155 157 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 158 node(entry[5]), node(entry[6]), entry[7])
157 159 return _pack(indexformatv0, *e2)
158 160
159 161 # index ng:
160 162 # 6 bytes: offset
161 163 # 2 bytes: flags
162 164 # 4 bytes: compressed length
163 165 # 4 bytes: uncompressed length
164 166 # 4 bytes: base rev
165 167 # 4 bytes: link rev
166 168 # 4 bytes: parent 1 rev
167 169 # 4 bytes: parent 2 rev
168 170 # 32 bytes: nodeid
169 171 indexformatng = ">Qiiiiii20s12x"
170 172 versionformat = ">I"
171 173
172 174 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 175 # signed integer)
174 176 _maxentrysize = 0x7fffffff
175 177
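Each RevlogNG entry is a fixed 64 bytes, and for revision 0 packentry() below overwrites the first 4 bytes with the version word; that is safe because those bytes are the top of the offset field, which is always zero for the first revision. A sketch of the layout arithmetic (the field values are made up):

    import struct
    assert struct.calcsize(">Qiiiiii20s12x") == 64   # fixed-size RevlogNG entry
    entry = struct.pack(">Qiiiiii20s12x", 0, 10, 12, 0, 0, -1, -1, 'n' * 20)
    versioned = struct.pack(">I", 0x10001) + entry[4:]  # e.g. REVLOG_DEFAULT_VERSION
    assert len(versioned) == 64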
176 178 class revlogio(object):
177 179 def __init__(self):
178 180 self.size = struct.calcsize(indexformatng)
179 181
180 182 def parseindex(self, data, inline):
181 183 # call the C implementation to parse the index data
182 184 index, cache = parsers.parse_index2(data, inline)
183 185 return index, getattr(index, 'nodemap', None), cache
184 186
185 187 def packentry(self, entry, node, version, rev):
186 188 p = _pack(indexformatng, *entry)
187 189 if rev == 0:
188 190 p = _pack(versionformat, version) + p[4:]
189 191 return p
190 192
191 193 class revlog(object):
192 194 """
193 195 the underlying revision storage object
194 196
195 197 A revlog consists of two parts, an index and the revision data.
196 198
197 199 The index is a file with a fixed record size containing
198 200 information on each revision, including its nodeid (hash), the
199 201 nodeids of its parents, the position and offset of its data within
200 202 the data file, and the revision it's based on. Finally, each entry
201 203 contains a linkrev entry that can serve as a pointer to external
202 204 data.
203 205
204 206 The revision data itself is a linear collection of data chunks.
205 207 Each chunk represents a revision and is usually represented as a
206 208 delta against the previous chunk. To bound lookup time, runs of
207 209 deltas are limited to about 2 times the length of the original
208 210 version data. This makes retrieval of a version proportional to
209 211 its size, or O(1) relative to the number of revisions.
210 212
211 213 Both pieces of the revlog are written to in an append-only
212 214 fashion, which means we never need to rewrite a file to insert or
213 215 remove data, and can use some simple techniques to avoid the need
214 216 for locking while reading.
215 217
216 218 If checkambig, indexfile is opened with checkambig=True at
217 219 writing, to avoid file stat ambiguity.
218 220 """
219 221 def __init__(self, opener, indexfile, checkambig=False):
220 222 """
221 223 create a revlog object
222 224
223 225 opener is a function that abstracts the file opening operation
224 226 and can be used to implement COW semantics or the like.
225 227 """
226 228 self.indexfile = indexfile
227 229 self.datafile = indexfile[:-2] + ".d"
228 230 self.opener = opener
229 231 # When True, indexfile is opened with checkambig=True at writing, to
230 232 # avoid file stat ambiguity.
231 233 self._checkambig = checkambig
232 234 # 3-tuple of (node, rev, text) for a raw revision.
233 235 self._cache = None
234 236 # Maps rev to chain base rev.
235 237 self._chainbasecache = util.lrucachedict(100)
236 238 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 239 self._chunkcache = (0, '')
238 240 # How much data to read and cache into the raw revlog data cache.
239 241 self._chunkcachesize = 65536
240 242 self._maxchainlen = None
241 243 self._aggressivemergedeltas = False
242 244 self.index = []
243 245 # Mapping of partial identifiers to full nodes.
244 246 self._pcache = {}
245 247 # Mapping of revision integer to full node.
246 248 self._nodecache = {nullid: nullrev}
247 249 self._nodepos = None
248 250
249 251 v = REVLOG_DEFAULT_VERSION
250 252 opts = getattr(opener, 'options', None)
251 253 if opts is not None:
252 254 if 'revlogv1' in opts:
253 255 if 'generaldelta' in opts:
254 256 v |= REVLOGGENERALDELTA
255 257 else:
256 258 v = 0
257 259 if 'chunkcachesize' in opts:
258 260 self._chunkcachesize = opts['chunkcachesize']
259 261 if 'maxchainlen' in opts:
260 262 self._maxchainlen = opts['maxchainlen']
261 263 if 'aggressivemergedeltas' in opts:
262 264 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 265 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264 266
265 267 if self._chunkcachesize <= 0:
266 268 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 269 'than 0') % self._chunkcachesize)
268 270 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 271 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 272 'of 2') % self._chunkcachesize)
271 273
272 274 indexdata = ''
273 275 self._initempty = True
274 276 try:
275 277 f = self.opener(self.indexfile)
276 278 indexdata = f.read()
277 279 f.close()
278 280 if len(indexdata) > 0:
279 281 v = struct.unpack(versionformat, indexdata[:4])[0]
280 282 self._initempty = False
281 283 except IOError as inst:
282 284 if inst.errno != errno.ENOENT:
283 285 raise
284 286
285 287 self.version = v
286 288 self._inline = v & REVLOGNGINLINEDATA
287 289 self._generaldelta = v & REVLOGGENERALDELTA
288 290 flags = v & ~0xFFFF
289 291 fmt = v & 0xFFFF
290 292 if fmt == REVLOGV0 and flags:
291 293 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 294 % (self.indexfile, flags >> 16))
293 295 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 296 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 297 % (self.indexfile, flags >> 16))
296 298 elif fmt > REVLOGNG:
297 299 raise RevlogError(_("index %s unknown format %d")
298 300 % (self.indexfile, fmt))
299 301
300 302 self.storedeltachains = True
301 303
302 304 self._io = revlogio()
303 305 if self.version == REVLOGV0:
304 306 self._io = revlogoldio()
305 307 try:
306 308 d = self._io.parseindex(indexdata, self._inline)
307 309 except (ValueError, IndexError):
308 310 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
309 311 self.index, nodemap, self._chunkcache = d
310 312 if nodemap is not None:
311 313 self.nodemap = self._nodecache = nodemap
312 314 if not self._chunkcache:
313 315 self._chunkclear()
314 316 # revnum -> (chain-length, sum-delta-length)
315 317 self._chaininfocache = {}
316 318
317 319 def tip(self):
318 320 return self.node(len(self.index) - 2)
319 321 def __contains__(self, rev):
320 322 return 0 <= rev < len(self)
321 323 def __len__(self):
322 324 return len(self.index) - 1
323 325 def __iter__(self):
324 326 return iter(xrange(len(self)))
325 327 def revs(self, start=0, stop=None):
326 328 """iterate over all rev in this revlog (from start to stop)"""
327 329 step = 1
328 330 if stop is not None:
329 331 if start > stop:
330 332 step = -1
331 333 stop += step
332 334 else:
333 335 stop = len(self)
334 336 return xrange(start, stop, step)
335 337
336 338 @util.propertycache
337 339 def nodemap(self):
338 340 self.rev(self.node(0))
339 341 return self._nodecache
340 342
341 343 def hasnode(self, node):
342 344 try:
343 345 self.rev(node)
344 346 return True
345 347 except KeyError:
346 348 return False
347 349
348 350 def clearcaches(self):
349 351 self._cache = None
350 352 self._chainbasecache.clear()
351 353 self._chunkcache = (0, '')
352 354 self._pcache = {}
353 355
354 356 try:
355 357 self._nodecache.clearcaches()
356 358 except AttributeError:
357 359 self._nodecache = {nullid: nullrev}
358 360 self._nodepos = None
359 361
360 362 def rev(self, node):
361 363 try:
362 364 return self._nodecache[node]
363 365 except TypeError:
364 366 raise
365 367 except RevlogError:
366 368 # parsers.c radix tree lookup failed
367 369 raise LookupError(node, self.indexfile, _('no node'))
368 370 except KeyError:
369 371 # pure python cache lookup failed
370 372 n = self._nodecache
371 373 i = self.index
372 374 p = self._nodepos
373 375 if p is None:
374 376 p = len(i) - 2
375 377 for r in xrange(p, -1, -1):
376 378 v = i[r][7]
377 379 n[v] = r
378 380 if v == node:
379 381 self._nodepos = r - 1
380 382 return r
381 383 raise LookupError(node, self.indexfile, _('no node'))
382 384
383 385 # Accessors for index entries.
384 386
385 387 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
386 388 # are flags.
387 389 def start(self, rev):
388 390 return int(self.index[rev][0] >> 16)
389 391
390 392 def flags(self, rev):
391 393 return self.index[rev][0] & 0xFFFF
392 394
393 395 def length(self, rev):
394 396 return self.index[rev][1]
395 397
396 398 def rawsize(self, rev):
397 399 """return the length of the uncompressed text for a given revision"""
398 400 l = self.index[rev][2]
399 401 if l >= 0:
400 402 return l
401 403
402 404 t = self.revision(self.node(rev))
403 405 return len(t)
404 406 size = rawsize
405 407
406 408 def chainbase(self, rev):
407 409 base = self._chainbasecache.get(rev)
408 410 if base is not None:
409 411 return base
410 412
411 413 index = self.index
412 414 base = index[rev][3]
413 415 while base != rev:
414 416 rev = base
415 417 base = index[rev][3]
416 418
417 419 self._chainbasecache[rev] = base
418 420 return base
419 421
420 422 def linkrev(self, rev):
421 423 return self.index[rev][4]
422 424
423 425 def parentrevs(self, rev):
424 426 return self.index[rev][5:7]
425 427
426 428 def node(self, rev):
427 429 return self.index[rev][7]
428 430
429 431 # Derived from index values.
430 432
431 433 def end(self, rev):
432 434 return self.start(rev) + self.length(rev)
433 435
434 436 def parents(self, node):
435 437 i = self.index
436 438 d = i[self.rev(node)]
437 439 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
438 440
439 441 def chainlen(self, rev):
440 442 return self._chaininfo(rev)[0]
441 443
442 444 def _chaininfo(self, rev):
443 445 chaininfocache = self._chaininfocache
444 446 if rev in chaininfocache:
445 447 return chaininfocache[rev]
446 448 index = self.index
447 449 generaldelta = self._generaldelta
448 450 iterrev = rev
449 451 e = index[iterrev]
450 452 clen = 0
451 453 compresseddeltalen = 0
452 454 while iterrev != e[3]:
453 455 clen += 1
454 456 compresseddeltalen += e[1]
455 457 if generaldelta:
456 458 iterrev = e[3]
457 459 else:
458 460 iterrev -= 1
459 461 if iterrev in chaininfocache:
460 462 t = chaininfocache[iterrev]
461 463 clen += t[0]
462 464 compresseddeltalen += t[1]
463 465 break
464 466 e = index[iterrev]
465 467 else:
466 468 # Add text length of base since decompressing that also takes
467 469 # work. For cache hits the length is already included.
468 470 compresseddeltalen += e[1]
469 471 r = (clen, compresseddeltalen)
470 472 chaininfocache[rev] = r
471 473 return r
472 474
473 475 def _deltachain(self, rev, stoprev=None):
474 476 """Obtain the delta chain for a revision.
475 477
476 478 ``stoprev`` specifies a revision to stop at. If not specified, we
477 479 stop at the base of the chain.
478 480
479 481 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
480 482 revs in ascending order and ``stopped`` is a bool indicating whether
481 483 ``stoprev`` was hit.
482 484 """
483 485 chain = []
484 486
485 487 # Alias to prevent attribute lookup in tight loop.
486 488 index = self.index
487 489 generaldelta = self._generaldelta
488 490
489 491 iterrev = rev
490 492 e = index[iterrev]
491 493 while iterrev != e[3] and iterrev != stoprev:
492 494 chain.append(iterrev)
493 495 if generaldelta:
494 496 iterrev = e[3]
495 497 else:
496 498 iterrev -= 1
497 499 e = index[iterrev]
498 500
499 501 if iterrev == stoprev:
500 502 stopped = True
501 503 else:
502 504 chain.append(iterrev)
503 505 stopped = False
504 506
505 507 chain.reverse()
506 508 return chain, stopped
507 509
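A minimal sketch of the walk above over a toy index, assuming the generaldelta layout where entry[3] is the delta base and a revision whose base is itself is a full snapshot:

    index = {0: 0, 1: 0, 2: 1, 3: 3, 4: 3}     # rev -> delta base rev
    def deltachain(rev):
        chain = []
        while index[rev] != rev:                # mirrors: while iterrev != e[3]
            chain.append(rev)
            rev = index[rev]
        chain.append(rev)                       # the full-snapshot base
        chain.reverse()
        return chain
    assert deltachain(2) == [0, 1, 2]
    assert deltachain(4) == [3, 4]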
508 510 def ancestors(self, revs, stoprev=0, inclusive=False):
509 511 """Generate the ancestors of 'revs' in reverse topological order.
510 512 Does not generate revs lower than stoprev.
511 513
512 514 See the documentation for ancestor.lazyancestors for more details."""
513 515
514 516 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
515 517 inclusive=inclusive)
516 518
517 519 def descendants(self, revs):
518 520 """Generate the descendants of 'revs' in revision order.
519 521
520 522 Yield a sequence of revision numbers starting with a child of
521 523 some rev in revs, i.e., each revision is *not* considered a
522 524 descendant of itself. Results are ordered by revision number (a
523 525 topological sort)."""
524 526 first = min(revs)
525 527 if first == nullrev:
526 528 for i in self:
527 529 yield i
528 530 return
529 531
530 532 seen = set(revs)
531 533 for i in self.revs(start=first + 1):
532 534 for x in self.parentrevs(i):
533 535 if x != nullrev and x in seen:
534 536 seen.add(i)
535 537 yield i
536 538 break
537 539
538 540 def findcommonmissing(self, common=None, heads=None):
539 541 """Return a tuple of the ancestors of common and the ancestors of heads
540 542 that are not ancestors of common. In revset terminology, we return the
541 543 tuple:
542 544
543 545 ::common, (::heads) - (::common)
544 546
545 547 The list is sorted by revision number, meaning it is
546 548 topologically sorted.
547 549
548 550 'heads' and 'common' are both lists of node IDs. If heads is
549 551 not supplied, uses all of the revlog's heads. If common is not
550 552 supplied, uses nullid."""
551 553 if common is None:
552 554 common = [nullid]
553 555 if heads is None:
554 556 heads = self.heads()
555 557
556 558 common = [self.rev(n) for n in common]
557 559 heads = [self.rev(n) for n in heads]
558 560
559 561 # we want the ancestors, but inclusive
560 562 class lazyset(object):
561 563 def __init__(self, lazyvalues):
562 564 self.addedvalues = set()
563 565 self.lazyvalues = lazyvalues
564 566
565 567 def __contains__(self, value):
566 568 return value in self.addedvalues or value in self.lazyvalues
567 569
568 570 def __iter__(self):
569 571 added = self.addedvalues
570 572 for r in added:
571 573 yield r
572 574 for r in self.lazyvalues:
573 575                 if r not in added:
574 576 yield r
575 577
576 578 def add(self, value):
577 579 self.addedvalues.add(value)
578 580
579 581 def update(self, values):
580 582 self.addedvalues.update(values)
581 583
582 584 has = lazyset(self.ancestors(common))
583 585 has.add(nullrev)
584 586 has.update(common)
585 587
586 588 # take all ancestors from heads that aren't in has
587 589 missing = set()
588 590 visit = collections.deque(r for r in heads if r not in has)
589 591 while visit:
590 592 r = visit.popleft()
591 593 if r in missing:
592 594 continue
593 595 else:
594 596 missing.add(r)
595 597 for p in self.parentrevs(r):
596 598 if p not in has:
597 599 visit.append(p)
598 600 missing = list(missing)
599 601 missing.sort()
600 602 return has, [self.node(miss) for miss in missing]
601 603
602 604 def incrementalmissingrevs(self, common=None):
603 605 """Return an object that can be used to incrementally compute the
604 606 revision numbers of the ancestors of arbitrary sets that are not
605 607 ancestors of common. This is an ancestor.incrementalmissingancestors
606 608 object.
607 609
608 610 'common' is a list of revision numbers. If common is not supplied, uses
609 611 nullrev.
610 612 """
611 613 if common is None:
612 614 common = [nullrev]
613 615
614 616 return ancestor.incrementalmissingancestors(self.parentrevs, common)
615 617
616 618 def findmissingrevs(self, common=None, heads=None):
617 619 """Return the revision numbers of the ancestors of heads that
618 620 are not ancestors of common.
619 621
620 622 More specifically, return a list of revision numbers corresponding to
621 623 nodes N such that every N satisfies the following constraints:
622 624
623 625 1. N is an ancestor of some node in 'heads'
624 626 2. N is not an ancestor of any node in 'common'
625 627
626 628 The list is sorted by revision number, meaning it is
627 629 topologically sorted.
628 630
629 631 'heads' and 'common' are both lists of revision numbers. If heads is
630 632 not supplied, uses all of the revlog's heads. If common is not
631 633 supplied, uses nullid."""
632 634 if common is None:
633 635 common = [nullrev]
634 636 if heads is None:
635 637 heads = self.headrevs()
636 638
637 639 inc = self.incrementalmissingrevs(common=common)
638 640 return inc.missingancestors(heads)
639 641
640 642 def findmissing(self, common=None, heads=None):
641 643 """Return the ancestors of heads that are not ancestors of common.
642 644
643 645 More specifically, return a list of nodes N such that every N
644 646 satisfies the following constraints:
645 647
646 648 1. N is an ancestor of some node in 'heads'
647 649 2. N is not an ancestor of any node in 'common'
648 650
649 651 The list is sorted by revision number, meaning it is
650 652 topologically sorted.
651 653
652 654 'heads' and 'common' are both lists of node IDs. If heads is
653 655 not supplied, uses all of the revlog's heads. If common is not
654 656 supplied, uses nullid."""
655 657 if common is None:
656 658 common = [nullid]
657 659 if heads is None:
658 660 heads = self.heads()
659 661
660 662 common = [self.rev(n) for n in common]
661 663 heads = [self.rev(n) for n in heads]
662 664
663 665 inc = self.incrementalmissingrevs(common=common)
664 666 return [self.node(r) for r in inc.missingancestors(heads)]
665 667
666 668 def nodesbetween(self, roots=None, heads=None):
667 669 """Return a topological path from 'roots' to 'heads'.
668 670
669 671 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
670 672 topologically sorted list of all nodes N that satisfy both of
671 673 these constraints:
672 674
673 675 1. N is a descendant of some node in 'roots'
674 676 2. N is an ancestor of some node in 'heads'
675 677
676 678 Every node is considered to be both a descendant and an ancestor
677 679 of itself, so every reachable node in 'roots' and 'heads' will be
678 680 included in 'nodes'.
679 681
680 682 'outroots' is the list of reachable nodes in 'roots', i.e., the
681 683 subset of 'roots' that is returned in 'nodes'. Likewise,
682 684 'outheads' is the subset of 'heads' that is also in 'nodes'.
683 685
684 686 'roots' and 'heads' are both lists of node IDs. If 'roots' is
685 687 unspecified, uses nullid as the only root. If 'heads' is
686 688 unspecified, uses list of all of the revlog's heads."""
687 689 nonodes = ([], [], [])
688 690 if roots is not None:
689 691 roots = list(roots)
690 692 if not roots:
691 693 return nonodes
692 694 lowestrev = min([self.rev(n) for n in roots])
693 695 else:
694 696 roots = [nullid] # Everybody's a descendant of nullid
695 697 lowestrev = nullrev
696 698 if (lowestrev == nullrev) and (heads is None):
697 699 # We want _all_ the nodes!
698 700 return ([self.node(r) for r in self], [nullid], list(self.heads()))
699 701 if heads is None:
700 702 # All nodes are ancestors, so the latest ancestor is the last
701 703 # node.
702 704 highestrev = len(self) - 1
703 705 # Set ancestors to None to signal that every node is an ancestor.
704 706 ancestors = None
705 707 # Set heads to an empty dictionary for later discovery of heads
706 708 heads = {}
707 709 else:
708 710 heads = list(heads)
709 711 if not heads:
710 712 return nonodes
711 713 ancestors = set()
712 714 # Turn heads into a dictionary so we can remove 'fake' heads.
713 715 # Also, later we will be using it to filter out the heads we can't
714 716 # find from roots.
715 717 heads = dict.fromkeys(heads, False)
716 718 # Start at the top and keep marking parents until we're done.
717 719 nodestotag = set(heads)
718 720 # Remember where the top was so we can use it as a limit later.
719 721 highestrev = max([self.rev(n) for n in nodestotag])
720 722 while nodestotag:
721 723 # grab a node to tag
722 724 n = nodestotag.pop()
723 725 # Never tag nullid
724 726 if n == nullid:
725 727 continue
726 728 # A node's revision number represents its place in a
727 729 # topologically sorted list of nodes.
728 730 r = self.rev(n)
729 731 if r >= lowestrev:
730 732 if n not in ancestors:
731 733 # If we are possibly a descendant of one of the roots
732 734 # and we haven't already been marked as an ancestor
733 735 ancestors.add(n) # Mark as ancestor
734 736 # Add non-nullid parents to list of nodes to tag.
735 737 nodestotag.update([p for p in self.parents(n) if
736 738 p != nullid])
737 739 elif n in heads: # We've seen it before, is it a fake head?
738 740 # So it is, real heads should not be the ancestors of
739 741 # any other heads.
740 742 heads.pop(n)
741 743 if not ancestors:
742 744 return nonodes
743 745 # Now that we have our set of ancestors, we want to remove any
744 746 # roots that are not ancestors.
745 747
746 748 # If one of the roots was nullid, everything is included anyway.
747 749 if lowestrev > nullrev:
748 750 # But, since we weren't, let's recompute the lowest rev to not
749 751 # include roots that aren't ancestors.
750 752
751 753 # Filter out roots that aren't ancestors of heads
752 754 roots = [root for root in roots if root in ancestors]
753 755 # Recompute the lowest revision
754 756 if roots:
755 757 lowestrev = min([self.rev(root) for root in roots])
756 758 else:
757 759 # No more roots? Return empty list
758 760 return nonodes
759 761 else:
760 762 # We are descending from nullid, and don't need to care about
761 763 # any other roots.
762 764 lowestrev = nullrev
763 765 roots = [nullid]
764 766 # Transform our roots list into a set.
765 767 descendants = set(roots)
766 768 # Also, keep the original roots so we can filter out roots that aren't
767 769 # 'real' roots (i.e. are descended from other roots).
768 770 roots = descendants.copy()
769 771 # Our topologically sorted list of output nodes.
770 772 orderedout = []
771 773 # Don't start at nullid since we don't want nullid in our output list,
772 774 # and if nullid shows up in descendants, empty parents will look like
773 775 # they're descendants.
774 776 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
775 777 n = self.node(r)
776 778 isdescendant = False
777 779 if lowestrev == nullrev: # Everybody is a descendant of nullid
778 780 isdescendant = True
779 781 elif n in descendants:
780 782 # n is already a descendant
781 783 isdescendant = True
782 784 # This check only needs to be done here because all the roots
783 785                 # will start being marked as descendants before the loop.
784 786 if n in roots:
785 787 # If n was a root, check if it's a 'real' root.
786 788 p = tuple(self.parents(n))
787 789 # If any of its parents are descendants, it's not a root.
788 790 if (p[0] in descendants) or (p[1] in descendants):
789 791 roots.remove(n)
790 792 else:
791 793 p = tuple(self.parents(n))
792 794 # A node is a descendant if either of its parents are
793 795                 # descendants. (We seeded the descendants set with the roots
794 796 # up there, remember?)
795 797 if (p[0] in descendants) or (p[1] in descendants):
796 798 descendants.add(n)
797 799 isdescendant = True
798 800 if isdescendant and ((ancestors is None) or (n in ancestors)):
799 801 # Only include nodes that are both descendants and ancestors.
800 802 orderedout.append(n)
801 803 if (ancestors is not None) and (n in heads):
802 804 # We're trying to figure out which heads are reachable
803 805 # from roots.
804 806 # Mark this head as having been reached
805 807 heads[n] = True
806 808 elif ancestors is None:
807 809 # Otherwise, we're trying to discover the heads.
808 810 # Assume this is a head because if it isn't, the next step
809 811 # will eventually remove it.
810 812 heads[n] = True
811 813 # But, obviously its parents aren't.
812 814 for p in self.parents(n):
813 815 heads.pop(p, None)
814 816 heads = [head for head, flag in heads.iteritems() if flag]
815 817 roots = list(roots)
816 818 assert orderedout
817 819 assert roots
818 820 assert heads
819 821 return (orderedout, roots, heads)
820 822
821 823 def headrevs(self):
822 824 try:
823 825 return self.index.headrevs()
824 826 except AttributeError:
825 827 return self._headrevs()
826 828
827 829 def computephases(self, roots):
828 830 return self.index.computephasesmapsets(roots)
829 831
830 832 def _headrevs(self):
831 833 count = len(self)
832 834 if not count:
833 835 return [nullrev]
834 836         # we won't iterate over filtered revs, so nobody is a head at start
835 837 ishead = [0] * (count + 1)
836 838 index = self.index
837 839 for r in self:
838 840             ishead[r] = 1  # I may be a head
839 841 e = index[r]
840 842             ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
841 843 return [r for r, val in enumerate(ishead) if val]
842 844
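The marking pass above can be reproduced on a toy DAG: every revision starts out as a candidate head and loses that status as soon as it appears as somebody's parent. A sketch (nullrev is -1):

    parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    ishead = [1] * len(parents)
    for r in range(len(parents)):
        for p in parents[r]:
            if p != -1:
                ishead[p] = 0                   # parents are not heads
    assert [r for r, v in enumerate(ishead) if v] == [3]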
843 845 def heads(self, start=None, stop=None):
844 846 """return the list of all nodes that have no children
845 847
846 848 if start is specified, only heads that are descendants of
847 849 start will be returned
848 850 if stop is specified, it will consider all the revs from stop
849 851 as if they had no children
850 852 """
851 853 if start is None and stop is None:
852 854 if not len(self):
853 855 return [nullid]
854 856 return [self.node(r) for r in self.headrevs()]
855 857
856 858 if start is None:
857 859 start = nullid
858 860 if stop is None:
859 861 stop = []
860 862 stoprevs = set([self.rev(n) for n in stop])
861 863 startrev = self.rev(start)
862 864 reachable = set((startrev,))
863 865 heads = set((startrev,))
864 866
865 867 parentrevs = self.parentrevs
866 868 for r in self.revs(start=startrev + 1):
867 869 for p in parentrevs(r):
868 870 if p in reachable:
869 871 if r not in stoprevs:
870 872 reachable.add(r)
871 873 heads.add(r)
872 874 if p in heads and p not in stoprevs:
873 875 heads.remove(p)
874 876
875 877 return [self.node(r) for r in heads]
876 878
877 879 def children(self, node):
878 880 """find the children of a given node"""
879 881 c = []
880 882 p = self.rev(node)
881 883 for r in self.revs(start=p + 1):
882 884 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
883 885 if prevs:
884 886 for pr in prevs:
885 887 if pr == p:
886 888 c.append(self.node(r))
887 889 elif p == nullrev:
888 890 c.append(self.node(r))
889 891 return c
890 892
891 893 def descendant(self, start, end):
892 894 if start == nullrev:
893 895 return True
894 896 for i in self.descendants([start]):
895 897 if i == end:
896 898 return True
897 899 elif i > end:
898 900 break
899 901 return False
900 902
901 903 def commonancestorsheads(self, a, b):
902 904 """calculate all the heads of the common ancestors of nodes a and b"""
903 905 a, b = self.rev(a), self.rev(b)
904 906 try:
905 907 ancs = self.index.commonancestorsheads(a, b)
906 908 except (AttributeError, OverflowError): # C implementation failed
907 909 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
908 910 return map(self.node, ancs)
909 911
910 912 def isancestor(self, a, b):
911 913 """return True if node a is an ancestor of node b
912 914
913 915 The implementation of this is trivial but the use of
914 916 commonancestorsheads is not."""
915 917 return a in self.commonancestorsheads(a, b)
916 918
917 919 def ancestor(self, a, b):
918 920 """calculate the "best" common ancestor of nodes a and b"""
919 921
920 922 a, b = self.rev(a), self.rev(b)
921 923 try:
922 924 ancs = self.index.ancestors(a, b)
923 925 except (AttributeError, OverflowError):
924 926 ancs = ancestor.ancestors(self.parentrevs, a, b)
925 927 if ancs:
926 928 # choose a consistent winner when there's a tie
927 929 return min(map(self.node, ancs))
928 930 return nullid
929 931
930 932 def _match(self, id):
931 933 if isinstance(id, int):
932 934 # rev
933 935 return self.node(id)
934 936 if len(id) == 20:
935 937 # possibly a binary node
936 938 # odds of a binary node being all hex in ASCII are 1 in 10**25
937 939 try:
938 940 node = id
939 941 self.rev(node) # quick search the index
940 942 return node
941 943 except LookupError:
942 944 pass # may be partial hex id
943 945 try:
944 946 # str(rev)
945 947 rev = int(id)
946 948 if str(rev) != id:
947 949 raise ValueError
948 950 if rev < 0:
949 951 rev = len(self) + rev
950 952 if rev < 0 or rev >= len(self):
951 953 raise ValueError
952 954 return self.node(rev)
953 955 except (ValueError, OverflowError):
954 956 pass
955 957 if len(id) == 40:
956 958 try:
957 959 # a full hex nodeid?
958 960 node = bin(id)
959 961 self.rev(node)
960 962 return node
961 963 except (TypeError, LookupError):
962 964 pass
963 965
964 966 def _partialmatch(self, id):
965 967 try:
966 968 partial = self.index.partialmatch(id)
967 969 if partial and self.hasnode(partial):
968 970 return partial
969 971 return None
970 972 except RevlogError:
971 973 # parsers.c radix tree lookup gave multiple matches
972 974 # fast path: for unfiltered changelog, radix tree is accurate
973 975 if not getattr(self, 'filteredrevs', None):
974 976 raise LookupError(id, self.indexfile,
975 977 _('ambiguous identifier'))
976 978 # fall through to slow path that filters hidden revisions
977 979 except (AttributeError, ValueError):
978 980 # we are pure python, or key was too short to search radix tree
979 981 pass
980 982
981 983 if id in self._pcache:
982 984 return self._pcache[id]
983 985
984 986 if len(id) < 40:
985 987 try:
986 988 # hex(node)[:...]
987 989 l = len(id) // 2 # grab an even number of digits
988 990 prefix = bin(id[:l * 2])
989 991 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
990 992 nl = [n for n in nl if hex(n).startswith(id) and
991 993 self.hasnode(n)]
992 994 if len(nl) > 0:
993 995 if len(nl) == 1:
994 996 self._pcache[id] = nl[0]
995 997 return nl[0]
996 998 raise LookupError(id, self.indexfile,
997 999 _('ambiguous identifier'))
998 1000 return None
999 1001 except TypeError:
1000 1002 pass
1001 1003
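The slow path above prescreens candidates with a binary prefix built from an even number of hex digits, then confirms against the full hex form; this is why an odd-length identifier still works. A sketch of the two-stage filter (unhexlify stands in for node.bin):

    from binascii import unhexlify
    id = 'abcde'                        # odd-length hex prefix from the user
    l = len(id) // 2                    # grab an even number of digits
    prefix = unhexlify(id[:l * 2])      # '\xab\xcd'
    node = '\xab\xcd\xef' + '\x00' * 17
    assert node.startswith(prefix)              # cheap binary prescreen
    assert node.encode('hex').startswith(id)    # exact hex confirmation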
1002 1004 def lookup(self, id):
1003 1005 """locate a node based on:
1004 1006 - revision number or str(revision number)
1005 1007 - nodeid or subset of hex nodeid
1006 1008 """
1007 1009 n = self._match(id)
1008 1010 if n is not None:
1009 1011 return n
1010 1012 n = self._partialmatch(id)
1011 1013 if n:
1012 1014 return n
1013 1015
1014 1016 raise LookupError(id, self.indexfile, _('no match found'))
1015 1017
1016 1018 def cmp(self, node, text):
1017 1019 """compare text with a given file revision
1018 1020
1019 1021 returns True if text is different than what is stored.
1020 1022 """
1021 1023 p1, p2 = self.parents(node)
1022 1024 return hash(text, p1, p2) != node
1023 1025
1024 1026 def _addchunk(self, offset, data):
1025 1027 """Add a segment to the revlog cache.
1026 1028
1027 1029 Accepts an absolute offset and the data that is at that location.
1028 1030 """
1029 1031 o, d = self._chunkcache
1030 1032 # try to add to existing cache
1031 1033 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1032 1034 self._chunkcache = o, d + data
1033 1035 else:
1034 1036 self._chunkcache = offset, data
1035 1037
1036 1038 def _loadchunk(self, offset, length, df=None):
1037 1039 """Load a segment of raw data from the revlog.
1038 1040
1039 1041 Accepts an absolute offset, length to read, and an optional existing
1040 1042 file handle to read from.
1041 1043
1042 1044         If an existing file handle is passed, its seek position will be
1043 1045         moved and will NOT be restored.
1044 1046
1045 1047 Returns a str or buffer of raw byte data.
1046 1048 """
1047 1049 if df is not None:
1048 1050 closehandle = False
1049 1051 else:
1050 1052 if self._inline:
1051 1053 df = self.opener(self.indexfile)
1052 1054 else:
1053 1055 df = self.opener(self.datafile)
1054 1056 closehandle = True
1055 1057
1056 1058 # Cache data both forward and backward around the requested
1057 1059 # data, in a fixed size window. This helps speed up operations
1058 1060 # involving reading the revlog backwards.
1059 1061 cachesize = self._chunkcachesize
1060 1062 realoffset = offset & ~(cachesize - 1)
1061 1063 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1062 1064 - realoffset)
1063 1065 df.seek(realoffset)
1064 1066 d = df.read(reallength)
1065 1067 if closehandle:
1066 1068 df.close()
1067 1069 self._addchunk(realoffset, d)
1068 1070 if offset != realoffset or reallength != length:
1069 1071 return util.buffer(d, offset - realoffset, length)
1070 1072 return d
1071 1073
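The window arithmetic above is why __init__ insists that _chunkcachesize be a power of two: masking with ~(cachesize - 1) rounds the start down, and adding cachesize before masking rounds the end up, so the read always covers the requested span on cache-aligned boundaries. A worked example:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    assert (realoffset, reallength) == (65536, 65536)
    assert realoffset <= offset
    assert realoffset + reallength >= offset + length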
1072 1074 def _getchunk(self, offset, length, df=None):
1073 1075 """Obtain a segment of raw data from the revlog.
1074 1076
1075 1077 Accepts an absolute offset, length of bytes to obtain, and an
1076 1078 optional file handle to the already-opened revlog. If the file
1077 1079         handle is used, its original seek position will not be preserved.
1078 1080
1079 1081 Requests for data may be returned from a cache.
1080 1082
1081 1083 Returns a str or a buffer instance of raw byte data.
1082 1084 """
1083 1085 o, d = self._chunkcache
1084 1086 l = len(d)
1085 1087
1086 1088 # is it in the cache?
1087 1089 cachestart = offset - o
1088 1090 cacheend = cachestart + length
1089 1091 if cachestart >= 0 and cacheend <= l:
1090 1092 if cachestart == 0 and cacheend == l:
1091 1093 return d # avoid a copy
1092 1094 return util.buffer(d, cachestart, cacheend - cachestart)
1093 1095
1094 1096 return self._loadchunk(offset, length, df=df)
1095 1097
1096 1098 def _chunkraw(self, startrev, endrev, df=None):
1097 1099 """Obtain a segment of raw data corresponding to a range of revisions.
1098 1100
1099 1101 Accepts the start and end revisions and an optional already-open
1100 1102 file handle to be used for reading. If the file handle is read, its
1101 1103 seek position will not be preserved.
1102 1104
1103 1105 Requests for data may be satisfied by a cache.
1104 1106
1105 1107 Returns a 2-tuple of (offset, data) for the requested range of
1106 1108 revisions. Offset is the integer offset from the beginning of the
1107 1109 revlog and data is a str or buffer of the raw byte data.
1108 1110
1109 1111 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1110 1112 to determine where each revision's data begins and ends.
1111 1113 """
1112 1114 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1113 1115 # (functions are expensive).
1114 1116 index = self.index
1115 1117 istart = index[startrev]
1116 1118 start = int(istart[0] >> 16)
1117 1119 if startrev == endrev:
1118 1120 end = start + istart[1]
1119 1121 else:
1120 1122 iend = index[endrev]
1121 1123 end = int(iend[0] >> 16) + iend[1]
1122 1124
1123 1125 if self._inline:
1124 1126 start += (startrev + 1) * self._io.size
1125 1127 end += (endrev + 1) * self._io.size
1126 1128 length = end - start
1127 1129
1128 1130 return start, self._getchunk(start, length, df=df)
1129 1131
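For inline revlogs, data chunks are interleaved with the 64-byte index entries (entry 0, data 0, entry 1, data 1, ...), so a revision's physical position is its logical start plus (rev + 1) entry sizes, exactly the adjustment applied above. A toy check:

    iosize = 64                          # revlogio().size
    logical = [0, 30, 75]                # logical data offsets of revs 0..2
    physical = [off + (rev + 1) * iosize for rev, off in enumerate(logical)]
    assert physical == [64, 158, 267]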
1130 1132 def _chunk(self, rev, df=None):
1131 1133 """Obtain a single decompressed chunk for a revision.
1132 1134
1133 1135 Accepts an integer revision and an optional already-open file handle
1134 1136 to be used for reading. If used, the seek position of the file will not
1135 1137 be preserved.
1136 1138
1137 1139 Returns a str holding uncompressed data for the requested revision.
1138 1140 """
1139 1141 return decompress(self._chunkraw(rev, rev, df=df)[1])
1140 1142
1141 1143 def _chunks(self, revs, df=None):
1142 1144 """Obtain decompressed chunks for the specified revisions.
1143 1145
1144 1146 Accepts an iterable of numeric revisions that are assumed to be in
1145 1147 ascending order. Also accepts an optional already-open file handle
1146 1148 to be used for reading. If used, the seek position of the file will
1147 1149 not be preserved.
1148 1150
1149 1151 This function is similar to calling ``self._chunk()`` multiple times,
1150 1152 but is faster.
1151 1153
1152 1154 Returns a list with decompressed data for each requested revision.
1153 1155 """
1154 1156 if not revs:
1155 1157 return []
1156 1158 start = self.start
1157 1159 length = self.length
1158 1160 inline = self._inline
1159 1161 iosize = self._io.size
1160 1162 buffer = util.buffer
1161 1163
1162 1164 l = []
1163 1165 ladd = l.append
1164 1166
1165 1167 try:
1166 1168 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1167 1169 except OverflowError:
1168 1170 # issue4215 - we can't cache a run of chunks greater than
1169 1171 # 2G on Windows
1170 1172 return [self._chunk(rev, df=df) for rev in revs]
1171 1173
1172 1174 for rev in revs:
1173 1175 chunkstart = start(rev)
1174 1176 if inline:
1175 1177 chunkstart += (rev + 1) * iosize
1176 1178 chunklength = length(rev)
1177 1179 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1178 1180
1179 1181 return l
1180 1182
1181 1183 def _chunkclear(self):
1182 1184 """Clear the raw chunk cache."""
1183 1185 self._chunkcache = (0, '')
1184 1186
1185 1187 def deltaparent(self, rev):
1186 1188 """return deltaparent of the given revision"""
1187 1189 base = self.index[rev][3]
1188 1190 if base == rev:
1189 1191 return nullrev
1190 1192 elif self._generaldelta:
1191 1193 return base
1192 1194 else:
1193 1195 return rev - 1
1194 1196
1195 1197 def revdiff(self, rev1, rev2):
1196 1198 """return or calculate a delta between two revisions"""
1197 1199 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1198 1200 return str(self._chunk(rev2))
1199 1201
1200 1202 return mdiff.textdiff(self.revision(rev1),
1201 1203 self.revision(rev2))
1202 1204
1203 1205 def revision(self, nodeorrev, _df=None):
1204 1206 """return an uncompressed revision of a given node or revision
1205 1207 number.
1206 1208
1207 1209 _df is an existing file handle to read from. It is meant to only be
1208 1210 used internally.
1209 1211 """
1210 1212 if isinstance(nodeorrev, int):
1211 1213 rev = nodeorrev
1212 1214 node = self.node(rev)
1213 1215 else:
1214 1216 node = nodeorrev
1215 1217 rev = None
1216 1218
1217 1219 cachedrev = None
1218 1220 if node == nullid:
1219 1221 return ""
1220 1222 if self._cache:
1221 1223 if self._cache[0] == node:
1222 1224 return self._cache[2]
1223 1225 cachedrev = self._cache[1]
1224 1226
1225 1227 # look up what we need to read
1226 1228 text = None
1227 1229 if rev is None:
1228 1230 rev = self.rev(node)
1229 1231
1230 1232 # check rev flags
1231 1233 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1232 1234 raise RevlogError(_('incompatible revision flag %x') %
1233 1235 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1234 1236
1235 1237 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1236 1238 if stopped:
1237 1239 text = self._cache[2]
1238 1240
1239 1241 # drop cache to save memory
1240 1242 self._cache = None
1241 1243
1242 1244 bins = self._chunks(chain, df=_df)
1243 1245 if text is None:
1244 1246 text = str(bins[0])
1245 1247 bins = bins[1:]
1246 1248
1247 1249 text = mdiff.patches(text, bins)
1248 1250
1249 1251 text = self._checkhash(text, node, rev)
1250 1252
1251 1253 self._cache = (node, rev, text)
1252 1254 return text
1253 1255
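Conceptually, the reconstruction above takes the chain's base text and folds every delta in with a single mdiff.patches() call. A sketch of that contract, assuming a Python 2 Mercurial checkout on sys.path:

    from mercurial import mdiff
    base = 'line1\nline2\n'
    v2 = 'line1\nline2 changed\n'
    v3 = 'line0\nline1\nline2 changed\n'
    deltas = [mdiff.textdiff(base, v2), mdiff.textdiff(v2, v3)]
    assert mdiff.patches(base, deltas) == v3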
1254 1256 def hash(self, text, p1, p2):
1255 1257 """Compute a node hash.
1256 1258
1257 1259 Available as a function so that subclasses can replace the hash
1258 1260 as needed.
1259 1261 """
1260 1262 return hash(text, p1, p2)
1261 1263
1262 1264 def _checkhash(self, text, node, rev):
1263 1265 p1, p2 = self.parents(node)
1264 1266 self.checkhash(text, p1, p2, node, rev)
1265 1267 return text
1266 1268
1267 1269 def checkhash(self, text, p1, p2, node, rev=None):
1268 1270 if node != self.hash(text, p1, p2):
1269 1271 revornode = rev
1270 1272 if revornode is None:
1271 1273 revornode = templatefilters.short(hex(node))
1272 1274 raise RevlogError(_("integrity check failed on %s:%s")
1273 1275 % (self.indexfile, revornode))
1274 1276
1275 1277 def checkinlinesize(self, tr, fp=None):
1276 1278 """Check if the revlog is too big for inline and convert if so.
1277 1279
1278 1280 This should be called after revisions are added to the revlog. If the
1279 1281 revlog has grown too large to be an inline revlog, it will convert it
1280 1282 to use multiple index and data files.
1281 1283 """
1282 1284 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1283 1285 return
1284 1286
1285 1287 trinfo = tr.find(self.indexfile)
1286 1288 if trinfo is None:
1287 1289 raise RevlogError(_("%s not found in the transaction")
1288 1290 % self.indexfile)
1289 1291
1290 1292 trindex = trinfo[2]
1291 1293 if trindex is not None:
1292 1294 dataoff = self.start(trindex)
1293 1295 else:
1294 1296 # revlog was stripped at start of transaction, use all leftover data
1295 1297 trindex = len(self) - 1
1296 1298 dataoff = self.end(-2)
1297 1299
1298 1300 tr.add(self.datafile, dataoff)
1299 1301
1300 1302 if fp:
1301 1303 fp.flush()
1302 1304 fp.close()
1303 1305
1304 1306 df = self.opener(self.datafile, 'w')
1305 1307 try:
1306 1308 for r in self:
1307 1309 df.write(self._chunkraw(r, r)[1])
1308 1310 finally:
1309 1311 df.close()
1310 1312
1311 1313 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1312 1314 checkambig=self._checkambig)
1313 1315 self.version &= ~(REVLOGNGINLINEDATA)
1314 1316 self._inline = False
1315 1317 for i in self:
1316 1318 e = self._io.packentry(self.index[i], self.node, self.version, i)
1317 1319 fp.write(e)
1318 1320
1319 1321 # if we don't call close, the temp file will never replace the
1320 1322 # real index
1321 1323 fp.close()
1322 1324
1323 1325 tr.replace(self.indexfile, trindex * self._io.size)
1324 1326 self._chunkclear()
1325 1327
1326 1328 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1327 1329 node=None):
1328 1330 """add a revision to the log
1329 1331
1330 1332 text - the revision data to add
1331 1333 transaction - the transaction object used for rollback
1332 1334 link - the linkrev data to add
1333 1335 p1, p2 - the parent nodeids of the revision
1334 1336 cachedelta - an optional precomputed delta
1335 1337 node - nodeid of revision; typically node is not specified, and it is
1336 1338 computed by default as hash(text, p1, p2), however subclasses might
1337 1339           use a different hashing method (and override checkhash() in such a case)
1338 1340 """
1339 1341 if link == nullrev:
1340 1342 raise RevlogError(_("attempted to add linkrev -1 to %s")
1341 1343 % self.indexfile)
1342 1344
1343 1345 if len(text) > _maxentrysize:
1344 1346 raise RevlogError(
1345 1347 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1346 1348 % (self.indexfile, len(text)))
1347 1349
1348 1350 node = node or self.hash(text, p1, p2)
1349 1351 if node in self.nodemap:
1350 1352 return node
1351 1353
1352 1354 dfh = None
1353 1355 if not self._inline:
1354 1356 dfh = self.opener(self.datafile, "a+")
1355 1357 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1356 1358 try:
1357 1359 return self._addrevision(node, text, transaction, link, p1, p2,
1358 1360 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1359 1361 finally:
1360 1362 if dfh:
1361 1363 dfh.close()
1362 1364 ifh.close()
1363 1365
1364 1366 def compress(self, text):
1365 1367 """ generate a possibly-compressed representation of text """
1366 1368 if not text:
1367 1369 return ("", text)
1368 1370 l = len(text)
1369 1371 bin = None
1370 1372 if l < 44:
1371 1373 pass
1372 1374 elif l > 1000000:
1373 1375 # zlib makes an internal copy, thus doubling memory usage for
1374 1376             # large files, so let's do this in pieces
1375 1377 z = zlib.compressobj()
1376 1378 p = []
1377 1379 pos = 0
1378 1380 while pos < l:
1379 1381 pos2 = pos + 2**20
1380 1382 p.append(z.compress(text[pos:pos2]))
1381 1383 pos = pos2
1382 1384 p.append(z.flush())
1383 1385 if sum(map(len, p)) < l:
1384 1386 bin = "".join(p)
1385 1387 else:
1386 1388 bin = _compress(text)
1387 1389 if bin is None or len(bin) > l:
1388 1390 if text[0] == '\0':
1389 1391 return ("", text)
1390 1392 return ('u', text)
1391 1393 return ("", bin)
1392 1394
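The (header, data) pair returned above pairs off with decompress(): '' plus zlib output when compression wins, 'u' plus raw text when it does not, and '' plus raw text when the text already starts with NUL. A sketch, assuming rl is any revlog instance (compress() touches no instance state):

    h, d = rl.compress('a' * 200)        # repetitive text: zlib wins
    assert h == '' and d[0] == 'x'
    h, d = rl.compress('tiny')           # under 44 bytes: not worth compressing
    assert (h, d) == ('u', 'tiny')
    assert str(decompress(h + d)) == 'tiny'
    h, d = rl.compress('\0binary')       # NUL-led text needs no marker
    assert (h, d) == ('', '\0binary')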
1393 1395 def _isgooddelta(self, d, textlen):
1394 1396 """Returns True if the given delta is good. Good means that it is within
1395 1397 the disk span, disk size, and chain length bounds that we know to be
1396 1398 performant."""
1397 1399 if d is None:
1398 1400 return False
1399 1401
1400 1402 # - 'dist' is the distance from the base revision -- bounding it limits
1401 1403 # the amount of I/O we need to do.
1402 1404 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1403 1405 # to apply -- bounding it limits the amount of CPU we consume.
1404 1406 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1405 1407 if (dist > textlen * 4 or l > textlen or
1406 1408 compresseddeltalen > textlen * 2 or
1407 1409 (self._maxchainlen and chainlen > self._maxchainlen)):
1408 1410 return False
1409 1411
1410 1412 return True
1411 1413
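For instance, with a 1000-byte fulltext the bounds above admit a 300-byte delta whose chain spans 2500 bytes on disk and sums to 1200 bytes of deltas, but reject the same delta once the span or the delta total grows past 4x or 2x the text size. A sketch of the checks:

    textlen = 1000
    def good(dist, l, compresseddeltalen, chainlen=5, maxchainlen=None):
        return not (dist > textlen * 4 or l > textlen or
                    compresseddeltalen > textlen * 2 or
                    (maxchainlen and chainlen > maxchainlen))
    assert good(2500, 300, 1200)         # within all bounds
    assert not good(4100, 300, 1200)     # too much data-file I/O (dist)
    assert not good(2500, 300, 2100)     # too much CPU to apply the chain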
1412 1414 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1413 1415 cachedelta, ifh, dfh, alwayscache=False):
1414 1416 """internal function to add revisions to the log
1415 1417
1416 1418 see addrevision for argument descriptions.
1417 1419 invariants:
1418 1420 - text is optional (can be None); if not set, cachedelta must be set.
1419 1421 if both are set, they must correspond to each other.
1420 1422 """
1421 1423 btext = [text]
1422 1424 def buildtext():
1423 1425 if btext[0] is not None:
1424 1426 return btext[0]
1425 1427 baserev = cachedelta[0]
1426 1428 delta = cachedelta[1]
1427 1429 # special case deltas which replace entire base; no need to decode
1428 1430 # base revision. this neatly avoids censored bases, which throw when
1429 1431 # they're decoded.
1430 1432 hlen = struct.calcsize(">lll")
1431 1433 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1432 1434 len(delta) - hlen):
1433 1435 btext[0] = delta[hlen:]
1434 1436 else:
1435 1437 if self._inline:
1436 1438 fh = ifh
1437 1439 else:
1438 1440 fh = dfh
1439 1441 basetext = self.revision(self.node(baserev), _df=fh)
1440 1442 btext[0] = mdiff.patch(basetext, delta)
1441 1443 try:
1442 1444 self.checkhash(btext[0], p1, p2, node)
1443 1445 if flags & REVIDX_ISCENSORED:
1444 1446 raise RevlogError(_('node %s is not censored') % node)
1445 1447 except CensoredNodeError:
1446 1448 # must pass the censored index flag to add censored revisions
1447 1449 if not flags & REVIDX_ISCENSORED:
1448 1450 raise
1449 1451 return btext[0]
1450 1452
1451 1453 def builddelta(rev):
1452 1454 # can we use the cached delta?
1453 1455 if cachedelta and cachedelta[0] == rev:
1454 1456 delta = cachedelta[1]
1455 1457 else:
1456 1458 t = buildtext()
1457 1459 if self.iscensored(rev):
1458 1460 # deltas based on a censored revision must replace the
1459 1461 # full content in one patch, so delta works everywhere
1460 1462 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1461 1463 delta = header + t
1462 1464 else:
1463 1465 if self._inline:
1464 1466 fh = ifh
1465 1467 else:
1466 1468 fh = dfh
1467 1469 ptext = self.revision(self.node(rev), _df=fh)
1468 1470 delta = mdiff.textdiff(ptext, t)
1469 1471 header, data = self.compress(delta)
1470 1472 deltalen = len(header) + len(data)
1471 1473 chainbase = self.chainbase(rev)
1472 1474 dist = deltalen + offset - self.start(chainbase)
1473 1475 if self._generaldelta:
1474 1476 base = rev
1475 1477 else:
1476 1478 base = chainbase
1477 1479 chainlen, compresseddeltalen = self._chaininfo(rev)
1478 1480 chainlen += 1
1479 1481 compresseddeltalen += deltalen
1480 1482 return (dist, deltalen, (header, data), base,
1481 1483 chainbase, chainlen, compresseddeltalen)
1482 1484
1483 1485 curr = len(self)
1484 1486 prev = curr - 1
1485 1487 offset = self.end(prev)
1486 1488 delta = None
1487 1489 p1r, p2r = self.rev(p1), self.rev(p2)
1488 1490
1489 1491 # full versions are inserted when the needed deltas
1490 1492 # become comparable to the uncompressed text
1491 1493 if text is None:
1492 1494 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1493 1495 cachedelta[1])
1494 1496 else:
1495 1497 textlen = len(text)
1496 1498
1497 1499 # should we try to build a delta?
1498 1500 if prev != nullrev and self.storedeltachains:
1499 1501 tested = set()
1500 1502 # This condition is true most of the time when processing
1501 1503 # changegroup data into a generaldelta repo. The only time it
1502 1504 # isn't true is if this is the first revision in a delta chain
1503 1505 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1504 1506 if cachedelta and self._generaldelta and self._lazydeltabase:
1505 1507 # Assume what we received from the server is a good choice
1506 1508 # build delta will reuse the cache
1507 1509 candidatedelta = builddelta(cachedelta[0])
1508 1510 tested.add(cachedelta[0])
1509 1511 if self._isgooddelta(candidatedelta, textlen):
1510 1512 delta = candidatedelta
1511 1513 if delta is None and self._generaldelta:
1512 1514 # exclude already lazy tested base if any
1513 1515 parents = [p for p in (p1r, p2r)
1514 1516 if p != nullrev and p not in tested]
1515 1517 if parents and not self._aggressivemergedeltas:
1516 1518 # Pick whichever parent is closer to us (to minimize the
1517 1519 # chance of having to build a fulltext).
1518 1520 parents = [max(parents)]
1519 1521 tested.update(parents)
1520 1522 pdeltas = []
1521 1523 for p in parents:
1522 1524 pd = builddelta(p)
1523 1525 if self._isgooddelta(pd, textlen):
1524 1526 pdeltas.append(pd)
1525 1527 if pdeltas:
1526 1528 delta = min(pdeltas, key=lambda x: x[1])
1527 1529 if delta is None and prev not in tested:
1528 1530             # other approaches failed; try against prev to hopefully save us a
1529 1531 # fulltext.
1530 1532 candidatedelta = builddelta(prev)
1531 1533 if self._isgooddelta(candidatedelta, textlen):
1532 1534 delta = candidatedelta
1533 1535 if delta is not None:
1534 1536 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1535 1537 else:
1536 1538 text = buildtext()
1537 1539 data = self.compress(text)
1538 1540 l = len(data[1]) + len(data[0])
1539 1541 base = chainbase = curr
1540 1542
1541 1543 e = (offset_type(offset, flags), l, textlen,
1542 1544 base, link, p1r, p2r, node)
1543 1545 self.index.insert(-1, e)
1544 1546 self.nodemap[node] = curr
1545 1547
1546 1548 entry = self._io.packentry(e, self.node, self.version, curr)
1547 1549 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1548 1550
1549 1551 if alwayscache and text is None:
1550 1552 text = buildtext()
1551 1553
1552 1554 if type(text) == str: # only accept immutable objects
1553 1555 self._cache = (node, curr, text)
1554 1556 self._chainbasecache[curr] = chainbase
1555 1557 return node
1556 1558
1557 1559 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1558 1560 # Files opened in a+ mode have inconsistent behavior on various
1559 1561 # platforms. Windows requires that a file positioning call be made
1560 1562 # when the file handle transitions between reads and writes. See
1561 1563 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1562 1564 # platforms, Python or the platform itself can be buggy. Some versions
1563 1565 # of Solaris have been observed to not append at the end of the file
1564 1566 # if the file was seeked to before the end. See issue4943 for more.
1565 1567 #
1566 1568 # We work around this issue by inserting a seek() before writing.
1567 1569 # Note: This is likely not necessary on Python 3.
1568 1570 ifh.seek(0, os.SEEK_END)
1569 1571 if dfh:
1570 1572 dfh.seek(0, os.SEEK_END)
1571 1573
1572 1574 curr = len(self) - 1
1573 1575 if not self._inline:
1574 1576 transaction.add(self.datafile, offset)
1575 1577 transaction.add(self.indexfile, curr * len(entry))
1576 1578 if data[0]:
1577 1579 dfh.write(data[0])
1578 1580 dfh.write(data[1])
1579 1581 ifh.write(entry)
1580 1582 else:
1581 1583 offset += curr * self._io.size
1582 1584 transaction.add(self.indexfile, offset, curr)
1583 1585 ifh.write(entry)
1584 1586 ifh.write(data[0])
1585 1587 ifh.write(data[1])
1586 1588 self.checkinlinesize(transaction, ifh)
1587 1589
1588 1590 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1589 1591 """
1590 1592 add a delta group
1591 1593
1592 1594 given a set of deltas, add them to the revision log. the
1593 1595 first delta is against its parent, which should be in our
1594 1596 log, the rest are against the previous delta.
1595 1597
1596 1598 If ``addrevisioncb`` is defined, it will be called with arguments of
1597 1599 this revlog and the node that was added.
1598 1600 """
1599 1601
1600 1602 # track the base of the current delta log
1601 1603 content = []
1602 1604 node = None
1603 1605
1604 1606 r = len(self)
1605 1607 end = 0
1606 1608 if r:
1607 1609 end = self.end(r - 1)
1608 1610 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1609 1611 isize = r * self._io.size
1610 1612 if self._inline:
1611 1613 transaction.add(self.indexfile, end + isize, r)
1612 1614 dfh = None
1613 1615 else:
1614 1616 transaction.add(self.indexfile, isize, r)
1615 1617 transaction.add(self.datafile, end)
1616 1618 dfh = self.opener(self.datafile, "a+")
1617 1619 def flush():
1618 1620 if dfh:
1619 1621 dfh.flush()
1620 1622 ifh.flush()
1621 1623 try:
1622 1624 # loop through our set of deltas
1623 1625 chain = None
1624 1626 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1625 1627 node = chunkdata['node']
1626 1628 p1 = chunkdata['p1']
1627 1629 p2 = chunkdata['p2']
1628 1630 cs = chunkdata['cs']
1629 1631 deltabase = chunkdata['deltabase']
1630 1632 delta = chunkdata['delta']
1631 1633 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1632 1634
1633 1635 content.append(node)
1634 1636
1635 1637 link = linkmapper(cs)
1636 1638 if node in self.nodemap:
1637 1639 # this can happen if two branches make the same change
1638 1640 chain = node
1639 1641 continue
1640 1642
1641 1643 for p in (p1, p2):
1642 1644 if p not in self.nodemap:
1643 1645 raise LookupError(p, self.indexfile,
1644 1646 _('unknown parent'))
1645 1647
1646 1648 if deltabase not in self.nodemap:
1647 1649 raise LookupError(deltabase, self.indexfile,
1648 1650 _('unknown delta base'))
1649 1651
1650 1652 baserev = self.rev(deltabase)
1651 1653
1652 1654 if baserev != nullrev and self.iscensored(baserev):
1653 1655 # if base is censored, delta must be full replacement in a
1654 1656 # single patch operation
1655 1657 hlen = struct.calcsize(">lll")
1656 1658 oldlen = self.rawsize(baserev)
1657 1659 newlen = len(delta) - hlen
1658 1660 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1659 1661 raise error.CensoredBaseError(self.indexfile,
1660 1662 self.node(baserev))
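# Illustrative sketch (added commentary): a delta is a sequence of
# hunks, each headed by three big-endian 32-bit integers (start, end,
# new length), so a whole-text replacement starts with a 12-byte
# header covering the entire base -- which is what the check above
# verifies:
#
# >>> struct.calcsize(">lll")
# 12
# >>> mdiff.replacediffheader(10, 20) == struct.pack(">lll", 0, 10, 20)
# True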
1661 1663
1662 1664 if not flags and self._peek_iscensored(baserev, delta, flush):
1663 1665 flags |= REVIDX_ISCENSORED
1664 1666
1665 1667 # We assume consumers of addrevisioncb will want to retrieve
1666 1668 # the added revision, which will require a call to
1667 1669 # revision(). revision() will fast path if there is a cache
1668 1670 # hit. So, we tell _addrevision() to always cache in this case.
1669 1671 chain = self._addrevision(node, None, transaction, link,
1670 1672 p1, p2, flags, (baserev, delta),
1671 1673 ifh, dfh,
1672 1674 alwayscache=bool(addrevisioncb))
1673 1675
1674 1676 if addrevisioncb:
1675 1677 addrevisioncb(self, chain)
1676 1678
1677 1679 if not dfh and not self._inline:
1678 1680 # addrevision switched from inline to conventional storage;
1679 1681 # reopen the index and open the data file
1680 1682 ifh.close()
1681 1683 dfh = self.opener(self.datafile, "a+")
1682 1684 ifh = self.opener(self.indexfile, "a+",
1683 1685 checkambig=self._checkambig)
1684 1686 finally:
1685 1687 if dfh:
1686 1688 dfh.close()
1687 1689 ifh.close()
1688 1690
1689 1691 return content
1690 1692
1691 1693 def iscensored(self, rev):
1692 1694 """Check if a file revision is censored."""
1693 1695 return False
1694 1696
1695 1697 def _peek_iscensored(self, baserev, delta, flush):
1696 1698 """Quickly check if a delta produces a censored revision."""
1697 1699 return False
1698 1700
1699 1701 def getstrippoint(self, minlink):
1700 1702 """find the minimum rev that must be stripped to strip the linkrev
1701 1703
1702 1704 Returns a tuple containing the minimum rev and a set of all revs that
1703 1705 have linkrevs that will be broken by this strip.
1704 1706 """
1705 1707 brokenrevs = set()
1706 1708 strippoint = len(self)
1707 1709
1708 1710 heads = {}
1709 1711 futurelargelinkrevs = set()
1710 1712 for head in self.headrevs():
1711 1713 headlinkrev = self.linkrev(head)
1712 1714 heads[head] = headlinkrev
1713 1715 if headlinkrev >= minlink:
1714 1716 futurelargelinkrevs.add(headlinkrev)
1715 1717
1716 1718 # This algorithm involves walking down the rev graph, starting at the
1717 1719 # heads. Since the revs are topologically sorted according to linkrev,
1718 1720 # once all head linkrevs are below the minlink, we know there are
1719 1721 # no more revs that could have a linkrev >= minlink.
1720 1722 # So we can stop walking.
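# Worked example (added commentary, hypothetical revlog): for a linear
# revlog with linkrevs [0, 2, 3] and minlink=1, the walk pops rev 2
# (linkrev 3) and then rev 1 (linkrev 2), reaches rev 0 (linkrev 0,
# below minlink) with no large linkrevs left, and returns (1, set()).
# A stripped rev whose own linkrev is below minlink (possible around
# merges) lands in brokenrevs instead.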
1721 1723 while futurelargelinkrevs:
1722 1724 strippoint -= 1
1723 1725 linkrev = heads.pop(strippoint)
1724 1726
1725 1727 if linkrev < minlink:
1726 1728 brokenrevs.add(strippoint)
1727 1729 else:
1728 1730 futurelargelinkrevs.remove(linkrev)
1729 1731
1730 1732 for p in self.parentrevs(strippoint):
1731 1733 if p != nullrev:
1732 1734 plinkrev = self.linkrev(p)
1733 1735 heads[p] = plinkrev
1734 1736 if plinkrev >= minlink:
1735 1737 futurelargelinkrevs.add(plinkrev)
1736 1738
1737 1739 return strippoint, brokenrevs
1738 1740
1739 1741 def strip(self, minlink, transaction):
1740 1742 """truncate the revlog on the first revision with a linkrev >= minlink
1741 1743
1742 1744 This function is called when we're stripping revision minlink and
1743 1745 its descendants from the repository.
1744 1746
1745 1747 We have to remove all revisions with linkrev >= minlink, because
1746 1748 the equivalent changelog revisions will be renumbered after the
1747 1749 strip.
1748 1750
1749 1751 So we truncate the revlog on the first of these revisions, and
1750 1752 trust that the caller has saved the revisions that shouldn't be
1751 1753 removed and that it'll re-add them after this truncation.
1752 1754 """
1753 1755 if len(self) == 0:
1754 1756 return
1755 1757
1756 1758 rev, _ = self.getstrippoint(minlink)
1757 1759 if rev == len(self):
1758 1760 return
1759 1761
1760 1762 # first truncate the files on disk
1761 1763 end = self.start(rev)
1762 1764 if not self._inline:
1763 1765 transaction.add(self.datafile, end)
1764 1766 end = rev * self._io.size
1765 1767 else:
1766 1768 end += rev * self._io.size
1767 1769
1768 1770 transaction.add(self.indexfile, end)
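# Illustrative sketch (added commentary, assuming the 64-byte v1 index
# entry): stripping at rev == 10 truncates a non-inline revlog's index
# to 10 * 64 == 640 bytes and its data file to self.start(10) bytes;
# an inline revlog holds both in one file, truncated to
# self.start(10) + 10 * 64 bytes.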
1769 1771
1770 1772 # then reset internal state in memory to forget those revisions
1771 1773 self._cache = None
1772 1774 self._chaininfocache = {}
1773 1775 self._chunkclear()
1774 1776 for x in xrange(rev, len(self)):
1775 1777 del self.nodemap[self.node(x)]
1776 1778
1777 1779 del self.index[rev:-1]
1778 1780
1779 1781 def checksize(self):
1780 1782 expected = 0
1781 1783 if len(self):
1782 1784 expected = max(0, self.end(len(self) - 1))
1783 1785
1784 1786 try:
1785 1787 f = self.opener(self.datafile)
1786 1788 f.seek(0, 2)
1787 1789 actual = f.tell()
1788 1790 f.close()
1789 1791 dd = actual - expected
1790 1792 except IOError as inst:
1791 1793 if inst.errno != errno.ENOENT:
1792 1794 raise
1793 1795 dd = 0
1794 1796
1795 1797 try:
1796 1798 f = self.opener(self.indexfile)
1797 1799 f.seek(0, 2)
1798 1800 actual = f.tell()
1799 1801 f.close()
1800 1802 s = self._io.size
1801 1803 i = max(0, actual // s)
1802 1804 di = actual - (i * s)
1803 1805 if self._inline:
1804 1806 databytes = 0
1805 1807 for r in self:
1806 1808 databytes += max(0, self.length(r))
1807 1809 dd = 0
1808 1810 di = actual - len(self) * s - databytes
1809 1811 except IOError as inst:
1810 1812 if inst.errno != errno.ENOENT:
1811 1813 raise
1812 1814 di = 0
1813 1815
1814 1816 return (dd, di)
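# Illustrative example (added commentary, hypothetical sizes): for a
# non-inline revlog whose index is intact and whose data should end at
# byte 10000, a 10003-byte data file makes checksize() return (3, 0):
# three stray data bytes and a structurally sound index. (0, 0) means
# both files match the recorded revisions.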
1815 1817
1816 1818 def files(self):
1817 1819 res = [self.indexfile]
1818 1820 if not self._inline:
1819 1821 res.append(self.datafile)
1820 1822 return res