revlog: pass revlog flags to addrevision...
Remi Chaintron
r30744:e12c0fa1 default
@@ -1,1832 +1,1833 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
76 76 raise ValueError('unknown revlog index flags')
77 77 return long(long(offset) << 16 | type)
78 78
79 79 _nullhash = hashlib.sha1(nullid)
80 80
81 81 def hash(text, p1, p2):
82 82 """generate a hash from the given text and its parent hashes
83 83
84 84 This hash combines both the current file contents and its history
85 85 in a manner that makes it easy to distinguish nodes with the same
86 86 content in the revision graph.
87 87 """
88 88 # As of now, if one of the parent nodes is null, p2 is null
89 89 if p2 == nullid:
90 90 # deep copy of a hash is faster than creating one
91 91 s = _nullhash.copy()
92 92 s.update(p1)
93 93 else:
94 94 # neither of the parent nodes is nullid
95 95 l = [p1, p2]
96 96 l.sort()
97 97 s = hashlib.sha1(l[0])
98 98 s.update(l[1])
99 99 s.update(text)
100 100 return s.digest()
101 101
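A quick editorial check of the properties above: parent order does not affect the result because non-null parents are sorted before hashing, and a root revision (both parents null) hashes as sha1(nullid + nullid + text):

p1 = '\x01' * 20
p2 = '\x02' * 20
assert hash('data', p1, p2) == hash('data', p2, p1)   # parents are sorted
root = hashlib.sha1(nullid + nullid + 'data').digest()
assert hash('data', nullid, nullid) == root
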
102 102 def decompress(bin):
103 103 """ decompress the given input """
104 104 if not bin:
105 105 return bin
106 106 t = bin[0]
107 107 if t == '\0':
108 108 return bin
109 109 if t == 'x':
110 110 try:
111 111 return _decompress(bin)
112 112 except zlib.error as e:
113 113 raise RevlogError(_("revlog decompress error: %s") % str(e))
114 114 if t == 'u':
115 115 return util.buffer(bin, 1)
116 116 raise RevlogError(_("unknown compression type %r") % t)
117 117
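The one-byte tag dispatched on above can be exercised directly; a minimal sketch:

# '\0' (or empty) chunks are returned untouched, 'u' marks data stored
# uncompressed, and 'x' is the first byte of a default zlib stream.
assert decompress('') == ''
assert str(decompress('uhello')) == 'hello'
assert decompress(_compress('hello')) == 'hello'
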
118 118 # index v0:
119 119 # 4 bytes: offset
120 120 # 4 bytes: compressed length
121 121 # 4 bytes: base rev
122 122 # 4 bytes: link rev
123 123 # 20 bytes: parent 1 nodeid
124 124 # 20 bytes: parent 2 nodeid
125 125 # 20 bytes: nodeid
126 126 indexformatv0 = ">4l20s20s20s"
127 127
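Each v0 record is fixed-size, which is what lets parseindex() below walk the file in constant-size steps; a quick check of the layout:

# 4 signed 32-bit fields + three 20-byte nodeids = 76 bytes per entry.
assert struct.calcsize(indexformatv0) == 76
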
128 128 class revlogoldio(object):
129 129 def __init__(self):
130 130 self.size = struct.calcsize(indexformatv0)
131 131
132 132 def parseindex(self, data, inline):
133 133 s = self.size
134 134 index = []
135 135 nodemap = {nullid: nullrev}
136 136 n = off = 0
137 137 l = len(data)
138 138 while off + s <= l:
139 139 cur = data[off:off + s]
140 140 off += s
141 141 e = _unpack(indexformatv0, cur)
142 142 # transform to revlogv1 format
143 143 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
144 144 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
145 145 index.append(e2)
146 146 nodemap[e[6]] = n
147 147 n += 1
148 148
149 149 # add the magic null revision at -1
150 150 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
151 151
152 152 return index, nodemap, None
153 153
154 154 def packentry(self, entry, node, version, rev):
155 155 if gettype(entry[0]):
156 156 raise RevlogError(_("index entry flags need RevlogNG"))
157 157 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
158 158 node(entry[5]), node(entry[6]), entry[7])
159 159 return _pack(indexformatv0, *e2)
160 160
161 161 # index ng:
162 162 # 6 bytes: offset
163 163 # 2 bytes: flags
164 164 # 4 bytes: compressed length
165 165 # 4 bytes: uncompressed length
166 166 # 4 bytes: base rev
167 167 # 4 bytes: link rev
168 168 # 4 bytes: parent 1 rev
169 169 # 4 bytes: parent 2 rev
170 170 # 32 bytes: nodeid
171 171 indexformatng = ">Qiiiiii20s12x"
172 172 versionformat = ">I"
173 173
174 174 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
175 175 # signed integer)
176 176 _maxentrysize = 0x7fffffff
177 177
178 178 class revlogio(object):
179 179 def __init__(self):
180 180 self.size = struct.calcsize(indexformatng)
181 181
182 182 def parseindex(self, data, inline):
183 183 # call the C implementation to parse the index data
184 184 index, cache = parsers.parse_index2(data, inline)
185 185 return index, getattr(index, 'nodemap', None), cache
186 186
187 187 def packentry(self, entry, node, version, rev):
188 188 p = _pack(indexformatng, *entry)
189 189 if rev == 0:
190 190 p = _pack(versionformat, version) + p[4:]
191 191 return p
192 192
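A sketch (using an in-memory entry tuple, not a real index) of the rev-0 trick above: the version header overwrites the first four bytes of the first entry, which is safe because rev 0's offset is always zero:

assert struct.calcsize(indexformatng) == 64
io = revlogio()
entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, '\x00' * 20)
packed = io.packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
assert packed[:4] == _pack(versionformat, REVLOG_DEFAULT_VERSION)
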
193 193 class revlog(object):
194 194 """
195 195 the underlying revision storage object
196 196
197 197 A revlog consists of two parts, an index and the revision data.
198 198
199 199 The index is a file with a fixed record size containing
200 200 information on each revision, including its nodeid (hash), the
201 201 nodeids of its parents, the position and offset of its data within
202 202 the data file, and the revision it's based on. Finally, each entry
203 203 contains a linkrev entry that can serve as a pointer to external
204 204 data.
205 205
206 206 The revision data itself is a linear collection of data chunks.
207 207 Each chunk represents a revision and is usually represented as a
208 208 delta against the previous chunk. To bound lookup time, runs of
209 209 deltas are limited to about 2 times the length of the original
210 210 version data. This makes retrieval of a version proportional to
211 211 its size, or O(1) relative to the number of revisions.
212 212
213 213 Both pieces of the revlog are written to in an append-only
214 214 fashion, which means we never need to rewrite a file to insert or
215 215 remove data, and can use some simple techniques to avoid the need
216 216 for locking while reading.
217 217
218 218 If checkambig, indexfile is opened with checkambig=True at
219 219 writing, to avoid file stat ambiguity.
220 220 """
221 221 def __init__(self, opener, indexfile, checkambig=False):
222 222 """
223 223 create a revlog object
224 224
225 225 opener is a function that abstracts the file opening operation
226 226 and can be used to implement COW semantics or the like.
227 227 """
228 228 self.indexfile = indexfile
229 229 self.datafile = indexfile[:-2] + ".d"
230 230 self.opener = opener
231 231 # When True, indexfile is opened with checkambig=True at writing, to
232 232 # avoid file stat ambiguity.
233 233 self._checkambig = checkambig
234 234 # 3-tuple of (node, rev, text) for a raw revision.
235 235 self._cache = None
236 236 # Maps rev to chain base rev.
237 237 self._chainbasecache = util.lrucachedict(100)
238 238 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
239 239 self._chunkcache = (0, '')
240 240 # How much data to read and cache into the raw revlog data cache.
241 241 self._chunkcachesize = 65536
242 242 self._maxchainlen = None
243 243 self._aggressivemergedeltas = False
244 244 self.index = []
245 245 # Mapping of partial identifiers to full nodes.
246 246 self._pcache = {}
247 247 # Mapping of revision integer to full node.
248 248 self._nodecache = {nullid: nullrev}
249 249 self._nodepos = None
250 250
251 251 v = REVLOG_DEFAULT_VERSION
252 252 opts = getattr(opener, 'options', None)
253 253 if opts is not None:
254 254 if 'revlogv1' in opts:
255 255 if 'generaldelta' in opts:
256 256 v |= REVLOGGENERALDELTA
257 257 else:
258 258 v = 0
259 259 if 'chunkcachesize' in opts:
260 260 self._chunkcachesize = opts['chunkcachesize']
261 261 if 'maxchainlen' in opts:
262 262 self._maxchainlen = opts['maxchainlen']
263 263 if 'aggressivemergedeltas' in opts:
264 264 self._aggressivemergedeltas = opts['aggressivemergedeltas']
265 265 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
266 266
267 267 if self._chunkcachesize <= 0:
268 268 raise RevlogError(_('revlog chunk cache size %r is not greater '
269 269 'than 0') % self._chunkcachesize)
270 270 elif self._chunkcachesize & (self._chunkcachesize - 1):
271 271 raise RevlogError(_('revlog chunk cache size %r is not a power '
272 272 'of 2') % self._chunkcachesize)
273 273
274 274 indexdata = ''
275 275 self._initempty = True
276 276 try:
277 277 f = self.opener(self.indexfile)
278 278 indexdata = f.read()
279 279 f.close()
280 280 if len(indexdata) > 0:
281 281 v = struct.unpack(versionformat, indexdata[:4])[0]
282 282 self._initempty = False
283 283 except IOError as inst:
284 284 if inst.errno != errno.ENOENT:
285 285 raise
286 286
287 287 self.version = v
288 288 self._inline = v & REVLOGNGINLINEDATA
289 289 self._generaldelta = v & REVLOGGENERALDELTA
290 290 flags = v & ~0xFFFF
291 291 fmt = v & 0xFFFF
292 292 if fmt == REVLOGV0 and flags:
293 293 raise RevlogError(_("index %s unknown flags %#04x for format v0")
294 294 % (self.indexfile, flags >> 16))
295 295 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
296 296 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
297 297 % (self.indexfile, flags >> 16))
298 298 elif fmt > REVLOGNG:
299 299 raise RevlogError(_("index %s unknown format %d")
300 300 % (self.indexfile, fmt))
301 301
302 302 self.storedeltachains = True
303 303
304 304 self._io = revlogio()
305 305 if self.version == REVLOGV0:
306 306 self._io = revlogoldio()
307 307 try:
308 308 d = self._io.parseindex(indexdata, self._inline)
309 309 except (ValueError, IndexError):
310 310 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
311 311 self.index, nodemap, self._chunkcache = d
312 312 if nodemap is not None:
313 313 self.nodemap = self._nodecache = nodemap
314 314 if not self._chunkcache:
315 315 self._chunkclear()
316 316 # revnum -> (chain-length, sum-delta-length)
317 317 self._chaininfocache = {}
318 318
319 319 def tip(self):
320 320 return self.node(len(self.index) - 2)
321 321 def __contains__(self, rev):
322 322 return 0 <= rev < len(self)
323 323 def __len__(self):
324 324 return len(self.index) - 1
325 325 def __iter__(self):
326 326 return iter(xrange(len(self)))
327 327 def revs(self, start=0, stop=None):
328 328 """iterate over all rev in this revlog (from start to stop)"""
329 329 step = 1
330 330 if stop is not None:
331 331 if start > stop:
332 332 step = -1
333 333 stop += step
334 334 else:
335 335 stop = len(self)
336 336 return xrange(start, stop, step)
337 337
338 338 @util.propertycache
339 339 def nodemap(self):
340 340 self.rev(self.node(0))
341 341 return self._nodecache
342 342
343 343 def hasnode(self, node):
344 344 try:
345 345 self.rev(node)
346 346 return True
347 347 except KeyError:
348 348 return False
349 349
350 350 def clearcaches(self):
351 351 self._cache = None
352 352 self._chainbasecache.clear()
353 353 self._chunkcache = (0, '')
354 354 self._pcache = {}
355 355
356 356 try:
357 357 self._nodecache.clearcaches()
358 358 except AttributeError:
359 359 self._nodecache = {nullid: nullrev}
360 360 self._nodepos = None
361 361
362 362 def rev(self, node):
363 363 try:
364 364 return self._nodecache[node]
365 365 except TypeError:
366 366 raise
367 367 except RevlogError:
368 368 # parsers.c radix tree lookup failed
369 369 raise LookupError(node, self.indexfile, _('no node'))
370 370 except KeyError:
371 371 # pure python cache lookup failed
372 372 n = self._nodecache
373 373 i = self.index
374 374 p = self._nodepos
375 375 if p is None:
376 376 p = len(i) - 2
377 377 for r in xrange(p, -1, -1):
378 378 v = i[r][7]
379 379 n[v] = r
380 380 if v == node:
381 381 self._nodepos = r - 1
382 382 return r
383 383 raise LookupError(node, self.indexfile, _('no node'))
384 384
385 385 # Accessors for index entries.
386 386
387 387 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
388 388 # are flags.
389 389 def start(self, rev):
390 390 return int(self.index[rev][0] >> 16)
391 391
392 392 def flags(self, rev):
393 393 return self.index[rev][0] & 0xFFFF
394 394
395 395 def length(self, rev):
396 396 return self.index[rev][1]
397 397
398 398 def rawsize(self, rev):
399 399 """return the length of the uncompressed text for a given revision"""
400 400 l = self.index[rev][2]
401 401 if l >= 0:
402 402 return l
403 403
404 404 t = self.revision(self.node(rev))
405 405 return len(t)
406 406 size = rawsize
407 407
408 408 def chainbase(self, rev):
409 409 base = self._chainbasecache.get(rev)
410 410 if base is not None:
411 411 return base
412 412
413 413 index = self.index
414 414 base = index[rev][3]
415 415 while base != rev:
416 416 rev = base
417 417 base = index[rev][3]
418 418
419 419 self._chainbasecache[rev] = base
420 420 return base
421 421
422 422 def linkrev(self, rev):
423 423 return self.index[rev][4]
424 424
425 425 def parentrevs(self, rev):
426 426 return self.index[rev][5:7]
427 427
428 428 def node(self, rev):
429 429 return self.index[rev][7]
430 430
431 431 # Derived from index values.
432 432
433 433 def end(self, rev):
434 434 return self.start(rev) + self.length(rev)
435 435
436 436 def parents(self, node):
437 437 i = self.index
438 438 d = i[self.rev(node)]
439 439 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
440 440
441 441 def chainlen(self, rev):
442 442 return self._chaininfo(rev)[0]
443 443
444 444 def _chaininfo(self, rev):
445 445 chaininfocache = self._chaininfocache
446 446 if rev in chaininfocache:
447 447 return chaininfocache[rev]
448 448 index = self.index
449 449 generaldelta = self._generaldelta
450 450 iterrev = rev
451 451 e = index[iterrev]
452 452 clen = 0
453 453 compresseddeltalen = 0
454 454 while iterrev != e[3]:
455 455 clen += 1
456 456 compresseddeltalen += e[1]
457 457 if generaldelta:
458 458 iterrev = e[3]
459 459 else:
460 460 iterrev -= 1
461 461 if iterrev in chaininfocache:
462 462 t = chaininfocache[iterrev]
463 463 clen += t[0]
464 464 compresseddeltalen += t[1]
465 465 break
466 466 e = index[iterrev]
467 467 else:
468 468 # Add text length of base since decompressing that also takes
469 469 # work. For cache hits the length is already included.
470 470 compresseddeltalen += e[1]
471 471 r = (clen, compresseddeltalen)
472 472 chaininfocache[rev] = r
473 473 return r
474 474
475 475 def _deltachain(self, rev, stoprev=None):
476 476 """Obtain the delta chain for a revision.
477 477
478 478 ``stoprev`` specifies a revision to stop at. If not specified, we
479 479 stop at the base of the chain.
480 480
481 481 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
482 482 revs in ascending order and ``stopped`` is a bool indicating whether
483 483 ``stoprev`` was hit.
484 484 """
485 485 chain = []
486 486
487 487 # Alias to prevent attribute lookup in tight loop.
488 488 index = self.index
489 489 generaldelta = self._generaldelta
490 490
491 491 iterrev = rev
492 492 e = index[iterrev]
493 493 while iterrev != e[3] and iterrev != stoprev:
494 494 chain.append(iterrev)
495 495 if generaldelta:
496 496 iterrev = e[3]
497 497 else:
498 498 iterrev -= 1
499 499 e = index[iterrev]
500 500
501 501 if iterrev == stoprev:
502 502 stopped = True
503 503 else:
504 504 chain.append(iterrev)
505 505 stopped = False
506 506
507 507 chain.reverse()
508 508 return chain, stopped
509 509
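To make the two traversal modes concrete, here is a toy walk (not the revlog API) over entries whose slot 3 holds the base field, mirroring the loop above: with generaldelta, rev 2 can delta directly against rev 0; without it, the chain is implicitly contiguous:

# Toy index entries where slot 3 is the base/delta-parent field.
toyindex = [
    (None, None, None, 0),  # rev 0: full text (base == rev)
    (None, None, None, 0),  # rev 1: delta against rev 0
    (None, None, None, 0),  # rev 2: delta against rev 0 (generaldelta)
]
def toychain(rev, generaldelta):
    chain = []
    e = toyindex[rev]
    while rev != e[3]:
        chain.append(rev)
        rev = e[3] if generaldelta else rev - 1
        e = toyindex[rev]
    chain.append(rev)
    chain.reverse()
    return chain
assert toychain(2, generaldelta=True) == [0, 2]
assert toychain(2, generaldelta=False) == [0, 1, 2]
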
510 510 def ancestors(self, revs, stoprev=0, inclusive=False):
511 511 """Generate the ancestors of 'revs' in reverse topological order.
512 512 Does not generate revs lower than stoprev.
513 513
514 514 See the documentation for ancestor.lazyancestors for more details."""
515 515
516 516 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
517 517 inclusive=inclusive)
518 518
519 519 def descendants(self, revs):
520 520 """Generate the descendants of 'revs' in revision order.
521 521
522 522 Yield a sequence of revision numbers starting with a child of
523 523 some rev in revs, i.e., each revision is *not* considered a
524 524 descendant of itself. Results are ordered by revision number (a
525 525 topological sort)."""
526 526 first = min(revs)
527 527 if first == nullrev:
528 528 for i in self:
529 529 yield i
530 530 return
531 531
532 532 seen = set(revs)
533 533 for i in self.revs(start=first + 1):
534 534 for x in self.parentrevs(i):
535 535 if x != nullrev and x in seen:
536 536 seen.add(i)
537 537 yield i
538 538 break
539 539
540 540 def findcommonmissing(self, common=None, heads=None):
541 541 """Return a tuple of the ancestors of common and the ancestors of heads
542 542 that are not ancestors of common. In revset terminology, we return the
543 543 tuple:
544 544
545 545 ::common, (::heads) - (::common)
546 546
547 547 The list is sorted by revision number, meaning it is
548 548 topologically sorted.
549 549
550 550 'heads' and 'common' are both lists of node IDs. If heads is
551 551 not supplied, uses all of the revlog's heads. If common is not
552 552 supplied, uses nullid."""
553 553 if common is None:
554 554 common = [nullid]
555 555 if heads is None:
556 556 heads = self.heads()
557 557
558 558 common = [self.rev(n) for n in common]
559 559 heads = [self.rev(n) for n in heads]
560 560
561 561 # we want the ancestors, but inclusive
562 562 class lazyset(object):
563 563 def __init__(self, lazyvalues):
564 564 self.addedvalues = set()
565 565 self.lazyvalues = lazyvalues
566 566
567 567 def __contains__(self, value):
568 568 return value in self.addedvalues or value in self.lazyvalues
569 569
570 570 def __iter__(self):
571 571 added = self.addedvalues
572 572 for r in added:
573 573 yield r
574 574 for r in self.lazyvalues:
575 575 if not r in added:
576 576 yield r
577 577
578 578 def add(self, value):
579 579 self.addedvalues.add(value)
580 580
581 581 def update(self, values):
582 582 self.addedvalues.update(values)
583 583
584 584 has = lazyset(self.ancestors(common))
585 585 has.add(nullrev)
586 586 has.update(common)
587 587
588 588 # take all ancestors from heads that aren't in has
589 589 missing = set()
590 590 visit = collections.deque(r for r in heads if r not in has)
591 591 while visit:
592 592 r = visit.popleft()
593 593 if r in missing:
594 594 continue
595 595 else:
596 596 missing.add(r)
597 597 for p in self.parentrevs(r):
598 598 if p not in has:
599 599 visit.append(p)
600 600 missing = list(missing)
601 601 missing.sort()
602 602 return has, [self.node(miss) for miss in missing]
603 603
604 604 def incrementalmissingrevs(self, common=None):
605 605 """Return an object that can be used to incrementally compute the
606 606 revision numbers of the ancestors of arbitrary sets that are not
607 607 ancestors of common. This is an ancestor.incrementalmissingancestors
608 608 object.
609 609
610 610 'common' is a list of revision numbers. If common is not supplied, uses
611 611 nullrev.
612 612 """
613 613 if common is None:
614 614 common = [nullrev]
615 615
616 616 return ancestor.incrementalmissingancestors(self.parentrevs, common)
617 617
618 618 def findmissingrevs(self, common=None, heads=None):
619 619 """Return the revision numbers of the ancestors of heads that
620 620 are not ancestors of common.
621 621
622 622 More specifically, return a list of revision numbers corresponding to
623 623 nodes N such that every N satisfies the following constraints:
624 624
625 625 1. N is an ancestor of some node in 'heads'
626 626 2. N is not an ancestor of any node in 'common'
627 627
628 628 The list is sorted by revision number, meaning it is
629 629 topologically sorted.
630 630
631 631 'heads' and 'common' are both lists of revision numbers. If heads is
632 632 not supplied, uses all of the revlog's heads. If common is not
633 633 supplied, uses nullid."""
634 634 if common is None:
635 635 common = [nullrev]
636 636 if heads is None:
637 637 heads = self.headrevs()
638 638
639 639 inc = self.incrementalmissingrevs(common=common)
640 640 return inc.missingancestors(heads)
641 641
642 642 def findmissing(self, common=None, heads=None):
643 643 """Return the ancestors of heads that are not ancestors of common.
644 644
645 645 More specifically, return a list of nodes N such that every N
646 646 satisfies the following constraints:
647 647
648 648 1. N is an ancestor of some node in 'heads'
649 649 2. N is not an ancestor of any node in 'common'
650 650
651 651 The list is sorted by revision number, meaning it is
652 652 topologically sorted.
653 653
654 654 'heads' and 'common' are both lists of node IDs. If heads is
655 655 not supplied, uses all of the revlog's heads. If common is not
656 656 supplied, uses nullid."""
657 657 if common is None:
658 658 common = [nullid]
659 659 if heads is None:
660 660 heads = self.heads()
661 661
662 662 common = [self.rev(n) for n in common]
663 663 heads = [self.rev(n) for n in heads]
664 664
665 665 inc = self.incrementalmissingrevs(common=common)
666 666 return [self.node(r) for r in inc.missingancestors(heads)]
667 667
668 668 def nodesbetween(self, roots=None, heads=None):
669 669 """Return a topological path from 'roots' to 'heads'.
670 670
671 671 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
672 672 topologically sorted list of all nodes N that satisfy both of
673 673 these constraints:
674 674
675 675 1. N is a descendant of some node in 'roots'
676 676 2. N is an ancestor of some node in 'heads'
677 677
678 678 Every node is considered to be both a descendant and an ancestor
679 679 of itself, so every reachable node in 'roots' and 'heads' will be
680 680 included in 'nodes'.
681 681
682 682 'outroots' is the list of reachable nodes in 'roots', i.e., the
683 683 subset of 'roots' that is returned in 'nodes'. Likewise,
684 684 'outheads' is the subset of 'heads' that is also in 'nodes'.
685 685
686 686 'roots' and 'heads' are both lists of node IDs. If 'roots' is
687 687 unspecified, uses nullid as the only root. If 'heads' is
688 688 unspecified, uses list of all of the revlog's heads."""
689 689 nonodes = ([], [], [])
690 690 if roots is not None:
691 691 roots = list(roots)
692 692 if not roots:
693 693 return nonodes
694 694 lowestrev = min([self.rev(n) for n in roots])
695 695 else:
696 696 roots = [nullid] # Everybody's a descendant of nullid
697 697 lowestrev = nullrev
698 698 if (lowestrev == nullrev) and (heads is None):
699 699 # We want _all_ the nodes!
700 700 return ([self.node(r) for r in self], [nullid], list(self.heads()))
701 701 if heads is None:
702 702 # All nodes are ancestors, so the latest ancestor is the last
703 703 # node.
704 704 highestrev = len(self) - 1
705 705 # Set ancestors to None to signal that every node is an ancestor.
706 706 ancestors = None
707 707 # Set heads to an empty dictionary for later discovery of heads
708 708 heads = {}
709 709 else:
710 710 heads = list(heads)
711 711 if not heads:
712 712 return nonodes
713 713 ancestors = set()
714 714 # Turn heads into a dictionary so we can remove 'fake' heads.
715 715 # Also, later we will be using it to filter out the heads we can't
716 716 # find from roots.
717 717 heads = dict.fromkeys(heads, False)
718 718 # Start at the top and keep marking parents until we're done.
719 719 nodestotag = set(heads)
720 720 # Remember where the top was so we can use it as a limit later.
721 721 highestrev = max([self.rev(n) for n in nodestotag])
722 722 while nodestotag:
723 723 # grab a node to tag
724 724 n = nodestotag.pop()
725 725 # Never tag nullid
726 726 if n == nullid:
727 727 continue
728 728 # A node's revision number represents its place in a
729 729 # topologically sorted list of nodes.
730 730 r = self.rev(n)
731 731 if r >= lowestrev:
732 732 if n not in ancestors:
733 733 # If we are possibly a descendant of one of the roots
734 734 # and we haven't already been marked as an ancestor
735 735 ancestors.add(n) # Mark as ancestor
736 736 # Add non-nullid parents to list of nodes to tag.
737 737 nodestotag.update([p for p in self.parents(n) if
738 738 p != nullid])
739 739 elif n in heads: # We've seen it before, is it a fake head?
740 740 # So it is, real heads should not be the ancestors of
741 741 # any other heads.
742 742 heads.pop(n)
743 743 if not ancestors:
744 744 return nonodes
745 745 # Now that we have our set of ancestors, we want to remove any
746 746 # roots that are not ancestors.
747 747
748 748 # If one of the roots was nullid, everything is included anyway.
749 749 if lowestrev > nullrev:
750 750 # But, since we weren't, let's recompute the lowest rev to not
751 751 # include roots that aren't ancestors.
752 752
753 753 # Filter out roots that aren't ancestors of heads
754 754 roots = [root for root in roots if root in ancestors]
755 755 # Recompute the lowest revision
756 756 if roots:
757 757 lowestrev = min([self.rev(root) for root in roots])
758 758 else:
759 759 # No more roots? Return empty list
760 760 return nonodes
761 761 else:
762 762 # We are descending from nullid, and don't need to care about
763 763 # any other roots.
764 764 lowestrev = nullrev
765 765 roots = [nullid]
766 766 # Transform our roots list into a set.
767 767 descendants = set(roots)
768 768 # Also, keep the original roots so we can filter out roots that aren't
769 769 # 'real' roots (i.e. are descended from other roots).
770 770 roots = descendants.copy()
771 771 # Our topologically sorted list of output nodes.
772 772 orderedout = []
773 773 # Don't start at nullid since we don't want nullid in our output list,
774 774 # and if nullid shows up in descendants, empty parents will look like
775 775 # they're descendants.
776 776 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
777 777 n = self.node(r)
778 778 isdescendant = False
779 779 if lowestrev == nullrev: # Everybody is a descendant of nullid
780 780 isdescendant = True
781 781 elif n in descendants:
782 782 # n is already a descendant
783 783 isdescendant = True
784 784 # This check only needs to be done here because all the roots
785 785 # will start being marked as descendants before the loop.
786 786 if n in roots:
787 787 # If n was a root, check if it's a 'real' root.
788 788 p = tuple(self.parents(n))
789 789 # If any of its parents are descendants, it's not a root.
790 790 if (p[0] in descendants) or (p[1] in descendants):
791 791 roots.remove(n)
792 792 else:
793 793 p = tuple(self.parents(n))
794 794 # A node is a descendant if either of its parents are
795 795 # descendants. (We seeded the descendants set with the roots
796 796 # up there, remember?)
797 797 if (p[0] in descendants) or (p[1] in descendants):
798 798 descendants.add(n)
799 799 isdescendant = True
800 800 if isdescendant and ((ancestors is None) or (n in ancestors)):
801 801 # Only include nodes that are both descendants and ancestors.
802 802 orderedout.append(n)
803 803 if (ancestors is not None) and (n in heads):
804 804 # We're trying to figure out which heads are reachable
805 805 # from roots.
806 806 # Mark this head as having been reached
807 807 heads[n] = True
808 808 elif ancestors is None:
809 809 # Otherwise, we're trying to discover the heads.
810 810 # Assume this is a head because if it isn't, the next step
811 811 # will eventually remove it.
812 812 heads[n] = True
813 813 # But, obviously its parents aren't.
814 814 for p in self.parents(n):
815 815 heads.pop(p, None)
816 816 heads = [head for head, flag in heads.iteritems() if flag]
817 817 roots = list(roots)
818 818 assert orderedout
819 819 assert roots
820 820 assert heads
821 821 return (orderedout, roots, heads)
822 822
823 823 def headrevs(self):
824 824 try:
825 825 return self.index.headrevs()
826 826 except AttributeError:
827 827 return self._headrevs()
828 828
829 829 def computephases(self, roots):
830 830 return self.index.computephasesmapsets(roots)
831 831
832 832 def _headrevs(self):
833 833 count = len(self)
834 834 if not count:
835 835 return [nullrev]
836 836 # we won't iterate over filtered revs, so nobody is a head at start
837 837 ishead = [0] * (count + 1)
838 838 index = self.index
839 839 for r in self:
840 840 ishead[r] = 1 # I may be a head
841 841 e = index[r]
842 842 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
843 843 return [r for r, val in enumerate(ishead) if val]
844 844
845 845 def heads(self, start=None, stop=None):
846 846 """return the list of all nodes that have no children
847 847
848 848 if start is specified, only heads that are descendants of
849 849 start will be returned
850 850 if stop is specified, it will consider all the revs from stop
851 851 as if they had no children
852 852 """
853 853 if start is None and stop is None:
854 854 if not len(self):
855 855 return [nullid]
856 856 return [self.node(r) for r in self.headrevs()]
857 857
858 858 if start is None:
859 859 start = nullid
860 860 if stop is None:
861 861 stop = []
862 862 stoprevs = set([self.rev(n) for n in stop])
863 863 startrev = self.rev(start)
864 864 reachable = set((startrev,))
865 865 heads = set((startrev,))
866 866
867 867 parentrevs = self.parentrevs
868 868 for r in self.revs(start=startrev + 1):
869 869 for p in parentrevs(r):
870 870 if p in reachable:
871 871 if r not in stoprevs:
872 872 reachable.add(r)
873 873 heads.add(r)
874 874 if p in heads and p not in stoprevs:
875 875 heads.remove(p)
876 876
877 877 return [self.node(r) for r in heads]
878 878
879 879 def children(self, node):
880 880 """find the children of a given node"""
881 881 c = []
882 882 p = self.rev(node)
883 883 for r in self.revs(start=p + 1):
884 884 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
885 885 if prevs:
886 886 for pr in prevs:
887 887 if pr == p:
888 888 c.append(self.node(r))
889 889 elif p == nullrev:
890 890 c.append(self.node(r))
891 891 return c
892 892
893 893 def descendant(self, start, end):
894 894 if start == nullrev:
895 895 return True
896 896 for i in self.descendants([start]):
897 897 if i == end:
898 898 return True
899 899 elif i > end:
900 900 break
901 901 return False
902 902
903 903 def commonancestorsheads(self, a, b):
904 904 """calculate all the heads of the common ancestors of nodes a and b"""
905 905 a, b = self.rev(a), self.rev(b)
906 906 try:
907 907 ancs = self.index.commonancestorsheads(a, b)
908 908 except (AttributeError, OverflowError): # C implementation failed
909 909 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
910 910 return map(self.node, ancs)
911 911
912 912 def isancestor(self, a, b):
913 913 """return True if node a is an ancestor of node b
914 914
915 915 The implementation of this is trivial but the use of
916 916 commonancestorsheads is not."""
917 917 return a in self.commonancestorsheads(a, b)
918 918
919 919 def ancestor(self, a, b):
920 920 """calculate the "best" common ancestor of nodes a and b"""
921 921
922 922 a, b = self.rev(a), self.rev(b)
923 923 try:
924 924 ancs = self.index.ancestors(a, b)
925 925 except (AttributeError, OverflowError):
926 926 ancs = ancestor.ancestors(self.parentrevs, a, b)
927 927 if ancs:
928 928 # choose a consistent winner when there's a tie
929 929 return min(map(self.node, ancs))
930 930 return nullid
931 931
932 932 def _match(self, id):
933 933 if isinstance(id, int):
934 934 # rev
935 935 return self.node(id)
936 936 if len(id) == 20:
937 937 # possibly a binary node
938 938 # odds of a binary node being all hex in ASCII are 1 in 10**25
939 939 try:
940 940 node = id
941 941 self.rev(node) # quick search the index
942 942 return node
943 943 except LookupError:
944 944 pass # may be partial hex id
945 945 try:
946 946 # str(rev)
947 947 rev = int(id)
948 948 if str(rev) != id:
949 949 raise ValueError
950 950 if rev < 0:
951 951 rev = len(self) + rev
952 952 if rev < 0 or rev >= len(self):
953 953 raise ValueError
954 954 return self.node(rev)
955 955 except (ValueError, OverflowError):
956 956 pass
957 957 if len(id) == 40:
958 958 try:
959 959 # a full hex nodeid?
960 960 node = bin(id)
961 961 self.rev(node)
962 962 return node
963 963 except (TypeError, LookupError):
964 964 pass
965 965
966 966 def _partialmatch(self, id):
967 967 try:
968 968 partial = self.index.partialmatch(id)
969 969 if partial and self.hasnode(partial):
970 970 return partial
971 971 return None
972 972 except RevlogError:
973 973 # parsers.c radix tree lookup gave multiple matches
974 974 # fast path: for unfiltered changelog, radix tree is accurate
975 975 if not getattr(self, 'filteredrevs', None):
976 976 raise LookupError(id, self.indexfile,
977 977 _('ambiguous identifier'))
978 978 # fall through to slow path that filters hidden revisions
979 979 except (AttributeError, ValueError):
980 980 # we are pure python, or key was too short to search radix tree
981 981 pass
982 982
983 983 if id in self._pcache:
984 984 return self._pcache[id]
985 985
986 986 if len(id) < 40:
987 987 try:
988 988 # hex(node)[:...]
989 989 l = len(id) // 2 # grab an even number of digits
990 990 prefix = bin(id[:l * 2])
991 991 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
992 992 nl = [n for n in nl if hex(n).startswith(id) and
993 993 self.hasnode(n)]
994 994 if len(nl) > 0:
995 995 if len(nl) == 1:
996 996 self._pcache[id] = nl[0]
997 997 return nl[0]
998 998 raise LookupError(id, self.indexfile,
999 999 _('ambiguous identifier'))
1000 1000 return None
1001 1001 except TypeError:
1002 1002 pass
1003 1003
1004 1004 def lookup(self, id):
1005 1005 """locate a node based on:
1006 1006 - revision number or str(revision number)
1007 1007 - nodeid or subset of hex nodeid
1008 1008 """
1009 1009 n = self._match(id)
1010 1010 if n is not None:
1011 1011 return n
1012 1012 n = self._partialmatch(id)
1013 1013 if n:
1014 1014 return n
1015 1015
1016 1016 raise LookupError(id, self.indexfile, _('no match found'))
1017 1017
1018 1018 def cmp(self, node, text):
1019 1019 """compare text with a given file revision
1020 1020
1021 1021 returns True if text is different than what is stored.
1022 1022 """
1023 1023 p1, p2 = self.parents(node)
1024 1024 return hash(text, p1, p2) != node
1025 1025
1026 1026 def _addchunk(self, offset, data):
1027 1027 """Add a segment to the revlog cache.
1028 1028
1029 1029 Accepts an absolute offset and the data that is at that location.
1030 1030 """
1031 1031 o, d = self._chunkcache
1032 1032 # try to add to existing cache
1033 1033 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1034 1034 self._chunkcache = o, d + data
1035 1035 else:
1036 1036 self._chunkcache = offset, data
1037 1037
1038 1038 def _loadchunk(self, offset, length, df=None):
1039 1039 """Load a segment of raw data from the revlog.
1040 1040
1041 1041 Accepts an absolute offset, length to read, and an optional existing
1042 1042 file handle to read from.
1043 1043
1044 1044 If an existing file handle is passed, it will be seeked and the
1045 1045 original seek position will NOT be restored.
1046 1046
1047 1047 Returns a str or buffer of raw byte data.
1048 1048 """
1049 1049 if df is not None:
1050 1050 closehandle = False
1051 1051 else:
1052 1052 if self._inline:
1053 1053 df = self.opener(self.indexfile)
1054 1054 else:
1055 1055 df = self.opener(self.datafile)
1056 1056 closehandle = True
1057 1057
1058 1058 # Cache data both forward and backward around the requested
1059 1059 # data, in a fixed size window. This helps speed up operations
1060 1060 # involving reading the revlog backwards.
1061 1061 cachesize = self._chunkcachesize
1062 1062 realoffset = offset & ~(cachesize - 1)
1063 1063 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1064 1064 - realoffset)
1065 1065 df.seek(realoffset)
1066 1066 d = df.read(reallength)
1067 1067 if closehandle:
1068 1068 df.close()
1069 1069 self._addchunk(realoffset, d)
1070 1070 if offset != realoffset or reallength != length:
1071 1071 return util.buffer(d, offset - realoffset, length)
1072 1072 return d
1073 1073
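The rounding above relies on the chunk cache size being a power of two (enforced in __init__); a standalone check of the window arithmetic with the default 64k cache:

cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)   # rounded down to 65536
reallength = (((offset + length + cachesize) & ~(cachesize - 1))
              - realoffset)              # rounded up: one full window
assert realoffset <= offset
assert realoffset + reallength >= offset + length
assert (realoffset, reallength) == (65536, 65536)
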
1074 1074 def _getchunk(self, offset, length, df=None):
1075 1075 """Obtain a segment of raw data from the revlog.
1076 1076
1077 1077 Accepts an absolute offset, length of bytes to obtain, and an
1078 1078 optional file handle to the already-opened revlog. If the file
1079 1079 handle is used, its original seek position will not be preserved.
1080 1080
1081 1081 Requests for data may be returned from a cache.
1082 1082
1083 1083 Returns a str or a buffer instance of raw byte data.
1084 1084 """
1085 1085 o, d = self._chunkcache
1086 1086 l = len(d)
1087 1087
1088 1088 # is it in the cache?
1089 1089 cachestart = offset - o
1090 1090 cacheend = cachestart + length
1091 1091 if cachestart >= 0 and cacheend <= l:
1092 1092 if cachestart == 0 and cacheend == l:
1093 1093 return d # avoid a copy
1094 1094 return util.buffer(d, cachestart, cacheend - cachestart)
1095 1095
1096 1096 return self._loadchunk(offset, length, df=df)
1097 1097
1098 1098 def _chunkraw(self, startrev, endrev, df=None):
1099 1099 """Obtain a segment of raw data corresponding to a range of revisions.
1100 1100
1101 1101 Accepts the start and end revisions and an optional already-open
1102 1102 file handle to be used for reading. If the file handle is read, its
1103 1103 seek position will not be preserved.
1104 1104
1105 1105 Requests for data may be satisfied by a cache.
1106 1106
1107 1107 Returns a 2-tuple of (offset, data) for the requested range of
1108 1108 revisions. Offset is the integer offset from the beginning of the
1109 1109 revlog and data is a str or buffer of the raw byte data.
1110 1110
1111 1111 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1112 1112 to determine where each revision's data begins and ends.
1113 1113 """
1114 1114 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1115 1115 # (functions are expensive).
1116 1116 index = self.index
1117 1117 istart = index[startrev]
1118 1118 start = int(istart[0] >> 16)
1119 1119 if startrev == endrev:
1120 1120 end = start + istart[1]
1121 1121 else:
1122 1122 iend = index[endrev]
1123 1123 end = int(iend[0] >> 16) + iend[1]
1124 1124
1125 1125 if self._inline:
1126 1126 start += (startrev + 1) * self._io.size
1127 1127 end += (endrev + 1) * self._io.size
1128 1128 length = end - start
1129 1129
1130 1130 return start, self._getchunk(start, length, df=df)
1131 1131
1132 1132 def _chunk(self, rev, df=None):
1133 1133 """Obtain a single decompressed chunk for a revision.
1134 1134
1135 1135 Accepts an integer revision and an optional already-open file handle
1136 1136 to be used for reading. If used, the seek position of the file will not
1137 1137 be preserved.
1138 1138
1139 1139 Returns a str holding uncompressed data for the requested revision.
1140 1140 """
1141 1141 return decompress(self._chunkraw(rev, rev, df=df)[1])
1142 1142
1143 1143 def _chunks(self, revs, df=None):
1144 1144 """Obtain decompressed chunks for the specified revisions.
1145 1145
1146 1146 Accepts an iterable of numeric revisions that are assumed to be in
1147 1147 ascending order. Also accepts an optional already-open file handle
1148 1148 to be used for reading. If used, the seek position of the file will
1149 1149 not be preserved.
1150 1150
1151 1151 This function is similar to calling ``self._chunk()`` multiple times,
1152 1152 but is faster.
1153 1153
1154 1154 Returns a list with decompressed data for each requested revision.
1155 1155 """
1156 1156 if not revs:
1157 1157 return []
1158 1158 start = self.start
1159 1159 length = self.length
1160 1160 inline = self._inline
1161 1161 iosize = self._io.size
1162 1162 buffer = util.buffer
1163 1163
1164 1164 l = []
1165 1165 ladd = l.append
1166 1166
1167 1167 try:
1168 1168 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1169 1169 except OverflowError:
1170 1170 # issue4215 - we can't cache a run of chunks greater than
1171 1171 # 2G on Windows
1172 1172 return [self._chunk(rev, df=df) for rev in revs]
1173 1173
1174 1174 for rev in revs:
1175 1175 chunkstart = start(rev)
1176 1176 if inline:
1177 1177 chunkstart += (rev + 1) * iosize
1178 1178 chunklength = length(rev)
1179 1179 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1180 1180
1181 1181 return l
1182 1182
1183 1183 def _chunkclear(self):
1184 1184 """Clear the raw chunk cache."""
1185 1185 self._chunkcache = (0, '')
1186 1186
1187 1187 def deltaparent(self, rev):
1188 1188 """return deltaparent of the given revision"""
1189 1189 base = self.index[rev][3]
1190 1190 if base == rev:
1191 1191 return nullrev
1192 1192 elif self._generaldelta:
1193 1193 return base
1194 1194 else:
1195 1195 return rev - 1
1196 1196
1197 1197 def revdiff(self, rev1, rev2):
1198 1198 """return or calculate a delta between two revisions"""
1199 1199 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1200 1200 return str(self._chunk(rev2))
1201 1201
1202 1202 return mdiff.textdiff(self.revision(rev1),
1203 1203 self.revision(rev2))
1204 1204
1205 1205 def revision(self, nodeorrev, _df=None, raw=False):
1206 1206 """return an uncompressed revision of a given node or revision
1207 1207 number.
1208 1208
1209 1209 _df - an existing file handle to read from. (internal-only)
1210 1210 raw - an optional argument specifying if the revision data is to be
1211 1211 treated as raw data when applying flag transforms. 'raw' should be set
1212 1212 to True when generating changegroups or in debug commands.
1213 1213 """
1214 1214 if isinstance(nodeorrev, int):
1215 1215 rev = nodeorrev
1216 1216 node = self.node(rev)
1217 1217 else:
1218 1218 node = nodeorrev
1219 1219 rev = None
1220 1220
1221 1221 cachedrev = None
1222 1222 if node == nullid:
1223 1223 return ""
1224 1224 if self._cache:
1225 1225 if self._cache[0] == node:
1226 1226 return self._cache[2]
1227 1227 cachedrev = self._cache[1]
1228 1228
1229 1229 # look up what we need to read
1230 1230 text = None
1231 1231 if rev is None:
1232 1232 rev = self.rev(node)
1233 1233
1234 1234 # check rev flags
1235 1235 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1236 1236 raise RevlogError(_('incompatible revision flag %x') %
1237 1237 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1238 1238
1239 1239 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1240 1240 if stopped:
1241 1241 text = self._cache[2]
1242 1242
1243 1243 # drop cache to save memory
1244 1244 self._cache = None
1245 1245
1246 1246 bins = self._chunks(chain, df=_df)
1247 1247 if text is None:
1248 1248 text = str(bins[0])
1249 1249 bins = bins[1:]
1250 1250
1251 1251 text = mdiff.patches(text, bins)
1252 1252 self.checkhash(text, node, rev=rev)
1253 1253 self._cache = (node, rev, text)
1254 1254 return text
1255 1255
1256 1256 def hash(self, text, p1, p2):
1257 1257 """Compute a node hash.
1258 1258
1259 1259 Available as a function so that subclasses can replace the hash
1260 1260 as needed.
1261 1261 """
1262 1262 return hash(text, p1, p2)
1263 1263
1264 1264 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1265 1265 """Check node hash integrity.
1266 1266
1267 1267 Available as a function so that subclasses can extend hash mismatch
1268 1268 behaviors as needed.
1269 1269 """
1270 1270 if p1 is None and p2 is None:
1271 1271 p1, p2 = self.parents(node)
1272 1272 if node != self.hash(text, p1, p2):
1273 1273 revornode = rev
1274 1274 if revornode is None:
1275 1275 revornode = templatefilters.short(hex(node))
1276 1276 raise RevlogError(_("integrity check failed on %s:%s")
1277 1277 % (self.indexfile, revornode))
1278 1278
1279 1279 def checkinlinesize(self, tr, fp=None):
1280 1280 """Check if the revlog is too big for inline and convert if so.
1281 1281
1282 1282 This should be called after revisions are added to the revlog. If the
1283 1283 revlog has grown too large to be an inline revlog, it will convert it
1284 1284 to use multiple index and data files.
1285 1285 """
1286 1286 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1287 1287 return
1288 1288
1289 1289 trinfo = tr.find(self.indexfile)
1290 1290 if trinfo is None:
1291 1291 raise RevlogError(_("%s not found in the transaction")
1292 1292 % self.indexfile)
1293 1293
1294 1294 trindex = trinfo[2]
1295 1295 if trindex is not None:
1296 1296 dataoff = self.start(trindex)
1297 1297 else:
1298 1298 # revlog was stripped at start of transaction, use all leftover data
1299 1299 trindex = len(self) - 1
1300 1300 dataoff = self.end(-2)
1301 1301
1302 1302 tr.add(self.datafile, dataoff)
1303 1303
1304 1304 if fp:
1305 1305 fp.flush()
1306 1306 fp.close()
1307 1307
1308 1308 df = self.opener(self.datafile, 'w')
1309 1309 try:
1310 1310 for r in self:
1311 1311 df.write(self._chunkraw(r, r)[1])
1312 1312 finally:
1313 1313 df.close()
1314 1314
1315 1315 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1316 1316 checkambig=self._checkambig)
1317 1317 self.version &= ~(REVLOGNGINLINEDATA)
1318 1318 self._inline = False
1319 1319 for i in self:
1320 1320 e = self._io.packentry(self.index[i], self.node, self.version, i)
1321 1321 fp.write(e)
1322 1322
1323 1323 # if we don't call close, the temp file will never replace the
1324 1324 # real index
1325 1325 fp.close()
1326 1326
1327 1327 tr.replace(self.indexfile, trindex * self._io.size)
1328 1328 self._chunkclear()
1329 1329
1330 1330 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1331 node=None):
1331 node=None, flags=REVIDX_DEFAULT_FLAGS):
1332 1332 """add a revision to the log
1333 1333
1334 1334 text - the revision data to add
1335 1335 transaction - the transaction object used for rollback
1336 1336 link - the linkrev data to add
1337 1337 p1, p2 - the parent nodeids of the revision
1338 1338 cachedelta - an optional precomputed delta
1339 1339 node - nodeid of revision; typically node is not specified, and it is
1340 1340 computed by default as hash(text, p1, p2), however subclasses might
1341 1341 use different hashing method (and override checkhash() in such case)
1342 flags - the known flags to set on the revision
1342 1343 """
1343 1344 if link == nullrev:
1344 1345 raise RevlogError(_("attempted to add linkrev -1 to %s")
1345 1346 % self.indexfile)
1346 1347
1347 1348 if len(text) > _maxentrysize:
1348 1349 raise RevlogError(
1349 1350 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1350 1351 % (self.indexfile, len(text)))
1351 1352
1352 1353 node = node or self.hash(text, p1, p2)
1353 1354 if node in self.nodemap:
1354 1355 return node
1355 1356
1356 1357 dfh = None
1357 1358 if not self._inline:
1358 1359 dfh = self.opener(self.datafile, "a+")
1359 1360 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1360 1361 try:
1361 1362 return self._addrevision(node, text, transaction, link, p1, p2,
1362 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1363 flags, cachedelta, ifh, dfh)
1363 1364 finally:
1364 1365 if dfh:
1365 1366 dfh.close()
1366 1367 ifh.close()
1367 1368
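With the new ``flags`` argument, a caller can set index flags at add time instead of always getting REVIDX_DEFAULT_FLAGS. A hedged sketch with hypothetical names ('rl' an existing revlog, 'tr' an open transaction, 'linkrev', 'p1', 'p2' describing the new revision):

node = rl.addrevision(text, tr, linkrev, p1, p2,
                      flags=REVIDX_ISCENSORED)
assert rl.flags(rl.rev(node)) & REVIDX_ISCENSORED
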
1368 1369 def compress(self, text):
1369 1370 """ generate a possibly-compressed representation of text """
1370 1371 if not text:
1371 1372 return ("", text)
1372 1373 l = len(text)
1373 1374 bin = None
1374 1375 if l < 44:
1375 1376 pass
1376 1377 elif l > 1000000:
1377 1378 # zlib makes an internal copy, thus doubling memory usage for
1378 1379 # large files, so let's do this in pieces
1379 1380 z = zlib.compressobj()
1380 1381 p = []
1381 1382 pos = 0
1382 1383 while pos < l:
1383 1384 pos2 = pos + 2**20
1384 1385 p.append(z.compress(text[pos:pos2]))
1385 1386 pos = pos2
1386 1387 p.append(z.flush())
1387 1388 if sum(map(len, p)) < l:
1388 1389 bin = "".join(p)
1389 1390 else:
1390 1391 bin = _compress(text)
1391 1392 if bin is None or len(bin) > l:
1392 1393 if text[0] == '\0':
1393 1394 return ("", text)
1394 1395 return ('u', text)
1395 1396 return ("", bin)
1396 1397
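compress() does not touch instance state, so its behavior is easy to pin down; a sketch with 'rl' standing in for any revlog instance:

assert rl.compress('') == ('', '')
assert rl.compress('short') == ('u', 'short')   # < 44 bytes: kept literal
h, d = rl.compress('a' * 4096)
assert h == '' and d.startswith('x')            # zlib-compressed
assert decompress(h + d) == 'a' * 4096
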
1397 1398 def _isgooddelta(self, d, textlen):
1398 1399 """Returns True if the given delta is good. Good means that it is within
1399 1400 the disk span, disk size, and chain length bounds that we know to be
1400 1401 performant."""
1401 1402 if d is None:
1402 1403 return False
1403 1404
1404 1405 # - 'dist' is the distance from the base revision -- bounding it limits
1405 1406 # the amount of I/O we need to do.
1406 1407 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1407 1408 # to apply -- bounding it limits the amount of CPU we consume.
1408 1409 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1409 1410 if (dist > textlen * 4 or l > textlen or
1410 1411 compresseddeltalen > textlen * 2 or
1411 1412 (self._maxchainlen and chainlen > self._maxchainlen)):
1412 1413 return False
1413 1414
1414 1415 return True
1415 1416
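A numeric illustration of the three bounds (assuming a revlog 'rl' with default settings, so _maxchainlen is None); the 7-tuple mirrors what builddelta() returns below:

textlen = 100
good = (150, 30, None, 0, 0, 3, 120)   # dist, deltalen, ..., chainlen, sumdeltas
bad = (500, 30, None, 0, 0, 3, 120)    # dist 500 > 4 * textlen
assert rl._isgooddelta(good, textlen)
assert not rl._isgooddelta(bad, textlen)
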
1416 1417 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1417 1418 cachedelta, ifh, dfh, alwayscache=False, raw=False):
1418 1419 """internal function to add revisions to the log
1419 1420
1420 1421 see addrevision for argument descriptions.
1421 1422 invariants:
1422 1423 - text is optional (can be None); if not set, cachedelta must be set.
1423 1424 if both are set, they must correspond to each other.
1424 1425 - raw is optional; if set to True, it indicates the revision data is to
1425 1426 be treated by _processflags() as raw. It is usually set by changegroup
1426 1427 generation and debug commands.
1427 1428 """
1428 1429 btext = [text]
1429 1430 def buildtext():
1430 1431 if btext[0] is not None:
1431 1432 return btext[0]
1432 1433 baserev = cachedelta[0]
1433 1434 delta = cachedelta[1]
1434 1435 # special case deltas which replace entire base; no need to decode
1435 1436 # base revision. this neatly avoids censored bases, which throw when
1436 1437 # they're decoded.
1437 1438 hlen = struct.calcsize(">lll")
1438 1439 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1439 1440 len(delta) - hlen):
1440 1441 btext[0] = delta[hlen:]
1441 1442 else:
1442 1443 if self._inline:
1443 1444 fh = ifh
1444 1445 else:
1445 1446 fh = dfh
1446 1447 basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
1447 1448 btext[0] = mdiff.patch(basetext, delta)
1448 1449
1449 1450 try:
1450 1451 self.checkhash(btext[0], node, p1=p1, p2=p2)
1451 1452 if flags & REVIDX_ISCENSORED:
1452 1453 raise RevlogError(_('node %s is not censored') % node)
1453 1454 except CensoredNodeError:
1454 1455 # must pass the censored index flag to add censored revisions
1455 1456 if not flags & REVIDX_ISCENSORED:
1456 1457 raise
1457 1458 return btext[0]
1458 1459
1459 1460 def builddelta(rev):
1460 1461 # can we use the cached delta?
1461 1462 if cachedelta and cachedelta[0] == rev:
1462 1463 delta = cachedelta[1]
1463 1464 else:
1464 1465 t = buildtext()
1465 1466 if self.iscensored(rev):
1466 1467 # deltas based on a censored revision must replace the
1467 1468 # full content in one patch, so delta works everywhere
1468 1469 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1469 1470 delta = header + t
1470 1471 else:
1471 1472 if self._inline:
1472 1473 fh = ifh
1473 1474 else:
1474 1475 fh = dfh
1475 1476 ptext = self.revision(self.node(rev), _df=fh)
1476 1477 delta = mdiff.textdiff(ptext, t)
1477 1478 header, data = self.compress(delta)
1478 1479 deltalen = len(header) + len(data)
1479 1480 chainbase = self.chainbase(rev)
1480 1481 dist = deltalen + offset - self.start(chainbase)
1481 1482 if self._generaldelta:
1482 1483 base = rev
1483 1484 else:
1484 1485 base = chainbase
1485 1486 chainlen, compresseddeltalen = self._chaininfo(rev)
1486 1487 chainlen += 1
1487 1488 compresseddeltalen += deltalen
1488 1489 return (dist, deltalen, (header, data), base,
1489 1490 chainbase, chainlen, compresseddeltalen)
1490 1491
1491 1492 curr = len(self)
1492 1493 prev = curr - 1
1493 1494 offset = self.end(prev)
1494 1495 delta = None
1495 1496 p1r, p2r = self.rev(p1), self.rev(p2)
1496 1497
1497 1498 # full versions are inserted when the needed deltas
1498 1499 # become comparable to the uncompressed text
1499 1500 if text is None:
1500 1501 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1501 1502 cachedelta[1])
1502 1503 else:
1503 1504 textlen = len(text)
1504 1505
1505 1506 # should we try to build a delta?
1506 1507 if prev != nullrev and self.storedeltachains:
1507 1508 tested = set()
1508 1509 # This condition is true most of the time when processing
1509 1510 # changegroup data into a generaldelta repo. The only time it
1510 1511 # isn't true is if this is the first revision in a delta chain
1511 1512 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1512 1513 if cachedelta and self._generaldelta and self._lazydeltabase:
1513 1514 # Assume what we received from the server is a good choice
1514 1515 # build delta will reuse the cache
1515 1516 candidatedelta = builddelta(cachedelta[0])
1516 1517 tested.add(cachedelta[0])
1517 1518 if self._isgooddelta(candidatedelta, textlen):
1518 1519 delta = candidatedelta
1519 1520 if delta is None and self._generaldelta:
1520 1521 # exclude already lazy tested base if any
1521 1522 parents = [p for p in (p1r, p2r)
1522 1523 if p != nullrev and p not in tested]
1523 1524 if parents and not self._aggressivemergedeltas:
1524 1525 # Pick whichever parent is closer to us (to minimize the
1525 1526 # chance of having to build a fulltext).
1526 1527 parents = [max(parents)]
1527 1528 tested.update(parents)
1528 1529 pdeltas = []
1529 1530 for p in parents:
1530 1531 pd = builddelta(p)
1531 1532 if self._isgooddelta(pd, textlen):
1532 1533 pdeltas.append(pd)
1533 1534 if pdeltas:
1534 1535 delta = min(pdeltas, key=lambda x: x[1])
1535 1536 if delta is None and prev not in tested:
1536 1537 # other approaches failed, so try against prev to hopefully save us a
1537 1538 # fulltext.
1538 1539 candidatedelta = builddelta(prev)
1539 1540 if self._isgooddelta(candidatedelta, textlen):
1540 1541 delta = candidatedelta
1541 1542 if delta is not None:
1542 1543 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1543 1544 else:
1544 1545 text = buildtext()
1545 1546 data = self.compress(text)
1546 1547 l = len(data[1]) + len(data[0])
1547 1548 base = chainbase = curr
1548 1549
1549 1550 e = (offset_type(offset, flags), l, textlen,
1550 1551 base, link, p1r, p2r, node)
1551 1552 self.index.insert(-1, e)
1552 1553 self.nodemap[node] = curr
1553 1554
1554 1555 entry = self._io.packentry(e, self.node, self.version, curr)
1555 1556 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1556 1557
1557 1558 if alwayscache and text is None:
1558 1559 text = buildtext()
1559 1560
1560 1561 if type(text) == str: # only accept immutable objects
1561 1562 self._cache = (node, curr, text)
1562 1563 self._chainbasecache[curr] = chainbase
1563 1564 return node
1564 1565
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)
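The seek-before-write pattern above is easy to reproduce outside the revlog. A minimal sketch of the same defensive append, with a hypothetical file name; the explicit seek to EOF before each write is the whole point:

import os

def appendrecord(path, payload):
    # Open append+read, like the revlog's index and data handles. After
    # a read, some platforms leave the position before EOF and a write
    # may then not land at the end, so seek explicitly first.
    fh = open(path, 'a+b')
    try:
        fh.seek(0, os.SEEK_END)
        fh.write(payload)
    finally:
        fh.close()
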
    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                #
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb),
                                          raw=True)

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # _addrevision switched from inline to conventional
                    # storage; reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content
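A hedged sketch of how a caller might use ``addrevisioncb`` to observe what ``addgroup`` writes; ``cg``, ``linkmapper`` and ``tr`` are assumed to come from the surrounding changegroup-application code:

added = []

def onrevision(rl, node):
    # Invoked once per revision actually written; nodes already present
    # in the nodemap are skipped above and never reach this callback.
    added.append(node)

# revlog.addgroup(cg, linkmapper, tr, addrevisioncb=onrevision)
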
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
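The walk above can be exercised on a toy graph. A self-contained sketch with hypothetical data, mirroring the algorithm; ``linkrevs`` and ``parents`` are plain lookups standing in for ``linkrev()`` and ``parentrevs()``, and revs are topologically ordered:

def getstrippoint(linkrevs, parents, headrevs, minlink):
    nullrev = -1
    brokenrevs = set()
    strippoint = len(linkrevs)
    heads = {}
    futurelargelinkrevs = set()
    for head in headrevs:
        heads[head] = linkrevs[head]
        if linkrevs[head] >= minlink:
            futurelargelinkrevs.add(linkrevs[head])
    # Walk from the highest rev down; stop once no reachable head can
    # still carry a linkrev >= minlink.
    while futurelargelinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)
        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            futurelargelinkrevs.remove(linkrev)
        for p in parents[strippoint]:
            if p != nullrev:
                heads[p] = linkrevs[p]
                if linkrevs[p] >= minlink:
                    futurelargelinkrevs.add(linkrevs[p])
    return strippoint, brokenrevs

# Linear history 0 <- 1 <- 2 <- 3 whose linkrevs equal the revs:
print(getstrippoint([0, 1, 2, 3],
                    [(-1, -1), (0, -1), (1, -1), (2, -1)],
                    headrevs=[3], minlink=2))
# -> (2, set()): truncate at rev 2; no surviving rev has a broken linkrev
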
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
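The truncation offsets above fall straight out of the two on-disk layouts. A small worked sketch of the arithmetic, with illustrative numbers (v1 index entries are fixed-size, 64 bytes):

iosize = 64        # assumed size of one index entry
rev = 10           # first revision to strip
datastart = 8192   # stand-in for self.start(rev)

# Separate files: cut the data file where the stripped rev's data
# begins, and the index after the ``rev`` entries that survive.
datafile_end = datastart            # 8192
indexfile_end = rev * iosize        # 640

# Inline revlog: entries and data share one file, so the cut point is
# the data offset plus the index entries interleaved before it.
inline_end = datastart + rev * iosize   # 8832
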
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
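A hedged sketch of how a verify-style caller might read the ``(dd, di)`` pair returned above; the function and message wording are hypothetical:

def reportsizes(rl):
    dd, di = rl.checksize()
    if dd:
        # data file length differs from what the index entries add up to
        print('data file size differs from the index total by %d bytes' % dd)
    if di:
        # trailing bytes: a partial entry, or unaccounted inline data
        print('index file has %d unexpected trailing bytes' % di)
    return not dd and not di
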
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res