revlog: use bytes() to ensure text from _chunks is a reasonable type
Augie Fackler
r31357:c233cbda default
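
The change below replaces str() with bytes() when materializing the base
text returned by _chunks(). A minimal sketch of why that matters, assuming
the chunk arrives as a buffer-like object (util.buffer can return one): on
Python 3, str() on a bytes-like value produces a repr rather than the
payload, while bytes() yields the underlying data.

# Minimal sketch (Python 3 semantics); memoryview stands in for the
# buffer-like object _chunks can return.
data = memoryview(b'revlog chunk')
str(data)    # '<memory at 0x...>' -- a repr, not the revision text
bytes(data)  # b'revlog chunk' -- the underlying bytes

On Python 2, util.buffer returns buffer objects, for which str() and
bytes() are the same call, so bytes() is the spelling that behaves on both.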
@@ -1,2101 +1,2101 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 # Aliased for performance.
43 43 _zlibdecompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
58 58 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
59 59 REVIDX_DEFAULT_FLAGS = 0
60 60 # stable order in which flags need to be processed and their processors applied
61 61 REVIDX_FLAGS_ORDER = [
62 62 REVIDX_ISCENSORED,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 ]
66 66 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
67 67
68 68 # max size of revlog with inline data
69 69 _maxinline = 131072
70 70 _chunksize = 1048576
71 71
72 72 RevlogError = error.RevlogError
73 73 LookupError = error.LookupError
74 74 CensoredNodeError = error.CensoredNodeError
75 75 ProgrammingError = error.ProgrammingError
76 76
77 77 # Store flag processors (cf. 'addflagprocessor()' to register)
78 78 _flagprocessors = {
79 79 REVIDX_ISCENSORED: None,
80 80 }
81 81
82 82 def addflagprocessor(flag, processor):
83 83 """Register a flag processor on a revision data flag.
84 84
85 85 Invariant:
86 86 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
87 87 - Only one flag processor can be registered on a specific flag.
88 88 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
89 89 following signatures:
90 90 - (read) f(self, text) -> newtext, bool
91 91 - (write) f(self, text) -> newtext, bool
92 92 - (raw) f(self, text) -> bool
93 93 The boolean returned by these transforms is used to determine whether
94 94 'newtext' can be used for hash integrity checking.
95 95
96 96 Note: The 'raw' transform is used for changegroup generation and in some
97 97 debug commands. In this case the transform only indicates whether the
98 98 contents can be used for hash integrity checks.
99 99 """
100 100 if not flag & REVIDX_KNOWN_FLAGS:
101 101 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
102 102 raise ProgrammingError(msg)
103 103 if flag not in REVIDX_FLAGS_ORDER:
104 104 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
105 105 raise ProgrammingError(msg)
106 106 if flag in _flagprocessors:
107 107 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
108 108 raise error.Abort(msg)
109 109 _flagprocessors[flag] = processor
110 110
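
For illustration, a hypothetical registration following the contract
above. REVIDX_EXTSTORED is one of the flags defined in this module, but
these transforms are invented stand-ins, not Mercurial's real extstored
handling:

def _extstoredread(self, text):
    # undo the write transform; True: the result is safe to hash-check
    return text.replace('stub:', '', 1), True

def _extstoredwrite(self, text):
    # the stored form differs from the logical text, so skip hash checks
    return 'stub:' + text, False

def _extstoredraw(self, text):
    # raw (changegroup) data is not the canonical text either
    return False

addflagprocessor(REVIDX_EXTSTORED,
                 (_extstoredread, _extstoredwrite, _extstoredraw))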
111 111 def getoffset(q):
112 112 return int(q >> 16)
113 113
114 114 def gettype(q):
115 115 return int(q & 0xFFFF)
116 116
117 117 def offset_type(offset, type):
118 118 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
119 119 raise ValueError('unknown revlog index flags')
120 120 return long(long(offset) << 16 | type)
121 121
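
A worked round trip of the packing above: the byte offset lives in the
high 48 bits of an index entry's first field, the 16 flag bits in the
low ones.

packed = offset_type(1048576, REVIDX_ISCENSORED)
assert getoffset(packed) == 1048576
assert gettype(packed) == REVIDX_ISCENSORED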
122 122 _nullhash = hashlib.sha1(nullid)
123 123
124 124 def hash(text, p1, p2):
125 125 """generate a hash from the given text and its parent hashes
126 126
127 127 This hash combines both the current file contents and its history
128 128 in a manner that makes it easy to distinguish nodes with the same
129 129 content in the revision graph.
130 130 """
131 131 # As of now, if one of the parent nodes is null, it is p2
132 132 if p2 == nullid:
133 133 # deep copy of a hash is faster than creating one
134 134 s = _nullhash.copy()
135 135 s.update(p1)
136 136 else:
137 137 # none of the parent nodes are nullid
138 138 l = [p1, p2]
139 139 l.sort()
140 140 s = hashlib.sha1(l[0])
141 141 s.update(l[1])
142 142 s.update(text)
143 143 return s.digest()
144 144
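
The same digest spelled out with plain hashlib, assuming two non-null
20-byte parents: sorting the parents first makes the node id independent
of parent order.

p1, p2 = '\x01' * 20, '\x02' * 20
expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + 'text').digest()
assert hash('text', p1, p2) == expected
assert hash('text', p2, p1) == expected  # parent order does not matter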
145 145 # index v0:
146 146 # 4 bytes: offset
147 147 # 4 bytes: compressed length
148 148 # 4 bytes: base rev
149 149 # 4 bytes: link rev
150 150 # 20 bytes: parent 1 nodeid
151 151 # 20 bytes: parent 2 nodeid
152 152 # 20 bytes: nodeid
153 153 indexformatv0 = ">4l20s20s20s"
154 154
155 155 class revlogoldio(object):
156 156 def __init__(self):
157 157 self.size = struct.calcsize(indexformatv0)
158 158
159 159 def parseindex(self, data, inline):
160 160 s = self.size
161 161 index = []
162 162 nodemap = {nullid: nullrev}
163 163 n = off = 0
164 164 l = len(data)
165 165 while off + s <= l:
166 166 cur = data[off:off + s]
167 167 off += s
168 168 e = _unpack(indexformatv0, cur)
169 169 # transform to revlogv1 format
170 170 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
171 171 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
172 172 index.append(e2)
173 173 nodemap[e[6]] = n
174 174 n += 1
175 175
176 176 # add the magic null revision at -1
177 177 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
178 178
179 179 return index, nodemap, None
180 180
181 181 def packentry(self, entry, node, version, rev):
182 182 if gettype(entry[0]):
183 183 raise RevlogError(_("index entry flags need RevlogNG"))
184 184 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
185 185 node(entry[5]), node(entry[6]), entry[7])
186 186 return _pack(indexformatv0, *e2)
187 187
188 188 # index ng:
189 189 # 6 bytes: offset
190 190 # 2 bytes: flags
191 191 # 4 bytes: compressed length
192 192 # 4 bytes: uncompressed length
193 193 # 4 bytes: base rev
194 194 # 4 bytes: link rev
195 195 # 4 bytes: parent 1 rev
196 196 # 4 bytes: parent 2 rev
197 197 # 32 bytes: nodeid
198 198 indexformatng = ">Qiiiiii20s12x"
199 199 versionformat = ">I"
200 200
201 201 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
202 202 # signed integer)
203 203 _maxentrysize = 0x7fffffff
204 204
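
A quick sanity check of the two record layouts above, computed from the
format strings as defined: v0 entries are 76 bytes, RevlogNG entries 64
(the 12x pad leaves room for 32-byte hashes).

assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20       # 76 bytes
assert struct.calcsize(indexformatng) == 8 + 6 * 4 + 20 + 12  # 64 bytes
# the magic null entry that parseindex appends, packed into 64 bytes:
nullentry = _pack(indexformatng, 0, 0, 0, -1, -1, -1, -1, nullid)
assert len(nullentry) == 64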
205 205 class revlogio(object):
206 206 def __init__(self):
207 207 self.size = struct.calcsize(indexformatng)
208 208
209 209 def parseindex(self, data, inline):
210 210 # call the C implementation to parse the index data
211 211 index, cache = parsers.parse_index2(data, inline)
212 212 return index, getattr(index, 'nodemap', None), cache
213 213
214 214 def packentry(self, entry, node, version, rev):
215 215 p = _pack(indexformatng, *entry)
216 216 if rev == 0:
217 217 p = _pack(versionformat, version) + p[4:]
218 218 return p
219 219
220 220 class revlog(object):
221 221 """
222 222 the underlying revision storage object
223 223
224 224 A revlog consists of two parts, an index and the revision data.
225 225
226 226 The index is a file with a fixed record size containing
227 227 information on each revision, including its nodeid (hash), the
228 228 nodeids of its parents, the position and offset of its data within
229 229 the data file, and the revision it's based on. Finally, each entry
230 230 contains a linkrev entry that can serve as a pointer to external
231 231 data.
232 232
233 233 The revision data itself is a linear collection of data chunks.
234 234 Each chunk represents a revision and is usually represented as a
235 235 delta against the previous chunk. To bound lookup time, runs of
236 236 deltas are limited to about 2 times the length of the original
237 237 version data. This makes retrieval of a version proportional to
238 238 its size, or O(1) relative to the number of revisions.
239 239
240 240 Both pieces of the revlog are written to in an append-only
241 241 fashion, which means we never need to rewrite a file to insert or
242 242 remove data, and can use some simple techniques to avoid the need
243 243 for locking while reading.
244 244
245 245 If checkambig, indexfile is opened with checkambig=True at
246 246 writing, to avoid file stat ambiguity.
247 247 """
248 248 def __init__(self, opener, indexfile, checkambig=False):
249 249 """
250 250 create a revlog object
251 251
252 252 opener is a function that abstracts the file opening operation
253 253 and can be used to implement COW semantics or the like.
254 254 """
255 255 self.indexfile = indexfile
256 256 self.datafile = indexfile[:-2] + ".d"
257 257 self.opener = opener
258 258 # When True, indexfile is opened with checkambig=True at writing, to
259 259 # avoid file stat ambiguity.
260 260 self._checkambig = checkambig
261 261 # 3-tuple of (node, rev, text) for a raw revision.
262 262 self._cache = None
263 263 # Maps rev to chain base rev.
264 264 self._chainbasecache = util.lrucachedict(100)
265 265 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
266 266 self._chunkcache = (0, '')
267 267 # How much data to read and cache into the raw revlog data cache.
268 268 self._chunkcachesize = 65536
269 269 self._maxchainlen = None
270 270 self._aggressivemergedeltas = False
271 271 self.index = []
272 272 # Mapping of partial identifiers to full nodes.
273 273 self._pcache = {}
274 274 # Mapping of revision integer to full node.
275 275 self._nodecache = {nullid: nullrev}
276 276 self._nodepos = None
277 277 self._compengine = 'zlib'
278 278
279 279 v = REVLOG_DEFAULT_VERSION
280 280 opts = getattr(opener, 'options', None)
281 281 if opts is not None:
282 282 if 'revlogv1' in opts:
283 283 if 'generaldelta' in opts:
284 284 v |= REVLOGGENERALDELTA
285 285 else:
286 286 v = 0
287 287 if 'chunkcachesize' in opts:
288 288 self._chunkcachesize = opts['chunkcachesize']
289 289 if 'maxchainlen' in opts:
290 290 self._maxchainlen = opts['maxchainlen']
291 291 if 'aggressivemergedeltas' in opts:
292 292 self._aggressivemergedeltas = opts['aggressivemergedeltas']
293 293 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
294 294 if 'compengine' in opts:
295 295 self._compengine = opts['compengine']
296 296
297 297 if self._chunkcachesize <= 0:
298 298 raise RevlogError(_('revlog chunk cache size %r is not greater '
299 299 'than 0') % self._chunkcachesize)
300 300 elif self._chunkcachesize & (self._chunkcachesize - 1):
301 301 raise RevlogError(_('revlog chunk cache size %r is not a power '
302 302 'of 2') % self._chunkcachesize)
303 303
304 304 indexdata = ''
305 305 self._initempty = True
306 306 try:
307 307 f = self.opener(self.indexfile)
308 308 indexdata = f.read()
309 309 f.close()
310 310 if len(indexdata) > 0:
311 311 v = struct.unpack(versionformat, indexdata[:4])[0]
312 312 self._initempty = False
313 313 except IOError as inst:
314 314 if inst.errno != errno.ENOENT:
315 315 raise
316 316
317 317 self.version = v
318 318 self._inline = v & REVLOGNGINLINEDATA
319 319 self._generaldelta = v & REVLOGGENERALDELTA
320 320 flags = v & ~0xFFFF
321 321 fmt = v & 0xFFFF
322 322 if fmt == REVLOGV0 and flags:
323 323 raise RevlogError(_("index %s unknown flags %#04x for format v0")
324 324 % (self.indexfile, flags >> 16))
325 325 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
326 326 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
327 327 % (self.indexfile, flags >> 16))
328 328 elif fmt > REVLOGNG:
329 329 raise RevlogError(_("index %s unknown format %d")
330 330 % (self.indexfile, fmt))
331 331
332 332 self.storedeltachains = True
333 333
334 334 self._io = revlogio()
335 335 if self.version == REVLOGV0:
336 336 self._io = revlogoldio()
337 337 try:
338 338 d = self._io.parseindex(indexdata, self._inline)
339 339 except (ValueError, IndexError):
340 340 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
341 341 self.index, nodemap, self._chunkcache = d
342 342 if nodemap is not None:
343 343 self.nodemap = self._nodecache = nodemap
344 344 if not self._chunkcache:
345 345 self._chunkclear()
346 346 # revnum -> (chain-length, sum-delta-length)
347 347 self._chaininfocache = {}
348 348 # revlog header -> revlog compressor
349 349 self._decompressors = {}
350 350
351 351 @util.propertycache
352 352 def _compressor(self):
353 353 return util.compengines[self._compengine].revlogcompressor()
354 354
355 355 def tip(self):
356 356 return self.node(len(self.index) - 2)
357 357 def __contains__(self, rev):
358 358 return 0 <= rev < len(self)
359 359 def __len__(self):
360 360 return len(self.index) - 1
361 361 def __iter__(self):
362 362 return iter(xrange(len(self)))
363 363 def revs(self, start=0, stop=None):
364 364 """iterate over all rev in this revlog (from start to stop)"""
365 365 step = 1
366 366 if stop is not None:
367 367 if start > stop:
368 368 step = -1
369 369 stop += step
370 370 else:
371 371 stop = len(self)
372 372 return xrange(start, stop, step)
373 373
374 374 @util.propertycache
375 375 def nodemap(self):
376 376 self.rev(self.node(0))
377 377 return self._nodecache
378 378
379 379 def hasnode(self, node):
380 380 try:
381 381 self.rev(node)
382 382 return True
383 383 except KeyError:
384 384 return False
385 385
386 386 def clearcaches(self):
387 387 self._cache = None
388 388 self._chainbasecache.clear()
389 389 self._chunkcache = (0, '')
390 390 self._pcache = {}
391 391
392 392 try:
393 393 self._nodecache.clearcaches()
394 394 except AttributeError:
395 395 self._nodecache = {nullid: nullrev}
396 396 self._nodepos = None
397 397
398 398 def rev(self, node):
399 399 try:
400 400 return self._nodecache[node]
401 401 except TypeError:
402 402 raise
403 403 except RevlogError:
404 404 # parsers.c radix tree lookup failed
405 405 raise LookupError(node, self.indexfile, _('no node'))
406 406 except KeyError:
407 407 # pure python cache lookup failed
408 408 n = self._nodecache
409 409 i = self.index
410 410 p = self._nodepos
411 411 if p is None:
412 412 p = len(i) - 2
413 413 for r in xrange(p, -1, -1):
414 414 v = i[r][7]
415 415 n[v] = r
416 416 if v == node:
417 417 self._nodepos = r - 1
418 418 return r
419 419 raise LookupError(node, self.indexfile, _('no node'))
420 420
421 421 # Accessors for index entries.
422 422
423 423 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
424 424 # are flags.
425 425 def start(self, rev):
426 426 return int(self.index[rev][0] >> 16)
427 427
428 428 def flags(self, rev):
429 429 return self.index[rev][0] & 0xFFFF
430 430
431 431 def length(self, rev):
432 432 return self.index[rev][1]
433 433
434 434 def rawsize(self, rev):
435 435 """return the length of the uncompressed text for a given revision"""
436 436 l = self.index[rev][2]
437 437 if l >= 0:
438 438 return l
439 439
440 440 t = self.revision(self.node(rev))
441 441 return len(t)
442 442 size = rawsize
443 443
444 444 def chainbase(self, rev):
445 445 base = self._chainbasecache.get(rev)
446 446 if base is not None:
447 447 return base
448 448
449 449 index = self.index
450 450 base = index[rev][3]
451 451 while base != rev:
452 452 rev = base
453 453 base = index[rev][3]
454 454
455 455 self._chainbasecache[rev] = base
456 456 return base
457 457
458 458 def linkrev(self, rev):
459 459 return self.index[rev][4]
460 460
461 461 def parentrevs(self, rev):
462 462 return self.index[rev][5:7]
463 463
464 464 def node(self, rev):
465 465 return self.index[rev][7]
466 466
467 467 # Derived from index values.
468 468
469 469 def end(self, rev):
470 470 return self.start(rev) + self.length(rev)
471 471
472 472 def parents(self, node):
473 473 i = self.index
474 474 d = i[self.rev(node)]
475 475 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
476 476
477 477 def chainlen(self, rev):
478 478 return self._chaininfo(rev)[0]
479 479
480 480 def _chaininfo(self, rev):
481 481 chaininfocache = self._chaininfocache
482 482 if rev in chaininfocache:
483 483 return chaininfocache[rev]
484 484 index = self.index
485 485 generaldelta = self._generaldelta
486 486 iterrev = rev
487 487 e = index[iterrev]
488 488 clen = 0
489 489 compresseddeltalen = 0
490 490 while iterrev != e[3]:
491 491 clen += 1
492 492 compresseddeltalen += e[1]
493 493 if generaldelta:
494 494 iterrev = e[3]
495 495 else:
496 496 iterrev -= 1
497 497 if iterrev in chaininfocache:
498 498 t = chaininfocache[iterrev]
499 499 clen += t[0]
500 500 compresseddeltalen += t[1]
501 501 break
502 502 e = index[iterrev]
503 503 else:
504 504 # Add text length of base since decompressing that also takes
505 505 # work. For cache hits the length is already included.
506 506 compresseddeltalen += e[1]
507 507 r = (clen, compresseddeltalen)
508 508 chaininfocache[rev] = r
509 509 return r
510 510
511 511 def _deltachain(self, rev, stoprev=None):
512 512 """Obtain the delta chain for a revision.
513 513
514 514 ``stoprev`` specifies a revision to stop at. If not specified, we
515 515 stop at the base of the chain.
516 516
517 517 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
518 518 revs in ascending order and ``stopped`` is a bool indicating whether
519 519 ``stoprev`` was hit.
520 520 """
521 521 chain = []
522 522
523 523 # Alias to prevent attribute lookup in tight loop.
524 524 index = self.index
525 525 generaldelta = self._generaldelta
526 526
527 527 iterrev = rev
528 528 e = index[iterrev]
529 529 while iterrev != e[3] and iterrev != stoprev:
530 530 chain.append(iterrev)
531 531 if generaldelta:
532 532 iterrev = e[3]
533 533 else:
534 534 iterrev -= 1
535 535 e = index[iterrev]
536 536
537 537 if iterrev == stoprev:
538 538 stopped = True
539 539 else:
540 540 chain.append(iterrev)
541 541 stopped = False
542 542
543 543 chain.reverse()
544 544 return chain, stopped
545 545
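
A usage sketch under an assumed generaldelta layout where rev 5 is stored
as a delta against rev 3, and rev 3 against the full text at rev 1:

# rl._deltachain(5)            -> ([1, 3, 5], False)  walked to the base
# rl._deltachain(5, stoprev=3) -> ([5], True)         stopped early
#
# revision() passes a cached rev as stoprev so it can splice the cached
# text under the remaining deltas instead of re-reading the whole chain.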
546 546 def ancestors(self, revs, stoprev=0, inclusive=False):
547 547 """Generate the ancestors of 'revs' in reverse topological order.
548 548 Does not generate revs lower than stoprev.
549 549
550 550 See the documentation for ancestor.lazyancestors for more details."""
551 551
552 552 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
553 553 inclusive=inclusive)
554 554
555 555 def descendants(self, revs):
556 556 """Generate the descendants of 'revs' in revision order.
557 557
558 558 Yield a sequence of revision numbers starting with a child of
559 559 some rev in revs, i.e., each revision is *not* considered a
560 560 descendant of itself. Results are ordered by revision number (a
561 561 topological sort)."""
562 562 first = min(revs)
563 563 if first == nullrev:
564 564 for i in self:
565 565 yield i
566 566 return
567 567
568 568 seen = set(revs)
569 569 for i in self.revs(start=first + 1):
570 570 for x in self.parentrevs(i):
571 571 if x != nullrev and x in seen:
572 572 seen.add(i)
573 573 yield i
574 574 break
575 575
576 576 def findcommonmissing(self, common=None, heads=None):
577 577 """Return a tuple of the ancestors of common and the ancestors of heads
578 578 that are not ancestors of common. In revset terminology, we return the
579 579 tuple:
580 580
581 581 ::common, (::heads) - (::common)
582 582
583 583 The list is sorted by revision number, meaning it is
584 584 topologically sorted.
585 585
586 586 'heads' and 'common' are both lists of node IDs. If heads is
587 587 not supplied, uses all of the revlog's heads. If common is not
588 588 supplied, uses nullid."""
589 589 if common is None:
590 590 common = [nullid]
591 591 if heads is None:
592 592 heads = self.heads()
593 593
594 594 common = [self.rev(n) for n in common]
595 595 heads = [self.rev(n) for n in heads]
596 596
597 597 # we want the ancestors, but inclusive
598 598 class lazyset(object):
599 599 def __init__(self, lazyvalues):
600 600 self.addedvalues = set()
601 601 self.lazyvalues = lazyvalues
602 602
603 603 def __contains__(self, value):
604 604 return value in self.addedvalues or value in self.lazyvalues
605 605
606 606 def __iter__(self):
607 607 added = self.addedvalues
608 608 for r in added:
609 609 yield r
610 610 for r in self.lazyvalues:
611 611 if not r in added:
612 612 yield r
613 613
614 614 def add(self, value):
615 615 self.addedvalues.add(value)
616 616
617 617 def update(self, values):
618 618 self.addedvalues.update(values)
619 619
620 620 has = lazyset(self.ancestors(common))
621 621 has.add(nullrev)
622 622 has.update(common)
623 623
624 624 # take all ancestors from heads that aren't in has
625 625 missing = set()
626 626 visit = collections.deque(r for r in heads if r not in has)
627 627 while visit:
628 628 r = visit.popleft()
629 629 if r in missing:
630 630 continue
631 631 else:
632 632 missing.add(r)
633 633 for p in self.parentrevs(r):
634 634 if p not in has:
635 635 visit.append(p)
636 636 missing = list(missing)
637 637 missing.sort()
638 638 return has, [self.node(miss) for miss in missing]
639 639
640 640 def incrementalmissingrevs(self, common=None):
641 641 """Return an object that can be used to incrementally compute the
642 642 revision numbers of the ancestors of arbitrary sets that are not
643 643 ancestors of common. This is an ancestor.incrementalmissingancestors
644 644 object.
645 645
646 646 'common' is a list of revision numbers. If common is not supplied, uses
647 647 nullrev.
648 648 """
649 649 if common is None:
650 650 common = [nullrev]
651 651
652 652 return ancestor.incrementalmissingancestors(self.parentrevs, common)
653 653
654 654 def findmissingrevs(self, common=None, heads=None):
655 655 """Return the revision numbers of the ancestors of heads that
656 656 are not ancestors of common.
657 657
658 658 More specifically, return a list of revision numbers corresponding to
659 659 nodes N such that every N satisfies the following constraints:
660 660
661 661 1. N is an ancestor of some node in 'heads'
662 662 2. N is not an ancestor of any node in 'common'
663 663
664 664 The list is sorted by revision number, meaning it is
665 665 topologically sorted.
666 666
667 667 'heads' and 'common' are both lists of revision numbers. If heads is
668 668 not supplied, uses all of the revlog's heads. If common is not
669 669 supplied, uses nullid."""
670 670 if common is None:
671 671 common = [nullrev]
672 672 if heads is None:
673 673 heads = self.headrevs()
674 674
675 675 inc = self.incrementalmissingrevs(common=common)
676 676 return inc.missingancestors(heads)
677 677
678 678 def findmissing(self, common=None, heads=None):
679 679 """Return the ancestors of heads that are not ancestors of common.
680 680
681 681 More specifically, return a list of nodes N such that every N
682 682 satisfies the following constraints:
683 683
684 684 1. N is an ancestor of some node in 'heads'
685 685 2. N is not an ancestor of any node in 'common'
686 686
687 687 The list is sorted by revision number, meaning it is
688 688 topologically sorted.
689 689
690 690 'heads' and 'common' are both lists of node IDs. If heads is
691 691 not supplied, uses all of the revlog's heads. If common is not
692 692 supplied, uses nullid."""
693 693 if common is None:
694 694 common = [nullid]
695 695 if heads is None:
696 696 heads = self.heads()
697 697
698 698 common = [self.rev(n) for n in common]
699 699 heads = [self.rev(n) for n in heads]
700 700
701 701 inc = self.incrementalmissingrevs(common=common)
702 702 return [self.node(r) for r in inc.missingancestors(heads)]
703 703
704 704 def nodesbetween(self, roots=None, heads=None):
705 705 """Return a topological path from 'roots' to 'heads'.
706 706
707 707 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
708 708 topologically sorted list of all nodes N that satisfy both of
709 709 these constraints:
710 710
711 711 1. N is a descendant of some node in 'roots'
712 712 2. N is an ancestor of some node in 'heads'
713 713
714 714 Every node is considered to be both a descendant and an ancestor
715 715 of itself, so every reachable node in 'roots' and 'heads' will be
716 716 included in 'nodes'.
717 717
718 718 'outroots' is the list of reachable nodes in 'roots', i.e., the
719 719 subset of 'roots' that is returned in 'nodes'. Likewise,
720 720 'outheads' is the subset of 'heads' that is also in 'nodes'.
721 721
722 722 'roots' and 'heads' are both lists of node IDs. If 'roots' is
723 723 unspecified, uses nullid as the only root. If 'heads' is
724 724 unspecified, uses list of all of the revlog's heads."""
725 725 nonodes = ([], [], [])
726 726 if roots is not None:
727 727 roots = list(roots)
728 728 if not roots:
729 729 return nonodes
730 730 lowestrev = min([self.rev(n) for n in roots])
731 731 else:
732 732 roots = [nullid] # Everybody's a descendant of nullid
733 733 lowestrev = nullrev
734 734 if (lowestrev == nullrev) and (heads is None):
735 735 # We want _all_ the nodes!
736 736 return ([self.node(r) for r in self], [nullid], list(self.heads()))
737 737 if heads is None:
738 738 # All nodes are ancestors, so the latest ancestor is the last
739 739 # node.
740 740 highestrev = len(self) - 1
741 741 # Set ancestors to None to signal that every node is an ancestor.
742 742 ancestors = None
743 743 # Set heads to an empty dictionary for later discovery of heads
744 744 heads = {}
745 745 else:
746 746 heads = list(heads)
747 747 if not heads:
748 748 return nonodes
749 749 ancestors = set()
750 750 # Turn heads into a dictionary so we can remove 'fake' heads.
751 751 # Also, later we will be using it to filter out the heads we can't
752 752 # find from roots.
753 753 heads = dict.fromkeys(heads, False)
754 754 # Start at the top and keep marking parents until we're done.
755 755 nodestotag = set(heads)
756 756 # Remember where the top was so we can use it as a limit later.
757 757 highestrev = max([self.rev(n) for n in nodestotag])
758 758 while nodestotag:
759 759 # grab a node to tag
760 760 n = nodestotag.pop()
761 761 # Never tag nullid
762 762 if n == nullid:
763 763 continue
764 764 # A node's revision number represents its place in a
765 765 # topologically sorted list of nodes.
766 766 r = self.rev(n)
767 767 if r >= lowestrev:
768 768 if n not in ancestors:
769 769 # If we are possibly a descendant of one of the roots
770 770 # and we haven't already been marked as an ancestor
771 771 ancestors.add(n) # Mark as ancestor
772 772 # Add non-nullid parents to list of nodes to tag.
773 773 nodestotag.update([p for p in self.parents(n) if
774 774 p != nullid])
775 775 elif n in heads: # We've seen it before, is it a fake head?
776 776 # So it is, real heads should not be the ancestors of
777 777 # any other heads.
778 778 heads.pop(n)
779 779 if not ancestors:
780 780 return nonodes
781 781 # Now that we have our set of ancestors, we want to remove any
782 782 # roots that are not ancestors.
783 783
784 784 # If one of the roots was nullid, everything is included anyway.
785 785 if lowestrev > nullrev:
786 786 # But, since we weren't, let's recompute the lowest rev to not
787 787 # include roots that aren't ancestors.
788 788
789 789 # Filter out roots that aren't ancestors of heads
790 790 roots = [root for root in roots if root in ancestors]
791 791 # Recompute the lowest revision
792 792 if roots:
793 793 lowestrev = min([self.rev(root) for root in roots])
794 794 else:
795 795 # No more roots? Return empty list
796 796 return nonodes
797 797 else:
798 798 # We are descending from nullid, and don't need to care about
799 799 # any other roots.
800 800 lowestrev = nullrev
801 801 roots = [nullid]
802 802 # Transform our roots list into a set.
803 803 descendants = set(roots)
804 804 # Also, keep the original roots so we can filter out roots that aren't
805 805 # 'real' roots (i.e. are descended from other roots).
806 806 roots = descendants.copy()
807 807 # Our topologically sorted list of output nodes.
808 808 orderedout = []
809 809 # Don't start at nullid since we don't want nullid in our output list,
810 810 # and if nullid shows up in descendants, empty parents will look like
811 811 # they're descendants.
812 812 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
813 813 n = self.node(r)
814 814 isdescendant = False
815 815 if lowestrev == nullrev: # Everybody is a descendant of nullid
816 816 isdescendant = True
817 817 elif n in descendants:
818 818 # n is already a descendant
819 819 isdescendant = True
820 820 # This check only needs to be done here because all the roots
821 821 # will start being marked as descendants before the loop.
822 822 if n in roots:
823 823 # If n was a root, check if it's a 'real' root.
824 824 p = tuple(self.parents(n))
825 825 # If any of its parents are descendants, it's not a root.
826 826 if (p[0] in descendants) or (p[1] in descendants):
827 827 roots.remove(n)
828 828 else:
829 829 p = tuple(self.parents(n))
830 830 # A node is a descendant if either of its parents are
831 831 # descendants. (We seeded the descendants set with the roots
832 832 # up there, remember?)
833 833 if (p[0] in descendants) or (p[1] in descendants):
834 834 descendants.add(n)
835 835 isdescendant = True
836 836 if isdescendant and ((ancestors is None) or (n in ancestors)):
837 837 # Only include nodes that are both descendants and ancestors.
838 838 orderedout.append(n)
839 839 if (ancestors is not None) and (n in heads):
840 840 # We're trying to figure out which heads are reachable
841 841 # from roots.
842 842 # Mark this head as having been reached
843 843 heads[n] = True
844 844 elif ancestors is None:
845 845 # Otherwise, we're trying to discover the heads.
846 846 # Assume this is a head because if it isn't, the next step
847 847 # will eventually remove it.
848 848 heads[n] = True
849 849 # But, obviously its parents aren't.
850 850 for p in self.parents(n):
851 851 heads.pop(p, None)
852 852 heads = [head for head, flag in heads.iteritems() if flag]
853 853 roots = list(roots)
854 854 assert orderedout
855 855 assert roots
856 856 assert heads
857 857 return (orderedout, roots, heads)
858 858
859 859 def headrevs(self):
860 860 try:
861 861 return self.index.headrevs()
862 862 except AttributeError:
863 863 return self._headrevs()
864 864
865 865 def computephases(self, roots):
866 866 return self.index.computephasesmapsets(roots)
867 867
868 868 def _headrevs(self):
869 869 count = len(self)
870 870 if not count:
871 871 return [nullrev]
872 872 # we won't iterate over filtered revs, so nobody is a head at start
873 873 ishead = [0] * (count + 1)
874 874 index = self.index
875 875 for r in self:
876 876 ishead[r] = 1 # I may be a head
877 877 e = index[r]
878 878 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
879 879 return [r for r, val in enumerate(ishead) if val]
880 880
881 881 def heads(self, start=None, stop=None):
882 882 """return the list of all nodes that have no children
883 883
884 884 if start is specified, only heads that are descendants of
885 885 start will be returned
886 886 if stop is specified, it will consider all the revs from stop
887 887 as if they had no children
888 888 """
889 889 if start is None and stop is None:
890 890 if not len(self):
891 891 return [nullid]
892 892 return [self.node(r) for r in self.headrevs()]
893 893
894 894 if start is None:
895 895 start = nullid
896 896 if stop is None:
897 897 stop = []
898 898 stoprevs = set([self.rev(n) for n in stop])
899 899 startrev = self.rev(start)
900 900 reachable = set((startrev,))
901 901 heads = set((startrev,))
902 902
903 903 parentrevs = self.parentrevs
904 904 for r in self.revs(start=startrev + 1):
905 905 for p in parentrevs(r):
906 906 if p in reachable:
907 907 if r not in stoprevs:
908 908 reachable.add(r)
909 909 heads.add(r)
910 910 if p in heads and p not in stoprevs:
911 911 heads.remove(p)
912 912
913 913 return [self.node(r) for r in heads]
914 914
915 915 def children(self, node):
916 916 """find the children of a given node"""
917 917 c = []
918 918 p = self.rev(node)
919 919 for r in self.revs(start=p + 1):
920 920 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
921 921 if prevs:
922 922 for pr in prevs:
923 923 if pr == p:
924 924 c.append(self.node(r))
925 925 elif p == nullrev:
926 926 c.append(self.node(r))
927 927 return c
928 928
929 929 def descendant(self, start, end):
930 930 if start == nullrev:
931 931 return True
932 932 for i in self.descendants([start]):
933 933 if i == end:
934 934 return True
935 935 elif i > end:
936 936 break
937 937 return False
938 938
939 939 def commonancestorsheads(self, a, b):
940 940 """calculate all the heads of the common ancestors of nodes a and b"""
941 941 a, b = self.rev(a), self.rev(b)
942 942 try:
943 943 ancs = self.index.commonancestorsheads(a, b)
944 944 except (AttributeError, OverflowError): # C implementation failed
945 945 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
946 946 return map(self.node, ancs)
947 947
948 948 def isancestor(self, a, b):
949 949 """return True if node a is an ancestor of node b
950 950
951 951 The implementation of this is trivial but the use of
952 952 commonancestorsheads is not."""
953 953 return a in self.commonancestorsheads(a, b)
954 954
955 955 def ancestor(self, a, b):
956 956 """calculate the "best" common ancestor of nodes a and b"""
957 957
958 958 a, b = self.rev(a), self.rev(b)
959 959 try:
960 960 ancs = self.index.ancestors(a, b)
961 961 except (AttributeError, OverflowError):
962 962 ancs = ancestor.ancestors(self.parentrevs, a, b)
963 963 if ancs:
964 964 # choose a consistent winner when there's a tie
965 965 return min(map(self.node, ancs))
966 966 return nullid
967 967
968 968 def _match(self, id):
969 969 if isinstance(id, int):
970 970 # rev
971 971 return self.node(id)
972 972 if len(id) == 20:
973 973 # possibly a binary node
974 974 # odds of a binary node being all hex in ASCII are 1 in 10**25
975 975 try:
976 976 node = id
977 977 self.rev(node) # quick search the index
978 978 return node
979 979 except LookupError:
980 980 pass # may be partial hex id
981 981 try:
982 982 # str(rev)
983 983 rev = int(id)
984 984 if str(rev) != id:
985 985 raise ValueError
986 986 if rev < 0:
987 987 rev = len(self) + rev
988 988 if rev < 0 or rev >= len(self):
989 989 raise ValueError
990 990 return self.node(rev)
991 991 except (ValueError, OverflowError):
992 992 pass
993 993 if len(id) == 40:
994 994 try:
995 995 # a full hex nodeid?
996 996 node = bin(id)
997 997 self.rev(node)
998 998 return node
999 999 except (TypeError, LookupError):
1000 1000 pass
1001 1001
1002 1002 def _partialmatch(self, id):
1003 1003 try:
1004 1004 partial = self.index.partialmatch(id)
1005 1005 if partial and self.hasnode(partial):
1006 1006 return partial
1007 1007 return None
1008 1008 except RevlogError:
1009 1009 # parsers.c radix tree lookup gave multiple matches
1010 1010 # fast path: for unfiltered changelog, radix tree is accurate
1011 1011 if not getattr(self, 'filteredrevs', None):
1012 1012 raise LookupError(id, self.indexfile,
1013 1013 _('ambiguous identifier'))
1014 1014 # fall through to slow path that filters hidden revisions
1015 1015 except (AttributeError, ValueError):
1016 1016 # we are pure python, or key was too short to search radix tree
1017 1017 pass
1018 1018
1019 1019 if id in self._pcache:
1020 1020 return self._pcache[id]
1021 1021
1022 1022 if len(id) < 40:
1023 1023 try:
1024 1024 # hex(node)[:...]
1025 1025 l = len(id) // 2 # grab an even number of digits
1026 1026 prefix = bin(id[:l * 2])
1027 1027 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1028 1028 nl = [n for n in nl if hex(n).startswith(id) and
1029 1029 self.hasnode(n)]
1030 1030 if len(nl) > 0:
1031 1031 if len(nl) == 1:
1032 1032 self._pcache[id] = nl[0]
1033 1033 return nl[0]
1034 1034 raise LookupError(id, self.indexfile,
1035 1035 _('ambiguous identifier'))
1036 1036 return None
1037 1037 except TypeError:
1038 1038 pass
1039 1039
1040 1040 def lookup(self, id):
1041 1041 """locate a node based on:
1042 1042 - revision number or str(revision number)
1043 1043 - nodeid or subset of hex nodeid
1044 1044 """
1045 1045 n = self._match(id)
1046 1046 if n is not None:
1047 1047 return n
1048 1048 n = self._partialmatch(id)
1049 1049 if n:
1050 1050 return n
1051 1051
1052 1052 raise LookupError(id, self.indexfile, _('no match found'))
1053 1053
1054 1054 def cmp(self, node, text):
1055 1055 """compare text with a given file revision
1056 1056
1057 1057 returns True if text is different from what is stored.
1058 1058 """
1059 1059 p1, p2 = self.parents(node)
1060 1060 return hash(text, p1, p2) != node
1061 1061
1062 1062 def _addchunk(self, offset, data):
1063 1063 """Add a segment to the revlog cache.
1064 1064
1065 1065 Accepts an absolute offset and the data that is at that location.
1066 1066 """
1067 1067 o, d = self._chunkcache
1068 1068 # try to add to existing cache
1069 1069 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1070 1070 self._chunkcache = o, d + data
1071 1071 else:
1072 1072 self._chunkcache = offset, data
1073 1073
1074 1074 def _loadchunk(self, offset, length, df=None):
1075 1075 """Load a segment of raw data from the revlog.
1076 1076
1077 1077 Accepts an absolute offset, length to read, and an optional existing
1078 1078 file handle to read from.
1079 1079
1080 1080 If an existing file handle is passed, it will be seeked and the
1081 1081 original seek position will NOT be restored.
1082 1082
1083 1083 Returns a str or buffer of raw byte data.
1084 1084 """
1085 1085 if df is not None:
1086 1086 closehandle = False
1087 1087 else:
1088 1088 if self._inline:
1089 1089 df = self.opener(self.indexfile)
1090 1090 else:
1091 1091 df = self.opener(self.datafile)
1092 1092 closehandle = True
1093 1093
1094 1094 # Cache data both forward and backward around the requested
1095 1095 # data, in a fixed size window. This helps speed up operations
1096 1096 # involving reading the revlog backwards.
1097 1097 cachesize = self._chunkcachesize
1098 1098 realoffset = offset & ~(cachesize - 1)
1099 1099 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1100 1100 - realoffset)
1101 1101 df.seek(realoffset)
1102 1102 d = df.read(reallength)
1103 1103 if closehandle:
1104 1104 df.close()
1105 1105 self._addchunk(realoffset, d)
1106 1106 if offset != realoffset or reallength != length:
1107 1107 return util.buffer(d, offset - realoffset, length)
1108 1108 return d
1109 1109
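
A worked example of the alignment arithmetic above with the default 64k
cache size: a 100-byte read at offset 70000 widens to a single aligned
window that fully covers the request.

cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset and offset + length <= realoffset + reallength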
1110 1110 def _getchunk(self, offset, length, df=None):
1111 1111 """Obtain a segment of raw data from the revlog.
1112 1112
1113 1113 Accepts an absolute offset, length of bytes to obtain, and an
1114 1114 optional file handle to the already-opened revlog. If the file
1115 1115 handle is used, its original seek position will not be preserved.
1116 1116
1117 1117 Requests for data may be returned from a cache.
1118 1118
1119 1119 Returns a str or a buffer instance of raw byte data.
1120 1120 """
1121 1121 o, d = self._chunkcache
1122 1122 l = len(d)
1123 1123
1124 1124 # is it in the cache?
1125 1125 cachestart = offset - o
1126 1126 cacheend = cachestart + length
1127 1127 if cachestart >= 0 and cacheend <= l:
1128 1128 if cachestart == 0 and cacheend == l:
1129 1129 return d # avoid a copy
1130 1130 return util.buffer(d, cachestart, cacheend - cachestart)
1131 1131
1132 1132 return self._loadchunk(offset, length, df=df)
1133 1133
1134 1134 def _chunkraw(self, startrev, endrev, df=None):
1135 1135 """Obtain a segment of raw data corresponding to a range of revisions.
1136 1136
1137 1137 Accepts the start and end revisions and an optional already-open
1138 1138 file handle to be used for reading. If the file handle is read, its
1139 1139 seek position will not be preserved.
1140 1140
1141 1141 Requests for data may be satisfied by a cache.
1142 1142
1143 1143 Returns a 2-tuple of (offset, data) for the requested range of
1144 1144 revisions. Offset is the integer offset from the beginning of the
1145 1145 revlog and data is a str or buffer of the raw byte data.
1146 1146
1147 1147 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1148 1148 to determine where each revision's data begins and ends.
1149 1149 """
1150 1150 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1151 1151 # (functions are expensive).
1152 1152 index = self.index
1153 1153 istart = index[startrev]
1154 1154 start = int(istart[0] >> 16)
1155 1155 if startrev == endrev:
1156 1156 end = start + istart[1]
1157 1157 else:
1158 1158 iend = index[endrev]
1159 1159 end = int(iend[0] >> 16) + iend[1]
1160 1160
1161 1161 if self._inline:
1162 1162 start += (startrev + 1) * self._io.size
1163 1163 end += (endrev + 1) * self._io.size
1164 1164 length = end - start
1165 1165
1166 1166 return start, self._getchunk(start, length, df=df)
1167 1167
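
The inline adjustment above accounts for the interleaved layout: an
inline revlog stores each revision's data immediately after its own index
record, so rev r's data sits past (r + 1) index records.

# inline layout: [idx 0][data 0][idx 1][data 1][idx 2][data 2] ...
# for rev 2:     physical start = start(2) + (2 + 1) * self._io.size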
1168 1168 def _chunk(self, rev, df=None):
1169 1169 """Obtain a single decompressed chunk for a revision.
1170 1170
1171 1171 Accepts an integer revision and an optional already-open file handle
1172 1172 to be used for reading. If used, the seek position of the file will not
1173 1173 be preserved.
1174 1174
1175 1175 Returns a str holding uncompressed data for the requested revision.
1176 1176 """
1177 1177 return self.decompress(self._chunkraw(rev, rev, df=df)[1])
1178 1178
1179 1179 def _chunks(self, revs, df=None):
1180 1180 """Obtain decompressed chunks for the specified revisions.
1181 1181
1182 1182 Accepts an iterable of numeric revisions that are assumed to be in
1183 1183 ascending order. Also accepts an optional already-open file handle
1184 1184 to be used for reading. If used, the seek position of the file will
1185 1185 not be preserved.
1186 1186
1187 1187 This function is similar to calling ``self._chunk()`` multiple times,
1188 1188 but is faster.
1189 1189
1190 1190 Returns a list with decompressed data for each requested revision.
1191 1191 """
1192 1192 if not revs:
1193 1193 return []
1194 1194 start = self.start
1195 1195 length = self.length
1196 1196 inline = self._inline
1197 1197 iosize = self._io.size
1198 1198 buffer = util.buffer
1199 1199
1200 1200 l = []
1201 1201 ladd = l.append
1202 1202
1203 1203 try:
1204 1204 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1205 1205 except OverflowError:
1206 1206 # issue4215 - we can't cache a run of chunks greater than
1207 1207 # 2G on Windows
1208 1208 return [self._chunk(rev, df=df) for rev in revs]
1209 1209
1210 1210 decomp = self.decompress
1211 1211 for rev in revs:
1212 1212 chunkstart = start(rev)
1213 1213 if inline:
1214 1214 chunkstart += (rev + 1) * iosize
1215 1215 chunklength = length(rev)
1216 1216 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1217 1217
1218 1218 return l
1219 1219
1220 1220 def _chunkclear(self):
1221 1221 """Clear the raw chunk cache."""
1222 1222 self._chunkcache = (0, '')
1223 1223
1224 1224 def deltaparent(self, rev):
1225 1225 """return deltaparent of the given revision"""
1226 1226 base = self.index[rev][3]
1227 1227 if base == rev:
1228 1228 return nullrev
1229 1229 elif self._generaldelta:
1230 1230 return base
1231 1231 else:
1232 1232 return rev - 1
1233 1233
1234 1234 def revdiff(self, rev1, rev2):
1235 1235 """return or calculate a delta between two revisions"""
1236 1236 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1237 1237 return str(self._chunk(rev2))
1238 1238
1239 1239 return mdiff.textdiff(self.revision(rev1),
1240 1240 self.revision(rev2))
1241 1241
1242 1242 def revision(self, nodeorrev, _df=None, raw=False):
1243 1243 """return an uncompressed revision of a given node or revision
1244 1244 number.
1245 1245
1246 1246 _df - an existing file handle to read from. (internal-only)
1247 1247 raw - an optional argument specifying if the revision data is to be
1248 1248 treated as raw data when applying flag transforms. 'raw' should be set
1249 1249 to True when generating changegroups or in debug commands.
1250 1250 """
1251 1251 if isinstance(nodeorrev, int):
1252 1252 rev = nodeorrev
1253 1253 node = self.node(rev)
1254 1254 else:
1255 1255 node = nodeorrev
1256 1256 rev = None
1257 1257
1258 1258 cachedrev = None
1259 1259 if node == nullid:
1260 1260 return ""
1261 1261 if self._cache:
1262 1262 if self._cache[0] == node:
1263 1263 return self._cache[2]
1264 1264 cachedrev = self._cache[1]
1265 1265
1266 1266 # look up what we need to read
1267 1267 text = None
1268 1268 if rev is None:
1269 1269 rev = self.rev(node)
1270 1270
1271 1271 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1272 1272 if stopped:
1273 1273 text = self._cache[2]
1274 1274
1275 1275 # drop cache to save memory
1276 1276 self._cache = None
1277 1277
1278 1278 bins = self._chunks(chain, df=_df)
1279 1279 if text is None:
1280 text = str(bins[0])
1280 text = bytes(bins[0])
1281 1281 bins = bins[1:]
1282 1282
1283 1283 text = mdiff.patches(text, bins)
1284 1284
1285 1285 text, validatehash = self._processflags(text, self.flags(rev), 'read',
1286 1286 raw=raw)
1287 1287 if validatehash:
1288 1288 self.checkhash(text, node, rev=rev)
1289 1289
1290 1290 self._cache = (node, rev, text)
1291 1291 return text
1292 1292
1293 1293 def hash(self, text, p1, p2):
1294 1294 """Compute a node hash.
1295 1295
1296 1296 Available as a function so that subclasses can replace the hash
1297 1297 as needed.
1298 1298 """
1299 1299 return hash(text, p1, p2)
1300 1300
1301 1301 def _processflags(self, text, flags, operation, raw=False):
1302 1302 """Inspect revision data flags and applies transforms defined by
1303 1303 registered flag processors.
1304 1304
1305 1305 ``text`` - the revision data to process
1306 1306 ``flags`` - the revision flags
1307 1307 ``operation`` - the operation being performed (read or write)
1308 1308 ``raw`` - an optional argument describing if the raw transform should be
1309 1309 applied.
1310 1310
1311 1311 This method processes the flags in the order (or reverse order if
1312 1312 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1313 1313 flag processors registered for present flags. The order of flags in
1314 1314 REVIDX_FLAGS_ORDER must be stable because the transforms do not commute.
1315 1315
1316 1316 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1317 1317 processed text and ``validatehash`` is a bool indicating whether the
1318 1318 returned text should be checked for hash integrity.
1319 1319
1320 1320 Note: If the ``raw`` argument is set, it has precedence over the
1321 1321 operation and will only update the value of ``validatehash``.
1322 1322 """
1323 1323 if not operation in ('read', 'write'):
1324 1324 raise ProgrammingError(_("invalid '%s' operation") % (operation))
1325 1325 # Check all flags are known.
1326 1326 if flags & ~REVIDX_KNOWN_FLAGS:
1327 1327 raise RevlogError(_("incompatible revision flag '%#x'") %
1328 1328 (flags & ~REVIDX_KNOWN_FLAGS))
1329 1329 validatehash = True
1330 1330 # Depending on the operation (read or write), the order might be
1331 1331 # reversed due to non-commutative transforms.
1332 1332 orderedflags = REVIDX_FLAGS_ORDER
1333 1333 if operation == 'write':
1334 1334 orderedflags = reversed(orderedflags)
1335 1335
1336 1336 for flag in orderedflags:
1337 1337 # If a flagprocessor has been registered for a known flag, apply the
1338 1338 # related operation transform and update result tuple.
1339 1339 if flag & flags:
1340 1340 vhash = True
1341 1341
1342 1342 if flag not in _flagprocessors:
1343 1343 message = _("missing processor for flag '%#x'") % (flag)
1344 1344 raise RevlogError(message)
1345 1345
1346 1346 processor = _flagprocessors[flag]
1347 1347 if processor is not None:
1348 1348 readtransform, writetransform, rawtransform = processor
1349 1349
1350 1350 if raw:
1351 1351 vhash = rawtransform(self, text)
1352 1352 elif operation == 'read':
1353 1353 text, vhash = readtransform(self, text)
1354 1354 else: # write operation
1355 1355 text, vhash = writetransform(self, text)
1356 1356 validatehash = validatehash and vhash
1357 1357
1358 1358 return text, validatehash
1359 1359
1360 1360 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1361 1361 """Check node hash integrity.
1362 1362
1363 1363 Available as a function so that subclasses can extend hash mismatch
1364 1364 behaviors as needed.
1365 1365 """
1366 1366 if p1 is None and p2 is None:
1367 1367 p1, p2 = self.parents(node)
1368 1368 if node != self.hash(text, p1, p2):
1369 1369 revornode = rev
1370 1370 if revornode is None:
1371 1371 revornode = templatefilters.short(hex(node))
1372 1372 raise RevlogError(_("integrity check failed on %s:%s")
1373 1373 % (self.indexfile, revornode))
1374 1374
1375 1375 def checkinlinesize(self, tr, fp=None):
1376 1376 """Check if the revlog is too big for inline and convert if so.
1377 1377
1378 1378 This should be called after revisions are added to the revlog. If the
1379 1379 revlog has grown too large to be an inline revlog, it will convert it
1380 1380 to use multiple index and data files.
1381 1381 """
1382 1382 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1383 1383 return
1384 1384
1385 1385 trinfo = tr.find(self.indexfile)
1386 1386 if trinfo is None:
1387 1387 raise RevlogError(_("%s not found in the transaction")
1388 1388 % self.indexfile)
1389 1389
1390 1390 trindex = trinfo[2]
1391 1391 if trindex is not None:
1392 1392 dataoff = self.start(trindex)
1393 1393 else:
1394 1394 # revlog was stripped at start of transaction, use all leftover data
1395 1395 trindex = len(self) - 1
1396 1396 dataoff = self.end(-2)
1397 1397
1398 1398 tr.add(self.datafile, dataoff)
1399 1399
1400 1400 if fp:
1401 1401 fp.flush()
1402 1402 fp.close()
1403 1403
1404 1404 df = self.opener(self.datafile, 'w')
1405 1405 try:
1406 1406 for r in self:
1407 1407 df.write(self._chunkraw(r, r)[1])
1408 1408 finally:
1409 1409 df.close()
1410 1410
1411 1411 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1412 1412 checkambig=self._checkambig)
1413 1413 self.version &= ~(REVLOGNGINLINEDATA)
1414 1414 self._inline = False
1415 1415 for i in self:
1416 1416 e = self._io.packentry(self.index[i], self.node, self.version, i)
1417 1417 fp.write(e)
1418 1418
1419 1419 # if we don't call close, the temp file will never replace the
1420 1420 # real index
1421 1421 fp.close()
1422 1422
1423 1423 tr.replace(self.indexfile, trindex * self._io.size)
1424 1424 self._chunkclear()
1425 1425
1426 1426 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1427 1427 node=None, flags=REVIDX_DEFAULT_FLAGS):
1428 1428 """add a revision to the log
1429 1429
1430 1430 text - the revision data to add
1431 1431 transaction - the transaction object used for rollback
1432 1432 link - the linkrev data to add
1433 1433 p1, p2 - the parent nodeids of the revision
1434 1434 cachedelta - an optional precomputed delta
1435 1435 node - nodeid of revision; typically node is not specified, and it is
1436 1436 computed by default as hash(text, p1, p2), however subclasses might
1437 1437 use different hashing method (and override checkhash() in such case)
1438 1438 flags - the known flags to set on the revision
1439 1439 """
1440 1440 if link == nullrev:
1441 1441 raise RevlogError(_("attempted to add linkrev -1 to %s")
1442 1442 % self.indexfile)
1443 1443
1444 1444 if flags:
1445 1445 node = node or self.hash(text, p1, p2)
1446 1446
1447 1447 newtext, validatehash = self._processflags(text, flags, 'write')
1448 1448
1449 1449 # If the flag processor modifies the revision data, ignore any provided
1450 1450 # cachedelta.
1451 1451 if newtext != text:
1452 1452 cachedelta = None
1453 1453 text = newtext
1454 1454
1455 1455 if len(text) > _maxentrysize:
1456 1456 raise RevlogError(
1457 1457 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1458 1458 % (self.indexfile, len(text)))
1459 1459
1460 1460 node = node or self.hash(text, p1, p2)
1461 1461 if node in self.nodemap:
1462 1462 return node
1463 1463
1464 1464 if validatehash:
1465 1465 self.checkhash(text, node, p1=p1, p2=p2)
1466 1466
1467 1467 dfh = None
1468 1468 if not self._inline:
1469 1469 dfh = self.opener(self.datafile, "a+")
1470 1470 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1471 1471 try:
1472 1472 return self._addrevision(node, text, transaction, link, p1, p2,
1473 1473 flags, cachedelta, ifh, dfh)
1474 1474 finally:
1475 1475 if dfh:
1476 1476 dfh.close()
1477 1477 ifh.close()
1478 1478
1479 1479 def compress(self, data):
1480 1480 """Generate a possibly-compressed representation of data."""
1481 1481 if not data:
1482 1482 return '', data
1483 1483
1484 1484 compressed = self._compressor.compress(data)
1485 1485
1486 1486 if compressed:
1487 1487 # The revlog compressor added the header in the returned data.
1488 1488 return '', compressed
1489 1489
1490 1490 if data[0] == '\0':
1491 1491 return '', data
1492 1492 return 'u', data
1493 1493
1494 1494 def decompress(self, data):
1495 1495 """Decompress a revlog chunk.
1496 1496
1497 1497 The chunk is expected to begin with a header identifying the
1498 1498 format type so it can be routed to an appropriate decompressor.
1499 1499 """
1500 1500 if not data:
1501 1501 return data
1502 1502
1503 1503 # Revlogs are read much more frequently than they are written and many
1504 1504 # chunks only take microseconds to decompress, so performance is
1505 1505 # important here.
1506 1506 #
1507 1507 # We can make a few assumptions about revlogs:
1508 1508 #
1509 1509 # 1) the majority of chunks will be compressed (as opposed to inline
1510 1510 # raw data).
1511 1511 # 2) decompressing *any* data will likely be at least 10x slower than
1512 1512 # returning raw inline data.
1513 1513 # 3) we want to prioritize common and officially supported compression
1514 1514 # engines
1515 1515 #
1516 1516 # It follows that we want to optimize for "decompress compressed data
1517 1517 # when encoded with common and officially supported compression engines"
1518 1518 # case over "raw data" and "data encoded by less common or non-official
1519 1519 # compression engines." That is why we have the inline lookup first
1520 1520 # followed by the compengines lookup.
1521 1521 #
1522 1522 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1523 1523 # compressed chunks. And this matters for changelog and manifest reads.
1524 1524 t = data[0:1]
1525 1525
1526 1526 if t == 'x':
1527 1527 try:
1528 1528 return _zlibdecompress(data)
1529 1529 except zlib.error as e:
1530 1530 raise RevlogError(_('revlog decompress error: %s') % str(e))
1531 1531 # '\0' is more common than 'u' so it goes first.
1532 1532 elif t == '\0':
1533 1533 return data
1534 1534 elif t == 'u':
1535 1535 return util.buffer(data, 1)
1536 1536
1537 1537 try:
1538 1538 compressor = self._decompressors[t]
1539 1539 except KeyError:
1540 1540 try:
1541 1541 engine = util.compengines.forrevlogheader(t)
1542 1542 compressor = engine.revlogcompressor()
1543 1543 self._decompressors[t] = compressor
1544 1544 except KeyError:
1545 1545 raise RevlogError(_('unknown compression type %r') % t)
1546 1546
1547 1547 return compressor.decompress(data)
1548 1548
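
A sketch of the header dispatch above, exercised with zlib directly and
written with Python 2 str semantics as in this module: deflate streams
begin with 'x', so zlib chunks need no extra marker, while compress()
falls back to a 'u' prefix for data that did not shrink, and the 'u'
branch above simply strips that marker again.

chunk = zlib.compress('some revision text')
assert chunk[0] == 'x'              # dispatched to _zlibdecompress
assert _zlibdecompress(chunk) == 'some revision text'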
1549 1549 def _isgooddelta(self, d, textlen):
1550 1550 """Returns True if the given delta is good. Good means that it is within
1551 1551 the disk span, disk size, and chain length bounds that we know to be
1552 1552 performant."""
1553 1553 if d is None:
1554 1554 return False
1555 1555
1556 1556 # - 'dist' is the distance from the base revision -- bounding it limits
1557 1557 # the amount of I/O we need to do.
1558 1558 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1559 1559 # to apply -- bounding it limits the amount of CPU we consume.
1560 1560 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1561 1561 if (dist > textlen * 4 or l > textlen or
1562 1562 compresseddeltalen > textlen * 2 or
1563 1563 (self._maxchainlen and chainlen > self._maxchainlen)):
1564 1564 return False
1565 1565
1566 1566 return True
1567 1567
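
A worked pass over those bounds for an assumed 1000-byte revision
(textlen = 1000), with d unpacked as above:

# dist=3000, l=200, compresseddeltalen=900, chainlen=10  -> good:
#   3000 <= 4 * 1000, 200 <= 1000, 900 <= 2 * 1000, no chain limit set
# dist=5000 -> rejected: rebuilding 1000 bytes would mean reading a
#   5000-byte span, so a full snapshot is stored instead of a delta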
1568 1568 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1569 1569 cachedelta, ifh, dfh, alwayscache=False, raw=False):
1570 1570 """internal function to add revisions to the log
1571 1571
1572 1572 see addrevision for argument descriptions.
1573 1573 invariants:
1574 1574 - text is optional (can be None); if not set, cachedelta must be set.
1575 1575 if both are set, they must correspond to each other.
1576 1576 - raw is optional; if set to True, it indicates the revision data is to
1577 1577 be treated by _processflags() as raw. It is usually set by changegroup
1578 1578 generation and debug commands.
1579 1579 """
1580 1580 btext = [text]
1581 1581 def buildtext():
1582 1582 if btext[0] is not None:
1583 1583 return btext[0]
1584 1584 baserev = cachedelta[0]
1585 1585 delta = cachedelta[1]
1586 1586 # special case deltas which replace entire base; no need to decode
1587 1587 # base revision. This neatly avoids censored bases, which raise when
1588 1588 # they're decoded.
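# (a full-replacement delta begins with the 12-byte hunk header
# struct.pack(">lll", 0, oldlen, newlen), i.e. "replace bytes 0..oldlen
# of the base with the newlen bytes that follow")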
1589 1589 hlen = struct.calcsize(">lll")
1590 1590 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1591 1591 len(delta) - hlen):
1592 1592 btext[0] = delta[hlen:]
1593 1593 else:
1594 1594 if self._inline:
1595 1595 fh = ifh
1596 1596 else:
1597 1597 fh = dfh
1598 1598 basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
1599 1599 btext[0] = mdiff.patch(basetext, delta)
1600 1600
1601 1601 try:
1602 1602 res = self._processflags(btext[0], flags, 'read', raw=raw)
1603 1603 btext[0], validatehash = res
1604 1604 if validatehash:
1605 1605 self.checkhash(btext[0], node, p1=p1, p2=p2)
1606 1606 if flags & REVIDX_ISCENSORED:
1607 1607 raise RevlogError(_('node %s is not censored') % node)
1608 1608 except CensoredNodeError:
1609 1609 # must pass the censored index flag to add censored revisions
1610 1610 if not flags & REVIDX_ISCENSORED:
1611 1611 raise
1612 1612 return btext[0]
1613 1613
1614 1614 def builddelta(rev):
1615 1615 # can we use the cached delta?
1616 1616 if cachedelta and cachedelta[0] == rev:
1617 1617 delta = cachedelta[1]
1618 1618 else:
1619 1619 t = buildtext()
1620 1620 if self.iscensored(rev):
1621 1621 # deltas based on a censored revision must replace the
1622 1622 # full content in one patch, so delta works everywhere
1623 1623 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1624 1624 delta = header + t
1625 1625 else:
1626 1626 if self._inline:
1627 1627 fh = ifh
1628 1628 else:
1629 1629 fh = dfh
1630 1630 ptext = self.revision(self.node(rev), _df=fh)
1631 1631 delta = mdiff.textdiff(ptext, t)
1632 1632 header, data = self.compress(delta)
1633 1633 deltalen = len(header) + len(data)
1634 1634 chainbase = self.chainbase(rev)
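# 'dist' spans from the on-disk start of the chain base to where this
# delta would end; _isgooddelta caps it to bound the read I/O needed to
# reconstruct the revision.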
1635 1635 dist = deltalen + offset - self.start(chainbase)
1636 1636 if self._generaldelta:
1637 1637 base = rev
1638 1638 else:
1639 1639 base = chainbase
1640 1640 chainlen, compresseddeltalen = self._chaininfo(rev)
1641 1641 chainlen += 1
1642 1642 compresseddeltalen += deltalen
1643 1643 return (dist, deltalen, (header, data), base,
1644 1644 chainbase, chainlen, compresseddeltalen)
1645 1645
1646 1646 curr = len(self)
1647 1647 prev = curr - 1
1648 1648 offset = self.end(prev)
1649 1649 delta = None
1650 1650 p1r, p2r = self.rev(p1), self.rev(p2)
1651 1651
1652 1652 # full versions are inserted when the needed deltas
1653 1653 # become comparable to the uncompressed text
1654 1654 if text is None:
1655 1655 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1656 1656 cachedelta[1])
1657 1657 else:
1658 1658 textlen = len(text)
1659 1659
1660 1660 # should we try to build a delta?
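# Candidate bases are tried in order: the base of the delta we received
# (when lazydeltabase allows reuse), then the closer parent -- or both
# parents with aggressivemergedeltas -- under generaldelta, and finally
# the previous revision.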
1661 1661 if prev != nullrev and self.storedeltachains:
1662 1662 tested = set()
1663 1663 # This condition is true most of the time when processing
1664 1664 # changegroup data into a generaldelta repo. The only time it
1665 1665 # isn't true is if this is the first revision in a delta chain
1666 1666 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1667 1667 if cachedelta and self._generaldelta and self._lazydeltabase:
1668 1668 # Assume what we received from the server is a good choice
1669 1669 # build delta will reuse the cache
1670 1670 candidatedelta = builddelta(cachedelta[0])
1671 1671 tested.add(cachedelta[0])
1672 1672 if self._isgooddelta(candidatedelta, textlen):
1673 1673 delta = candidatedelta
1674 1674 if delta is None and self._generaldelta:
1675 1675 # exclude already lazy tested base if any
1676 1676 parents = [p for p in (p1r, p2r)
1677 1677 if p != nullrev and p not in tested]
1678 1678 if parents and not self._aggressivemergedeltas:
1679 1679 # Pick whichever parent is closer to us (to minimize the
1680 1680 # chance of having to build a fulltext).
1681 1681 parents = [max(parents)]
1682 1682 tested.update(parents)
1683 1683 pdeltas = []
1684 1684 for p in parents:
1685 1685 pd = builddelta(p)
1686 1686 if self._isgooddelta(pd, textlen):
1687 1687 pdeltas.append(pd)
1688 1688 if pdeltas:
1689 1689 delta = min(pdeltas, key=lambda x: x[1])
1690 1690 if delta is None and prev not in tested:
1691 1691 # other approaches failed; try against prev to hopefully save us a
1692 1692 # fulltext.
1693 1693 candidatedelta = builddelta(prev)
1694 1694 if self._isgooddelta(candidatedelta, textlen):
1695 1695 delta = candidatedelta
1696 1696 if delta is not None:
1697 1697 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1698 1698 else:
1699 1699 text = buildtext()
1700 1700 data = self.compress(text)
1701 1701 l = len(data[1]) + len(data[0])
1702 1702 base = chainbase = curr
1703 1703
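# index entry layout: (offset << 16 | flags, compressed length, fulltext
# length, delta base rev, linkrev, p1 rev, p2 rev, node)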
1704 1704 e = (offset_type(offset, flags), l, textlen,
1705 1705 base, link, p1r, p2r, node)
1706 1706 self.index.insert(-1, e)
1707 1707 self.nodemap[node] = curr
1708 1708
1709 1709 entry = self._io.packentry(e, self.node, self.version, curr)
1710 1710 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1711 1711
1712 1712 if alwayscache and text is None:
1713 1713 text = buildtext()
1714 1714
1715 1715 if type(text) == str: # only accept immutable objects
1716 1716 self._cache = (node, curr, text)
1717 1717 self._chainbasecache[curr] = chainbase
1718 1718 return node
1719 1719
1720 1720 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1721 1721 # Files opened in a+ mode have inconsistent behavior on various
1722 1722 # platforms. Windows requires that a file positioning call be made
1723 1723 # when the file handle transitions between reads and writes. See
1724 1724 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1725 1725 # platforms, Python or the platform itself can be buggy. Some versions
1726 1726 # of Solaris have been observed to not append at the end of the file
1727 1727 # if the file was seeked to before the end. See issue4943 for more.
1728 1728 #
1729 1729 # We work around this issue by inserting a seek() before writing.
1730 1730 # Note: This is likely not necessary on Python 3.
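# Sketch of the failure mode (hypothetical, Python 2):
#
#   fh = open(path, 'a+b')
#   fh.read(1)        # handle now positioned for reading
#   fh.write(chunk)   # may not land at end-of-file on affected platforms
#
# The unconditional seek() below restores append semantics everywhere.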
1731 1731 ifh.seek(0, os.SEEK_END)
1732 1732 if dfh:
1733 1733 dfh.seek(0, os.SEEK_END)
1734 1734
1735 1735 curr = len(self) - 1
1736 1736 if not self._inline:
1737 1737 transaction.add(self.datafile, offset)
1738 1738 transaction.add(self.indexfile, curr * len(entry))
1739 1739 if data[0]:
1740 1740 dfh.write(data[0])
1741 1741 dfh.write(data[1])
1742 1742 ifh.write(entry)
1743 1743 else:
1744 1744 offset += curr * self._io.size
1745 1745 transaction.add(self.indexfile, offset, curr)
1746 1746 ifh.write(entry)
1747 1747 ifh.write(data[0])
1748 1748 ifh.write(data[1])
1749 1749 self.checkinlinesize(transaction, ifh)
1750 1750
1751 1751 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1752 1752 """
1753 1753 add a delta group
1754 1754
1755 1755 given a set of deltas, add them to the revision log. the
1756 1756 first delta is against its parent, which should be in our
1757 1757 log, the rest are against the previous delta.
1758 1758
1759 1759 If ``addrevisioncb`` is defined, it will be called with arguments of
1760 1760 this revlog and the node that was added.
1761 1761 """
1762 1762
1763 1763 # track the nodes added and the base of the current delta chain
1764 1764 content = []
1765 1765 node = None
1766 1766
1767 1767 r = len(self)
1768 1768 end = 0
1769 1769 if r:
1770 1770 end = self.end(r - 1)
1771 1771 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1772 1772 isize = r * self._io.size
1773 1773 if self._inline:
1774 1774 transaction.add(self.indexfile, end + isize, r)
1775 1775 dfh = None
1776 1776 else:
1777 1777 transaction.add(self.indexfile, isize, r)
1778 1778 transaction.add(self.datafile, end)
1779 1779 dfh = self.opener(self.datafile, "a+")
1780 1780 def flush():
1781 1781 if dfh:
1782 1782 dfh.flush()
1783 1783 ifh.flush()
1784 1784 try:
1785 1785 # loop through our set of deltas
1786 1786 chain = None
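# two-argument iter(): call cg.deltachunk(chain) repeatedly until it
# returns the sentinel {} marking the end of the delta group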
1787 1787 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1788 1788 node = chunkdata['node']
1789 1789 p1 = chunkdata['p1']
1790 1790 p2 = chunkdata['p2']
1791 1791 cs = chunkdata['cs']
1792 1792 deltabase = chunkdata['deltabase']
1793 1793 delta = chunkdata['delta']
1794 1794 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1795 1795
1796 1796 content.append(node)
1797 1797
1798 1798 link = linkmapper(cs)
1799 1799 if node in self.nodemap:
1800 1800 # this can happen if two branches make the same change
1801 1801 chain = node
1802 1802 continue
1803 1803
1804 1804 for p in (p1, p2):
1805 1805 if p not in self.nodemap:
1806 1806 raise LookupError(p, self.indexfile,
1807 1807 _('unknown parent'))
1808 1808
1809 1809 if deltabase not in self.nodemap:
1810 1810 raise LookupError(deltabase, self.indexfile,
1811 1811 _('unknown delta base'))
1812 1812
1813 1813 baserev = self.rev(deltabase)
1814 1814
1815 1815 if baserev != nullrev and self.iscensored(baserev):
1816 1816 # if base is censored, delta must be full replacement in a
1817 1817 # single patch operation
1818 1818 hlen = struct.calcsize(">lll")
1819 1819 oldlen = self.rawsize(baserev)
1820 1820 newlen = len(delta) - hlen
1821 1821 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1822 1822 raise error.CensoredBaseError(self.indexfile,
1823 1823 self.node(baserev))
1824 1824
1825 1825 if not flags and self._peek_iscensored(baserev, delta, flush):
1826 1826 flags |= REVIDX_ISCENSORED
1827 1827
1828 1828 # We assume consumers of addrevisioncb will want to retrieve
1829 1829 # the added revision, which will require a call to
1830 1830 # revision(). revision() will fast path if there is a cache
1831 1831 # hit. So, we tell _addrevision() to always cache in this case.
1832 1832 # We're only using addgroup() in the context of changegroup
1833 1833 # generation so the revision data can always be handled as raw
1834 1834 # by the flagprocessor.
1835 1835 chain = self._addrevision(node, None, transaction, link,
1836 1836 p1, p2, flags, (baserev, delta),
1837 1837 ifh, dfh,
1838 1838 alwayscache=bool(addrevisioncb),
1839 1839 raw=True)
1840 1840
1841 1841 if addrevisioncb:
1842 1842 addrevisioncb(self, chain)
1843 1843
1844 1844 if not dfh and not self._inline:
1845 1845 # addrevision switched from inline to conventional
1846 1846 # reopen the index
1847 1847 ifh.close()
1848 1848 dfh = self.opener(self.datafile, "a+")
1849 1849 ifh = self.opener(self.indexfile, "a+",
1850 1850 checkambig=self._checkambig)
1851 1851 finally:
1852 1852 if dfh:
1853 1853 dfh.close()
1854 1854 ifh.close()
1855 1855
1856 1856 return content
1857 1857
1858 1858 def iscensored(self, rev):
1859 1859 """Check if a file revision is censored."""
1860 1860 return False
1861 1861
1862 1862 def _peek_iscensored(self, baserev, delta, flush):
1863 1863 """Quickly check if a delta produces a censored revision."""
1864 1864 return False
1865 1865
1866 1866 def getstrippoint(self, minlink):
1867 1867 """find the minimum rev that must be stripped to strip the linkrev
1868 1868
1869 1869 Returns a tuple containing the minimum rev and a set of all revs that
1870 1870 have linkrevs that will be broken by this strip.
1871 1871 """
1872 1872 brokenrevs = set()
1873 1873 strippoint = len(self)
1874 1874
1875 1875 heads = {}
1876 1876 futurelargelinkrevs = set()
1877 1877 for head in self.headrevs():
1878 1878 headlinkrev = self.linkrev(head)
1879 1879 heads[head] = headlinkrev
1880 1880 if headlinkrev >= minlink:
1881 1881 futurelargelinkrevs.add(headlinkrev)
1882 1882
1883 1883 # This algorithm involves walking down the rev graph, starting at the
1884 1884 # heads. Since the revs are topologically sorted according to linkrev,
1885 1885 # once all head linkrevs are below the minlink, we know there are
1886 1886 # no more revs that could have a linkrev greater than minlink.
1887 1887 # So we can stop walking.
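# Worked example (hypothetical): a linear revlog with five revisions where
# linkrev == rev and minlink=3. The walk pops revs 4 and 3 (linkrevs >= 3),
# sees rev 2's linkrev 2 < 3, and stops with strippoint=3 and no broken
# linkrevs. brokenrevs instead collects revs at or above the strip point
# whose own linkrev is below minlink -- revisions truncated only because a
# later rev forces the strip that deep.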
1888 1888 while futurelargelinkrevs:
1889 1889 strippoint -= 1
1890 1890 linkrev = heads.pop(strippoint)
1891 1891
1892 1892 if linkrev < minlink:
1893 1893 brokenrevs.add(strippoint)
1894 1894 else:
1895 1895 futurelargelinkrevs.remove(linkrev)
1896 1896
1897 1897 for p in self.parentrevs(strippoint):
1898 1898 if p != nullrev:
1899 1899 plinkrev = self.linkrev(p)
1900 1900 heads[p] = plinkrev
1901 1901 if plinkrev >= minlink:
1902 1902 futurelargelinkrevs.add(plinkrev)
1903 1903
1904 1904 return strippoint, brokenrevs
1905 1905
1906 1906 def strip(self, minlink, transaction):
1907 1907 """truncate the revlog on the first revision with a linkrev >= minlink
1908 1908
1909 1909 This function is called when we're stripping revision minlink and
1910 1910 its descendants from the repository.
1911 1911
1912 1912 We have to remove all revisions with linkrev >= minlink, because
1913 1913 the equivalent changelog revisions will be renumbered after the
1914 1914 strip.
1915 1915
1916 1916 So we truncate the revlog on the first of these revisions, and
1917 1917 trust that the caller has saved the revisions that shouldn't be
1918 1918 removed and that it'll re-add them after this truncation.
1919 1919 """
1920 1920 if len(self) == 0:
1921 1921 return
1922 1922
1923 1923 rev, _ = self.getstrippoint(minlink)
1924 1924 if rev == len(self):
1925 1925 return
1926 1926
1927 1927 # first truncate the files on disk
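# (inline revlogs store each data chunk right after its index entry in the
# same file, so the truncation point is the data offset plus rev * entry
# size)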
1928 1928 end = self.start(rev)
1929 1929 if not self._inline:
1930 1930 transaction.add(self.datafile, end)
1931 1931 end = rev * self._io.size
1932 1932 else:
1933 1933 end += rev * self._io.size
1934 1934
1935 1935 transaction.add(self.indexfile, end)
1936 1936
1937 1937 # then reset internal state in memory to forget those revisions
1938 1938 self._cache = None
1939 1939 self._chaininfocache = {}
1940 1940 self._chunkclear()
1941 1941 for x in xrange(rev, len(self)):
1942 1942 del self.nodemap[self.node(x)]
1943 1943
1944 1944 del self.index[rev:-1]
1945 1945
1946 1946 def checksize(self):
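# Compare the on-disk file sizes with what the index implies. Returns
# (dd, di): dd is the unexpected (or, if negative, missing) bytes in the
# data file, di the same for the index file; (0, 0) means the sizes are
# consistent.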
1947 1947 expected = 0
1948 1948 if len(self):
1949 1949 expected = max(0, self.end(len(self) - 1))
1950 1950
1951 1951 try:
1952 1952 f = self.opener(self.datafile)
1953 1953 f.seek(0, 2)
1954 1954 actual = f.tell()
1955 1955 f.close()
1956 1956 dd = actual - expected
1957 1957 except IOError as inst:
1958 1958 if inst.errno != errno.ENOENT:
1959 1959 raise
1960 1960 dd = 0
1961 1961
1962 1962 try:
1963 1963 f = self.opener(self.indexfile)
1964 1964 f.seek(0, 2)
1965 1965 actual = f.tell()
1966 1966 f.close()
1967 1967 s = self._io.size
1968 1968 i = max(0, actual // s)
1969 1969 di = actual - (i * s)
1970 1970 if self._inline:
1971 1971 databytes = 0
1972 1972 for r in self:
1973 1973 databytes += max(0, self.length(r))
1974 1974 dd = 0
1975 1975 di = actual - len(self) * s - databytes
1976 1976 except IOError as inst:
1977 1977 if inst.errno != errno.ENOENT:
1978 1978 raise
1979 1979 di = 0
1980 1980
1981 1981 return (dd, di)
1982 1982
1983 1983 def files(self):
1984 1984 res = [self.indexfile]
1985 1985 if not self._inline:
1986 1986 res.append(self.datafile)
1987 1987 return res
1988 1988
1989 1989 DELTAREUSEALWAYS = 'always'
1990 1990 DELTAREUSESAMEREVS = 'samerevs'
1991 1991 DELTAREUSENEVER = 'never'
1992 1992
1993 1993 DELTAREUSEALL = set(['always', 'samerevs', 'never'])
1994 1994
1995 1995 def clone(self, tr, destrevlog, addrevisioncb=None,
1996 1996 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
1997 1997 """Copy this revlog to another, possibly with format changes.
1998 1998
1999 1999 The destination revlog will contain the same revisions and nodes.
2000 2000 However, it may not be bit-for-bit identical due to e.g. delta encoding
2001 2001 differences.
2002 2002
2003 2003 The ``deltareuse`` argument controls how deltas from the existing revlog
2004 2004 are preserved in the destination revlog. The argument can have the
2005 2005 following values:
2006 2006
2007 2007 DELTAREUSEALWAYS
2008 2008 Deltas will always be reused (if possible), even if the destination
2009 2009 revlog would not select the same revisions for the delta. This is the
2010 2010 fastest mode of operation.
2011 2011 DELTAREUSESAMEREVS
2012 2012 Deltas will be reused if the destination revlog would pick the same
2013 2013 revisions for the delta. This mode strikes a balance between speed
2014 2014 and optimization.
2015 2015 DELTAREUSENEVER
2016 2016 Deltas will never be reused. This is the slowest mode of execution.
2017 2017 This mode can be used to recompute deltas (e.g. if the diff/delta
2018 2018 algorithm changes).
2019 2019
2020 2020 Delta computation can be slow, so the choice of delta reuse policy can
2021 2021 significantly affect run time.
2022 2022
2023 2023 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2024 2024 two extremes. Deltas will be reused if they are appropriate. But if the
2025 2025 delta could choose a better revision, it will do so. This means if you
2026 2026 are converting a non-generaldelta revlog to a generaldelta revlog,
2027 2027 deltas will be recomputed if the delta's parent isn't a parent of the
2028 2028 revision.
2029 2029
2030 2030 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2031 2031 controls whether to compute deltas against both parents for merges.
2032 2032 If not set, the destination revlog's existing setting is used.
2033 2033 """
2034 2034 if deltareuse not in self.DELTAREUSEALL:
2035 2035 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2036 2036
2037 2037 if len(destrevlog):
2038 2038 raise ValueError(_('destination revlog is not empty'))
2039 2039
2040 2040 if getattr(self, 'filteredrevs', None):
2041 2041 raise ValueError(_('source revlog has filtered revisions'))
2042 2042 if getattr(destrevlog, 'filteredrevs', None):
2043 2043 raise ValueError(_('destination revlog has filtered revisions'))
2044 2044
2045 2045 # lazydeltabase controls whether to reuse a cached delta, if possible.
2046 2046 oldlazydeltabase = destrevlog._lazydeltabase
2047 2047 oldamd = destrevlog._aggressivemergedeltas
2048 2048
2049 2049 try:
2050 2050 if deltareuse == self.DELTAREUSEALWAYS:
2051 2051 destrevlog._lazydeltabase = True
2052 2052 elif deltareuse == self.DELTAREUSESAMEREVS:
2053 2053 destrevlog._lazydeltabase = False
2054 2054
2055 2055 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2056 2056
2057 2057 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2058 2058 self.DELTAREUSESAMEREVS)
2059 2059
2060 2060 index = self.index
2061 2061 for rev in self:
2062 2062 entry = index[rev]
2063 2063
2064 2064 # Some classes override linkrev to take filtered revs into
2065 2065 # account. Use raw entry from index.
2066 2066 flags = entry[0] & 0xffff
2067 2067 linkrev = entry[4]
2068 2068 p1 = index[entry[5]][7]
2069 2069 p2 = index[entry[6]][7]
2070 2070 node = entry[7]
2071 2071
2072 2072 # (Possibly) reuse the delta from the revlog if allowed and
2073 2073 # the revlog chunk is a delta.
2074 2074 cachedelta = None
2075 2075 text = None
2076 2076 if populatecachedelta:
2077 2077 dp = self.deltaparent(rev)
2078 2078 if dp != nullrev:
2079 2079 cachedelta = (dp, str(self._chunk(rev)))
2080 2080
2081 2081 if not cachedelta:
2082 2082 text = self.revision(rev)
2083 2083
2084 2084 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2085 2085 checkambig=False)
2086 2086 dfh = None
2087 2087 if not destrevlog._inline:
2088 2088 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2089 2089 try:
2090 2090 destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
2091 2091 flags, cachedelta, ifh, dfh)
2092 2092 finally:
2093 2093 if dfh:
2094 2094 dfh.close()
2095 2095 ifh.close()
2096 2096
2097 2097 if addrevisioncb:
2098 2098 addrevisioncb(self, rev, node)
2099 2099 finally:
2100 2100 destrevlog._lazydeltabase = oldlazydeltabase
2101 2101 destrevlog._aggressivemergedeltas = oldamd
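# A minimal usage sketch (hypothetical revlogs ``orig`` and ``dest`` and an
# open transaction ``tr``; not part of the module): recompute every delta
# while copying, e.g. after the delta algorithm has changed.
#
#   orig.clone(tr, dest, deltareuse=orig.DELTAREUSENEVER,
#              aggressivemergedeltas=True)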