revlog: clarify flagprocessor documentation...
Jun Wu
r31749:17d0dab7 default
@@ -1,2102 +1,2108 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 pycompat,
37 37 templatefilters,
38 38 util,
39 39 )
40 40
41 41 _pack = struct.pack
42 42 _unpack = struct.unpack
43 43 # Aliased for performance.
44 44 _zlibdecompress = zlib.decompress
45 45
46 46 # revlog header flags
47 47 REVLOGV0 = 0
48 48 REVLOGNG = 1
49 49 REVLOGNGINLINEDATA = (1 << 16)
50 50 REVLOGGENERALDELTA = (1 << 17)
51 51 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
52 52 REVLOG_DEFAULT_FORMAT = REVLOGNG
53 53 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
54 54 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
55 55
56 56 # revlog index flags
57 57 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
58 58 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
59 59 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
60 60 REVIDX_DEFAULT_FLAGS = 0
61 61 # stable order in which flags need to be processed and their processors applied
62 62 REVIDX_FLAGS_ORDER = [
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 ]
67 67 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
68 68
69 69 # max size of revlog with inline data
70 70 _maxinline = 131072
71 71 _chunksize = 1048576
72 72
73 73 RevlogError = error.RevlogError
74 74 LookupError = error.LookupError
75 75 CensoredNodeError = error.CensoredNodeError
76 76 ProgrammingError = error.ProgrammingError
77 77
78 78 # Store flag processors (cf. 'addflagprocessor()' to register)
79 79 _flagprocessors = {
80 80 REVIDX_ISCENSORED: None,
81 81 }
82 82
83 83 def addflagprocessor(flag, processor):
84 84 """Register a flag processor on a revision data flag.
85 85
86 86 Invariant:
87 87 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
88 88 - Only one flag processor can be registered on a specific flag.
89 89 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
90 90 following signatures:
91 - (read) f(self, text) -> newtext, bool
92 - (write) f(self, text) -> newtext, bool
93 - (raw) f(self, text) -> bool
91 - (read) f(self, rawtext) -> text, bool
92 - (write) f(self, text) -> rawtext, bool
93 - (raw) f(self, rawtext) -> bool
94 "text" is presented to the user. "rawtext" is stored in revlog data, not
95 directly visible to the user.
94 96 The boolean returned by these transforms is used to determine whether
95 'newtext' can be used for hash integrity checking.
97 the returned text can be used for hash integrity checking. For example,
98 if "write" returns False, then "text" is used to generate the hash. If
99 "write" returns True, the "rawtext" returned by "write" should be used
100 to generate the hash. Usually, "write" and "read" return different
101 booleans, and "raw" returns the same boolean as "write".
96 102
97 103 Note: The 'raw' transform is used for changegroup generation and in some
98 104 debug commands. In this case the transform only indicates whether the
99 105 contents can be used for hash integrity checks.
100 106 """
101 107 if not flag & REVIDX_KNOWN_FLAGS:
102 108 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
103 109 raise ProgrammingError(msg)
104 110 if flag not in REVIDX_FLAGS_ORDER:
105 111 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
106 112 raise ProgrammingError(msg)
107 113 if flag in _flagprocessors:
108 114 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
109 115 raise error.Abort(msg)
110 116 _flagprocessors[flag] = processor
111 117
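# A minimal sketch of registering a processor, assuming a hypothetical
# scheme that stores the user text behind an 'ext:' prefix (borrowing
# REVIDX_EXTSTORED purely for illustration). The booleans follow the
# rules documented above: "write" returns True, so the node hash covers
# the rawtext, "read" therefore returns False, and "raw" matches "write".
#
#   def extread(self, rawtext):
#       return rawtext[len('ext:'):], False
#
#   def extwrite(self, text):
#       return 'ext:' + text, True
#
#   def extraw(self, rawtext):
#       return True
#
#   addflagprocessor(REVIDX_EXTSTORED, (extread, extwrite, extraw))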
112 118 def getoffset(q):
113 119 return int(q >> 16)
114 120
115 121 def gettype(q):
116 122 return int(q & 0xFFFF)
117 123
118 124 def offset_type(offset, type):
119 125 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
120 126 raise ValueError('unknown revlog index flags')
121 127 return int(int(offset) << 16 | type)
122 128
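# An illustrative round-trip through the packing above: the first index
# field keeps a 48-bit file offset in the high bits and 16 bits of flags
# in the low bits, which getoffset() and gettype() recover.
assert getoffset(offset_type(1048576, REVIDX_ISCENSORED)) == 1048576
assert gettype(offset_type(1048576, REVIDX_ISCENSORED)) == REVIDX_ISCENSORED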
123 129 _nullhash = hashlib.sha1(nullid)
124 130
125 131 def hash(text, p1, p2):
126 132 """generate a hash from the given text and its parent hashes
127 133
128 134 This hash combines both the current file contents and its history
129 135 in a manner that makes it easy to distinguish nodes with the same
130 136 content in the revision graph.
131 137 """
132 138 # As of now, if one of the parent nodes is null, p2 is null
133 139 if p2 == nullid:
134 140 # deep copy of a hash is faster than creating one
135 141 s = _nullhash.copy()
136 142 s.update(p1)
137 143 else:
138 144 # none of the parent nodes are nullid
139 145 l = [p1, p2]
140 146 l.sort()
141 147 s = hashlib.sha1(l[0])
142 148 s.update(l[1])
143 149 s.update(text)
144 150 return s.digest()
145 151
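# An illustrative sketch: with a null p2, the digest above reduces to
# sha1(nullid + p1 + text), because nullid ('\x00' * 20) sorts before
# any other node, so the copied _nullhash matches the sorted branch.
assert (hash('data', '\x01' * 20, nullid) ==
        hashlib.sha1(nullid + '\x01' * 20 + 'data').digest())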
146 152 # index v0:
147 153 # 4 bytes: offset
148 154 # 4 bytes: compressed length
149 155 # 4 bytes: base rev
150 156 # 4 bytes: link rev
151 157 # 20 bytes: parent 1 nodeid
152 158 # 20 bytes: parent 2 nodeid
153 159 # 20 bytes: nodeid
154 160 indexformatv0 = ">4l20s20s20s"
155 161
156 162 class revlogoldio(object):
157 163 def __init__(self):
158 164 self.size = struct.calcsize(indexformatv0)
159 165
160 166 def parseindex(self, data, inline):
161 167 s = self.size
162 168 index = []
163 169 nodemap = {nullid: nullrev}
164 170 n = off = 0
165 171 l = len(data)
166 172 while off + s <= l:
167 173 cur = data[off:off + s]
168 174 off += s
169 175 e = _unpack(indexformatv0, cur)
170 176 # transform to revlogv1 format
171 177 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
172 178 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
173 179 index.append(e2)
174 180 nodemap[e[6]] = n
175 181 n += 1
176 182
177 183 # add the magic null revision at -1
178 184 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
179 185
180 186 return index, nodemap, None
181 187
182 188 def packentry(self, entry, node, version, rev):
183 189 if gettype(entry[0]):
184 190 raise RevlogError(_("index entry flags need RevlogNG"))
185 191 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
186 192 node(entry[5]), node(entry[6]), entry[7])
187 193 return _pack(indexformatv0, *e2)
188 194
189 195 # index ng:
190 196 # 6 bytes: offset
191 197 # 2 bytes: flags
192 198 # 4 bytes: compressed length
193 199 # 4 bytes: uncompressed length
194 200 # 4 bytes: base rev
195 201 # 4 bytes: link rev
196 202 # 4 bytes: parent 1 rev
197 203 # 4 bytes: parent 2 rev
198 204 # 32 bytes: nodeid
199 205 indexformatng = ">Qiiiiii20s12x"
200 206 versionformat = ">I"
201 207
202 208 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
203 209 # signed integer)
204 210 _maxentrysize = 0x7fffffff
205 211
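# Worked sizes for the two entry formats above: v0 packs 4 longs
# (16 bytes) plus three 20-byte nodeids into 76 bytes per entry;
# RevlogNG packs Q (8) + six i (24) + a 20-byte nodeid + 12 pad bytes
# into 64 bytes.
assert struct.calcsize(indexformatv0) == 76
assert struct.calcsize(indexformatng) == 64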
206 212 class revlogio(object):
207 213 def __init__(self):
208 214 self.size = struct.calcsize(indexformatng)
209 215
210 216 def parseindex(self, data, inline):
211 217 # call the C implementation to parse the index data
212 218 index, cache = parsers.parse_index2(data, inline)
213 219 return index, getattr(index, 'nodemap', None), cache
214 220
215 221 def packentry(self, entry, node, version, rev):
216 222 p = _pack(indexformatng, *entry)
217 223 if rev == 0:
218 224 p = _pack(versionformat, version) + p[4:]
219 225 return p
220 226
221 227 class revlog(object):
222 228 """
223 229 the underlying revision storage object
224 230
225 231 A revlog consists of two parts, an index and the revision data.
226 232
227 233 The index is a file with a fixed record size containing
228 234 information on each revision, including its nodeid (hash), the
229 235 nodeids of its parents, the position and offset of its data within
230 236 the data file, and the revision it's based on. Finally, each entry
231 237 contains a linkrev entry that can serve as a pointer to external
232 238 data.
233 239
234 240 The revision data itself is a linear collection of data chunks.
235 241 Each chunk represents a revision and is usually represented as a
236 242 delta against the previous chunk. To bound lookup time, runs of
237 243 deltas are limited to about 2 times the length of the original
238 244 version data. This makes retrieval of a version proportional to
239 245 its size, or O(1) relative to the number of revisions.
240 246
241 247 Both pieces of the revlog are written to in an append-only
242 248 fashion, which means we never need to rewrite a file to insert or
243 249 remove data, and can use some simple techniques to avoid the need
244 250 for locking while reading.
245 251
246 252 If checkambig, indexfile is opened with checkambig=True at
247 253 writing, to avoid file stat ambiguity.
248 254 """
249 255 def __init__(self, opener, indexfile, checkambig=False):
250 256 """
251 257 create a revlog object
252 258
253 259 opener is a function that abstracts the file opening operation
254 260 and can be used to implement COW semantics or the like.
255 261 """
256 262 self.indexfile = indexfile
257 263 self.datafile = indexfile[:-2] + ".d"
258 264 self.opener = opener
259 265 # When True, indexfile is opened with checkambig=True at writing, to
260 266 # avoid file stat ambiguity.
261 267 self._checkambig = checkambig
262 268 # 3-tuple of (node, rev, text) for a raw revision.
263 269 self._cache = None
264 270 # Maps rev to chain base rev.
265 271 self._chainbasecache = util.lrucachedict(100)
266 272 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
267 273 self._chunkcache = (0, '')
268 274 # How much data to read and cache into the raw revlog data cache.
269 275 self._chunkcachesize = 65536
270 276 self._maxchainlen = None
271 277 self._aggressivemergedeltas = False
272 278 self.index = []
273 279 # Mapping of partial identifiers to full nodes.
274 280 self._pcache = {}
275 281 # Mapping of revision integer to full node.
276 282 self._nodecache = {nullid: nullrev}
277 283 self._nodepos = None
278 284 self._compengine = 'zlib'
279 285
280 286 v = REVLOG_DEFAULT_VERSION
281 287 opts = getattr(opener, 'options', None)
282 288 if opts is not None:
283 289 if 'revlogv1' in opts:
284 290 if 'generaldelta' in opts:
285 291 v |= REVLOGGENERALDELTA
286 292 else:
287 293 v = 0
288 294 if 'chunkcachesize' in opts:
289 295 self._chunkcachesize = opts['chunkcachesize']
290 296 if 'maxchainlen' in opts:
291 297 self._maxchainlen = opts['maxchainlen']
292 298 if 'aggressivemergedeltas' in opts:
293 299 self._aggressivemergedeltas = opts['aggressivemergedeltas']
294 300 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
295 301 if 'compengine' in opts:
296 302 self._compengine = opts['compengine']
297 303
298 304 if self._chunkcachesize <= 0:
299 305 raise RevlogError(_('revlog chunk cache size %r is not greater '
300 306 'than 0') % self._chunkcachesize)
301 307 elif self._chunkcachesize & (self._chunkcachesize - 1):
302 308 raise RevlogError(_('revlog chunk cache size %r is not a power '
303 309 'of 2') % self._chunkcachesize)
304 310
305 311 indexdata = ''
306 312 self._initempty = True
307 313 try:
308 314 f = self.opener(self.indexfile)
309 315 indexdata = f.read()
310 316 f.close()
311 317 if len(indexdata) > 0:
312 318 v = struct.unpack(versionformat, indexdata[:4])[0]
313 319 self._initempty = False
314 320 except IOError as inst:
315 321 if inst.errno != errno.ENOENT:
316 322 raise
317 323
318 324 self.version = v
319 325 self._inline = v & REVLOGNGINLINEDATA
320 326 self._generaldelta = v & REVLOGGENERALDELTA
321 327 flags = v & ~0xFFFF
322 328 fmt = v & 0xFFFF
323 329 if fmt == REVLOGV0 and flags:
324 330 raise RevlogError(_("index %s unknown flags %#04x for format v0")
325 331 % (self.indexfile, flags >> 16))
326 332 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
327 333 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
328 334 % (self.indexfile, flags >> 16))
329 335 elif fmt > REVLOGNG:
330 336 raise RevlogError(_("index %s unknown format %d")
331 337 % (self.indexfile, fmt))
332 338
333 339 self.storedeltachains = True
334 340
335 341 self._io = revlogio()
336 342 if self.version == REVLOGV0:
337 343 self._io = revlogoldio()
338 344 try:
339 345 d = self._io.parseindex(indexdata, self._inline)
340 346 except (ValueError, IndexError):
341 347 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
342 348 self.index, nodemap, self._chunkcache = d
343 349 if nodemap is not None:
344 350 self.nodemap = self._nodecache = nodemap
345 351 if not self._chunkcache:
346 352 self._chunkclear()
347 353 # revnum -> (chain-length, sum-delta-length)
348 354 self._chaininfocache = {}
349 355 # revlog header -> revlog compressor
350 356 self._decompressors = {}
351 357
352 358 @util.propertycache
353 359 def _compressor(self):
354 360 return util.compengines[self._compengine].revlogcompressor()
355 361
356 362 def tip(self):
357 363 return self.node(len(self.index) - 2)
358 364 def __contains__(self, rev):
359 365 return 0 <= rev < len(self)
360 366 def __len__(self):
361 367 return len(self.index) - 1
362 368 def __iter__(self):
363 369 return iter(xrange(len(self)))
364 370 def revs(self, start=0, stop=None):
365 371 """iterate over all rev in this revlog (from start to stop)"""
366 372 step = 1
367 373 if stop is not None:
368 374 if start > stop:
369 375 step = -1
370 376 stop += step
371 377 else:
372 378 stop = len(self)
373 379 return xrange(start, stop, step)
374 380
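# For example, revs(start=5, stop=2) yields 5, 4, 3, 2: 'stop' is
# inclusive, and a start greater than stop iterates backwards.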
375 381 @util.propertycache
376 382 def nodemap(self):
377 383 self.rev(self.node(0))
378 384 return self._nodecache
379 385
380 386 def hasnode(self, node):
381 387 try:
382 388 self.rev(node)
383 389 return True
384 390 except KeyError:
385 391 return False
386 392
387 393 def clearcaches(self):
388 394 self._cache = None
389 395 self._chainbasecache.clear()
390 396 self._chunkcache = (0, '')
391 397 self._pcache = {}
392 398
393 399 try:
394 400 self._nodecache.clearcaches()
395 401 except AttributeError:
396 402 self._nodecache = {nullid: nullrev}
397 403 self._nodepos = None
398 404
399 405 def rev(self, node):
400 406 try:
401 407 return self._nodecache[node]
402 408 except TypeError:
403 409 raise
404 410 except RevlogError:
405 411 # parsers.c radix tree lookup failed
406 412 raise LookupError(node, self.indexfile, _('no node'))
407 413 except KeyError:
408 414 # pure python cache lookup failed
409 415 n = self._nodecache
410 416 i = self.index
411 417 p = self._nodepos
412 418 if p is None:
413 419 p = len(i) - 2
414 420 for r in xrange(p, -1, -1):
415 421 v = i[r][7]
416 422 n[v] = r
417 423 if v == node:
418 424 self._nodepos = r - 1
419 425 return r
420 426 raise LookupError(node, self.indexfile, _('no node'))
421 427
422 428 # Accessors for index entries.
423 429
424 430 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
425 431 # are flags.
426 432 def start(self, rev):
427 433 return int(self.index[rev][0] >> 16)
428 434
429 435 def flags(self, rev):
430 436 return self.index[rev][0] & 0xFFFF
431 437
432 438 def length(self, rev):
433 439 return self.index[rev][1]
434 440
435 441 def rawsize(self, rev):
436 442 """return the length of the uncompressed text for a given revision"""
437 443 l = self.index[rev][2]
438 444 if l >= 0:
439 445 return l
440 446
441 447 t = self.revision(rev)
442 448 return len(t)
443 449 size = rawsize
444 450
445 451 def chainbase(self, rev):
446 452 base = self._chainbasecache.get(rev)
447 453 if base is not None:
448 454 return base
449 455
450 456 index = self.index
451 457 base = index[rev][3]
452 458 while base != rev:
453 459 rev = base
454 460 base = index[rev][3]
455 461
456 462 self._chainbasecache[rev] = base
457 463 return base
458 464
459 465 def linkrev(self, rev):
460 466 return self.index[rev][4]
461 467
462 468 def parentrevs(self, rev):
463 469 return self.index[rev][5:7]
464 470
465 471 def node(self, rev):
466 472 return self.index[rev][7]
467 473
468 474 # Derived from index values.
469 475
470 476 def end(self, rev):
471 477 return self.start(rev) + self.length(rev)
472 478
473 479 def parents(self, node):
474 480 i = self.index
475 481 d = i[self.rev(node)]
476 482 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
477 483
478 484 def chainlen(self, rev):
479 485 return self._chaininfo(rev)[0]
480 486
481 487 def _chaininfo(self, rev):
482 488 chaininfocache = self._chaininfocache
483 489 if rev in chaininfocache:
484 490 return chaininfocache[rev]
485 491 index = self.index
486 492 generaldelta = self._generaldelta
487 493 iterrev = rev
488 494 e = index[iterrev]
489 495 clen = 0
490 496 compresseddeltalen = 0
491 497 while iterrev != e[3]:
492 498 clen += 1
493 499 compresseddeltalen += e[1]
494 500 if generaldelta:
495 501 iterrev = e[3]
496 502 else:
497 503 iterrev -= 1
498 504 if iterrev in chaininfocache:
499 505 t = chaininfocache[iterrev]
500 506 clen += t[0]
501 507 compresseddeltalen += t[1]
502 508 break
503 509 e = index[iterrev]
504 510 else:
505 511 # Add text length of base since decompressing that also takes
506 512 # work. For cache hits the length is already included.
507 513 compresseddeltalen += e[1]
508 514 r = (clen, compresseddeltalen)
509 515 chaininfocache[rev] = r
510 516 return r
511 517
512 518 def _deltachain(self, rev, stoprev=None):
513 519 """Obtain the delta chain for a revision.
514 520
515 521 ``stoprev`` specifies a revision to stop at. If not specified, we
516 522 stop at the base of the chain.
517 523
518 524 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
519 525 revs in ascending order and ``stopped`` is a bool indicating whether
520 526 ``stoprev`` was hit.
521 527 """
522 528 chain = []
523 529
524 530 # Alias to prevent attribute lookup in tight loop.
525 531 index = self.index
526 532 generaldelta = self._generaldelta
527 533
528 534 iterrev = rev
529 535 e = index[iterrev]
530 536 while iterrev != e[3] and iterrev != stoprev:
531 537 chain.append(iterrev)
532 538 if generaldelta:
533 539 iterrev = e[3]
534 540 else:
535 541 iterrev -= 1
536 542 e = index[iterrev]
537 543
538 544 if iterrev == stoprev:
539 545 stopped = True
540 546 else:
541 547 chain.append(iterrev)
542 548 stopped = False
543 549
544 550 chain.reverse()
545 551 return chain, stopped
546 552
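# A sketch of how the chain is consumed (cf. revision() below):
#   chain, stopped = self._deltachain(rev)
#   bins = self._chunks(chain)   # one decompressed chunk per chain rev
#   text = mdiff.patches(bytes(bins[0]), bins[1:])
# i.e. the first chunk is the full text at the chain base and the rest
# are deltas applied in ascending revision order.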
547 553 def ancestors(self, revs, stoprev=0, inclusive=False):
548 554 """Generate the ancestors of 'revs' in reverse topological order.
549 555 Does not generate revs lower than stoprev.
550 556
551 557 See the documentation for ancestor.lazyancestors for more details."""
552 558
553 559 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
554 560 inclusive=inclusive)
555 561
556 562 def descendants(self, revs):
557 563 """Generate the descendants of 'revs' in revision order.
558 564
559 565 Yield a sequence of revision numbers starting with a child of
560 566 some rev in revs, i.e., each revision is *not* considered a
561 567 descendant of itself. Results are ordered by revision number (a
562 568 topological sort)."""
563 569 first = min(revs)
564 570 if first == nullrev:
565 571 for i in self:
566 572 yield i
567 573 return
568 574
569 575 seen = set(revs)
570 576 for i in self.revs(start=first + 1):
571 577 for x in self.parentrevs(i):
572 578 if x != nullrev and x in seen:
573 579 seen.add(i)
574 580 yield i
575 581 break
576 582
577 583 def findcommonmissing(self, common=None, heads=None):
578 584 """Return a tuple of the ancestors of common and the ancestors of heads
579 585 that are not ancestors of common. In revset terminology, we return the
580 586 tuple:
581 587
582 588 ::common, (::heads) - (::common)
583 589
584 590 The list is sorted by revision number, meaning it is
585 591 topologically sorted.
586 592
587 593 'heads' and 'common' are both lists of node IDs. If heads is
588 594 not supplied, uses all of the revlog's heads. If common is not
589 595 supplied, uses nullid."""
590 596 if common is None:
591 597 common = [nullid]
592 598 if heads is None:
593 599 heads = self.heads()
594 600
595 601 common = [self.rev(n) for n in common]
596 602 heads = [self.rev(n) for n in heads]
597 603
598 604 # we want the ancestors, but inclusive
599 605 class lazyset(object):
600 606 def __init__(self, lazyvalues):
601 607 self.addedvalues = set()
602 608 self.lazyvalues = lazyvalues
603 609
604 610 def __contains__(self, value):
605 611 return value in self.addedvalues or value in self.lazyvalues
606 612
607 613 def __iter__(self):
608 614 added = self.addedvalues
609 615 for r in added:
610 616 yield r
611 617 for r in self.lazyvalues:
612 618 if r not in added:
613 619 yield r
614 620
615 621 def add(self, value):
616 622 self.addedvalues.add(value)
617 623
618 624 def update(self, values):
619 625 self.addedvalues.update(values)
620 626
621 627 has = lazyset(self.ancestors(common))
622 628 has.add(nullrev)
623 629 has.update(common)
624 630
625 631 # take all ancestors from heads that aren't in has
626 632 missing = set()
627 633 visit = collections.deque(r for r in heads if r not in has)
628 634 while visit:
629 635 r = visit.popleft()
630 636 if r in missing:
631 637 continue
632 638 else:
633 639 missing.add(r)
634 640 for p in self.parentrevs(r):
635 641 if p not in has:
636 642 visit.append(p)
637 643 missing = list(missing)
638 644 missing.sort()
639 645 return has, [self.node(miss) for miss in missing]
640 646
641 647 def incrementalmissingrevs(self, common=None):
642 648 """Return an object that can be used to incrementally compute the
643 649 revision numbers of the ancestors of arbitrary sets that are not
644 650 ancestors of common. This is an ancestor.incrementalmissingancestors
645 651 object.
646 652
647 653 'common' is a list of revision numbers. If common is not supplied, uses
648 654 nullrev.
649 655 """
650 656 if common is None:
651 657 common = [nullrev]
652 658
653 659 return ancestor.incrementalmissingancestors(self.parentrevs, common)
654 660
655 661 def findmissingrevs(self, common=None, heads=None):
656 662 """Return the revision numbers of the ancestors of heads that
657 663 are not ancestors of common.
658 664
659 665 More specifically, return a list of revision numbers corresponding to
660 666 nodes N such that every N satisfies the following constraints:
661 667
662 668 1. N is an ancestor of some node in 'heads'
663 669 2. N is not an ancestor of any node in 'common'
664 670
665 671 The list is sorted by revision number, meaning it is
666 672 topologically sorted.
667 673
668 674 'heads' and 'common' are both lists of revision numbers. If heads is
669 675 not supplied, uses all of the revlog's heads. If common is not
670 676 supplied, uses nullrev."""
671 677 if common is None:
672 678 common = [nullrev]
673 679 if heads is None:
674 680 heads = self.headrevs()
675 681
676 682 inc = self.incrementalmissingrevs(common=common)
677 683 return inc.missingancestors(heads)
678 684
679 685 def findmissing(self, common=None, heads=None):
680 686 """Return the ancestors of heads that are not ancestors of common.
681 687
682 688 More specifically, return a list of nodes N such that every N
683 689 satisfies the following constraints:
684 690
685 691 1. N is an ancestor of some node in 'heads'
686 692 2. N is not an ancestor of any node in 'common'
687 693
688 694 The list is sorted by revision number, meaning it is
689 695 topologically sorted.
690 696
691 697 'heads' and 'common' are both lists of node IDs. If heads is
692 698 not supplied, uses all of the revlog's heads. If common is not
693 699 supplied, uses nullid."""
694 700 if common is None:
695 701 common = [nullid]
696 702 if heads is None:
697 703 heads = self.heads()
698 704
699 705 common = [self.rev(n) for n in common]
700 706 heads = [self.rev(n) for n in heads]
701 707
702 708 inc = self.incrementalmissingrevs(common=common)
703 709 return [self.node(r) for r in inc.missingancestors(heads)]
704 710
705 711 def nodesbetween(self, roots=None, heads=None):
706 712 """Return a topological path from 'roots' to 'heads'.
707 713
708 714 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
709 715 topologically sorted list of all nodes N that satisfy both of
710 716 these constraints:
711 717
712 718 1. N is a descendant of some node in 'roots'
713 719 2. N is an ancestor of some node in 'heads'
714 720
715 721 Every node is considered to be both a descendant and an ancestor
716 722 of itself, so every reachable node in 'roots' and 'heads' will be
717 723 included in 'nodes'.
718 724
719 725 'outroots' is the list of reachable nodes in 'roots', i.e., the
720 726 subset of 'roots' that is returned in 'nodes'. Likewise,
721 727 'outheads' is the subset of 'heads' that is also in 'nodes'.
722 728
723 729 'roots' and 'heads' are both lists of node IDs. If 'roots' is
724 730 unspecified, uses nullid as the only root. If 'heads' is
725 731 unspecified, uses a list of all of the revlog's heads."""
726 732 nonodes = ([], [], [])
727 733 if roots is not None:
728 734 roots = list(roots)
729 735 if not roots:
730 736 return nonodes
731 737 lowestrev = min([self.rev(n) for n in roots])
732 738 else:
733 739 roots = [nullid] # Everybody's a descendant of nullid
734 740 lowestrev = nullrev
735 741 if (lowestrev == nullrev) and (heads is None):
736 742 # We want _all_ the nodes!
737 743 return ([self.node(r) for r in self], [nullid], list(self.heads()))
738 744 if heads is None:
739 745 # All nodes are ancestors, so the latest ancestor is the last
740 746 # node.
741 747 highestrev = len(self) - 1
742 748 # Set ancestors to None to signal that every node is an ancestor.
743 749 ancestors = None
744 750 # Set heads to an empty dictionary for later discovery of heads
745 751 heads = {}
746 752 else:
747 753 heads = list(heads)
748 754 if not heads:
749 755 return nonodes
750 756 ancestors = set()
751 757 # Turn heads into a dictionary so we can remove 'fake' heads.
752 758 # Also, later we will be using it to filter out the heads we can't
753 759 # find from roots.
754 760 heads = dict.fromkeys(heads, False)
755 761 # Start at the top and keep marking parents until we're done.
756 762 nodestotag = set(heads)
757 763 # Remember where the top was so we can use it as a limit later.
758 764 highestrev = max([self.rev(n) for n in nodestotag])
759 765 while nodestotag:
760 766 # grab a node to tag
761 767 n = nodestotag.pop()
762 768 # Never tag nullid
763 769 if n == nullid:
764 770 continue
765 771 # A node's revision number represents its place in a
766 772 # topologically sorted list of nodes.
767 773 r = self.rev(n)
768 774 if r >= lowestrev:
769 775 if n not in ancestors:
770 776 # If we are possibly a descendant of one of the roots
771 777 # and we haven't already been marked as an ancestor
772 778 ancestors.add(n) # Mark as ancestor
773 779 # Add non-nullid parents to list of nodes to tag.
774 780 nodestotag.update([p for p in self.parents(n) if
775 781 p != nullid])
776 782 elif n in heads: # We've seen it before, is it a fake head?
777 783 # So it is, real heads should not be the ancestors of
778 784 # any other heads.
779 785 heads.pop(n)
780 786 if not ancestors:
781 787 return nonodes
782 788 # Now that we have our set of ancestors, we want to remove any
783 789 # roots that are not ancestors.
784 790
785 791 # If one of the roots was nullid, everything is included anyway.
786 792 if lowestrev > nullrev:
787 793 # But, since we weren't, let's recompute the lowest rev to not
788 794 # include roots that aren't ancestors.
789 795
790 796 # Filter out roots that aren't ancestors of heads
791 797 roots = [root for root in roots if root in ancestors]
792 798 # Recompute the lowest revision
793 799 if roots:
794 800 lowestrev = min([self.rev(root) for root in roots])
795 801 else:
796 802 # No more roots? Return empty list
797 803 return nonodes
798 804 else:
799 805 # We are descending from nullid, and don't need to care about
800 806 # any other roots.
801 807 lowestrev = nullrev
802 808 roots = [nullid]
803 809 # Transform our roots list into a set.
804 810 descendants = set(roots)
805 811 # Also, keep the original roots so we can filter out roots that aren't
806 812 # 'real' roots (i.e. are descended from other roots).
807 813 roots = descendants.copy()
808 814 # Our topologically sorted list of output nodes.
809 815 orderedout = []
810 816 # Don't start at nullid since we don't want nullid in our output list,
811 817 # and if nullid shows up in descendants, empty parents will look like
812 818 # they're descendants.
813 819 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
814 820 n = self.node(r)
815 821 isdescendant = False
816 822 if lowestrev == nullrev: # Everybody is a descendant of nullid
817 823 isdescendant = True
818 824 elif n in descendants:
819 825 # n is already a descendant
820 826 isdescendant = True
821 827 # This check only needs to be done here because all the roots
822 828 # will start being marked as descendants before the loop.
823 829 if n in roots:
824 830 # If n was a root, check if it's a 'real' root.
825 831 p = tuple(self.parents(n))
826 832 # If any of its parents are descendants, it's not a root.
827 833 if (p[0] in descendants) or (p[1] in descendants):
828 834 roots.remove(n)
829 835 else:
830 836 p = tuple(self.parents(n))
831 837 # A node is a descendant if either of its parents is a
832 838 # descendant. (We seeded the descendants set with the roots
833 839 # up there, remember?)
834 840 if (p[0] in descendants) or (p[1] in descendants):
835 841 descendants.add(n)
836 842 isdescendant = True
837 843 if isdescendant and ((ancestors is None) or (n in ancestors)):
838 844 # Only include nodes that are both descendants and ancestors.
839 845 orderedout.append(n)
840 846 if (ancestors is not None) and (n in heads):
841 847 # We're trying to figure out which heads are reachable
842 848 # from roots.
843 849 # Mark this head as having been reached
844 850 heads[n] = True
845 851 elif ancestors is None:
846 852 # Otherwise, we're trying to discover the heads.
847 853 # Assume this is a head because if it isn't, the next step
848 854 # will eventually remove it.
849 855 heads[n] = True
850 856 # But, obviously its parents aren't.
851 857 for p in self.parents(n):
852 858 heads.pop(p, None)
853 859 heads = [head for head, flag in heads.iteritems() if flag]
854 860 roots = list(roots)
855 861 assert orderedout
856 862 assert roots
857 863 assert heads
858 864 return (orderedout, roots, heads)
859 865
860 866 def headrevs(self):
861 867 try:
862 868 return self.index.headrevs()
863 869 except AttributeError:
864 870 return self._headrevs()
865 871
866 872 def computephases(self, roots):
867 873 return self.index.computephasesmapsets(roots)
868 874
869 875 def _headrevs(self):
870 876 count = len(self)
871 877 if not count:
872 878 return [nullrev]
873 879 # we won't iterate over filtered revs, so nobody is a head at start
874 880 ishead = [0] * (count + 1)
875 881 index = self.index
876 882 for r in self:
877 883 ishead[r] = 1 # I may be a head
878 884 e = index[r]
879 885 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
880 886 return [r for r, val in enumerate(ishead) if val]
881 887
882 888 def heads(self, start=None, stop=None):
883 889 """return the list of all nodes that have no children
884 890
885 891 if start is specified, only heads that are descendants of
886 892 start will be returned
887 893 if stop is specified, it will consider all the revs from stop
888 894 as if they had no children
889 895 """
890 896 if start is None and stop is None:
891 897 if not len(self):
892 898 return [nullid]
893 899 return [self.node(r) for r in self.headrevs()]
894 900
895 901 if start is None:
896 902 start = nullid
897 903 if stop is None:
898 904 stop = []
899 905 stoprevs = set([self.rev(n) for n in stop])
900 906 startrev = self.rev(start)
901 907 reachable = set((startrev,))
902 908 heads = set((startrev,))
903 909
904 910 parentrevs = self.parentrevs
905 911 for r in self.revs(start=startrev + 1):
906 912 for p in parentrevs(r):
907 913 if p in reachable:
908 914 if r not in stoprevs:
909 915 reachable.add(r)
910 916 heads.add(r)
911 917 if p in heads and p not in stoprevs:
912 918 heads.remove(p)
913 919
914 920 return [self.node(r) for r in heads]
915 921
916 922 def children(self, node):
917 923 """find the children of a given node"""
918 924 c = []
919 925 p = self.rev(node)
920 926 for r in self.revs(start=p + 1):
921 927 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
922 928 if prevs:
923 929 for pr in prevs:
924 930 if pr == p:
925 931 c.append(self.node(r))
926 932 elif p == nullrev:
927 933 c.append(self.node(r))
928 934 return c
929 935
930 936 def descendant(self, start, end):
931 937 if start == nullrev:
932 938 return True
933 939 for i in self.descendants([start]):
934 940 if i == end:
935 941 return True
936 942 elif i > end:
937 943 break
938 944 return False
939 945
940 946 def commonancestorsheads(self, a, b):
941 947 """calculate all the heads of the common ancestors of nodes a and b"""
942 948 a, b = self.rev(a), self.rev(b)
943 949 try:
944 950 ancs = self.index.commonancestorsheads(a, b)
945 951 except (AttributeError, OverflowError): # C implementation failed
946 952 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
947 953 return pycompat.maplist(self.node, ancs)
948 954
949 955 def isancestor(self, a, b):
950 956 """return True if node a is an ancestor of node b
951 957
952 958 The implementation of this is trivial but the use of
953 959 commonancestorsheads is not."""
954 960 return a in self.commonancestorsheads(a, b)
955 961
956 962 def ancestor(self, a, b):
957 963 """calculate the "best" common ancestor of nodes a and b"""
958 964
959 965 a, b = self.rev(a), self.rev(b)
960 966 try:
961 967 ancs = self.index.ancestors(a, b)
962 968 except (AttributeError, OverflowError):
963 969 ancs = ancestor.ancestors(self.parentrevs, a, b)
964 970 if ancs:
965 971 # choose a consistent winner when there's a tie
966 972 return min(map(self.node, ancs))
967 973 return nullid
968 974
969 975 def _match(self, id):
970 976 if isinstance(id, int):
971 977 # rev
972 978 return self.node(id)
973 979 if len(id) == 20:
974 980 # possibly a binary node
975 981 # odds of a binary node being all hex in ASCII are 1 in 10**25
976 982 try:
977 983 node = id
978 984 self.rev(node) # quick search the index
979 985 return node
980 986 except LookupError:
981 987 pass # may be partial hex id
982 988 try:
983 989 # str(rev)
984 990 rev = int(id)
985 991 if str(rev) != id:
986 992 raise ValueError
987 993 if rev < 0:
988 994 rev = len(self) + rev
989 995 if rev < 0 or rev >= len(self):
990 996 raise ValueError
991 997 return self.node(rev)
992 998 except (ValueError, OverflowError):
993 999 pass
994 1000 if len(id) == 40:
995 1001 try:
996 1002 # a full hex nodeid?
997 1003 node = bin(id)
998 1004 self.rev(node)
999 1005 return node
1000 1006 except (TypeError, LookupError):
1001 1007 pass
1002 1008
1003 1009 def _partialmatch(self, id):
1004 1010 try:
1005 1011 partial = self.index.partialmatch(id)
1006 1012 if partial and self.hasnode(partial):
1007 1013 return partial
1008 1014 return None
1009 1015 except RevlogError:
1010 1016 # parsers.c radix tree lookup gave multiple matches
1011 1017 # fast path: for unfiltered changelog, radix tree is accurate
1012 1018 if not getattr(self, 'filteredrevs', None):
1013 1019 raise LookupError(id, self.indexfile,
1014 1020 _('ambiguous identifier'))
1015 1021 # fall through to slow path that filters hidden revisions
1016 1022 except (AttributeError, ValueError):
1017 1023 # we are pure python, or key was too short to search radix tree
1018 1024 pass
1019 1025
1020 1026 if id in self._pcache:
1021 1027 return self._pcache[id]
1022 1028
1023 1029 if len(id) < 40:
1024 1030 try:
1025 1031 # hex(node)[:...]
1026 1032 l = len(id) // 2 # grab an even number of digits
1027 1033 prefix = bin(id[:l * 2])
1028 1034 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1029 1035 nl = [n for n in nl if hex(n).startswith(id) and
1030 1036 self.hasnode(n)]
1031 1037 if len(nl) > 0:
1032 1038 if len(nl) == 1:
1033 1039 self._pcache[id] = nl[0]
1034 1040 return nl[0]
1035 1041 raise LookupError(id, self.indexfile,
1036 1042 _('ambiguous identifier'))
1037 1043 return None
1038 1044 except TypeError:
1039 1045 pass
1040 1046
1041 1047 def lookup(self, id):
1042 1048 """locate a node based on:
1043 1049 - revision number or str(revision number)
1044 1050 - nodeid or subset of hex nodeid
1045 1051 """
1046 1052 n = self._match(id)
1047 1053 if n is not None:
1048 1054 return n
1049 1055 n = self._partialmatch(id)
1050 1056 if n:
1051 1057 return n
1052 1058
1053 1059 raise LookupError(id, self.indexfile, _('no match found'))
1054 1060
1055 1061 def cmp(self, node, text):
1056 1062 """compare text with a given file revision
1057 1063
1058 1064 returns True if text is different from what is stored.
1059 1065 """
1060 1066 p1, p2 = self.parents(node)
1061 1067 return hash(text, p1, p2) != node
1062 1068
1063 1069 def _addchunk(self, offset, data):
1064 1070 """Add a segment to the revlog cache.
1065 1071
1066 1072 Accepts an absolute offset and the data that is at that location.
1067 1073 """
1068 1074 o, d = self._chunkcache
1069 1075 # try to add to existing cache
1070 1076 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1071 1077 self._chunkcache = o, d + data
1072 1078 else:
1073 1079 self._chunkcache = offset, data
1074 1080
1075 1081 def _loadchunk(self, offset, length, df=None):
1076 1082 """Load a segment of raw data from the revlog.
1077 1083
1078 1084 Accepts an absolute offset, length to read, and an optional existing
1079 1085 file handle to read from.
1080 1086
1081 1087 If an existing file handle is passed, it will be seeked and its
1082 1088 original seek position will NOT be restored.
1083 1089
1084 1090 Returns a str or buffer of raw byte data.
1085 1091 """
1086 1092 if df is not None:
1087 1093 closehandle = False
1088 1094 else:
1089 1095 if self._inline:
1090 1096 df = self.opener(self.indexfile)
1091 1097 else:
1092 1098 df = self.opener(self.datafile)
1093 1099 closehandle = True
1094 1100
1095 1101 # Cache data both forward and backward around the requested
1096 1102 # data, in a fixed size window. This helps speed up operations
1097 1103 # involving reading the revlog backwards.
1098 1104 cachesize = self._chunkcachesize
1099 1105 realoffset = offset & ~(cachesize - 1)
1100 1106 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1101 1107 - realoffset)
1102 1108 df.seek(realoffset)
1103 1109 d = df.read(reallength)
1104 1110 if closehandle:
1105 1111 df.close()
1106 1112 self._addchunk(realoffset, d)
1107 1113 if offset != realoffset or reallength != length:
1108 1114 return util.buffer(d, offset - realoffset, length)
1109 1115 return d
1110 1116
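# A worked example of the windowing above with the default 64k cache:
#   offset, length = 70000, 100
#   realoffset = 70000 & ~(65536 - 1)                     -> 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
# so bytes 65536..131071 are read and cached in one go, and a 100-byte
# buffer view starting 4464 bytes into that window is returned.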
1111 1117 def _getchunk(self, offset, length, df=None):
1112 1118 """Obtain a segment of raw data from the revlog.
1113 1119
1114 1120 Accepts an absolute offset, length of bytes to obtain, and an
1115 1121 optional file handle to the already-opened revlog. If the file
1116 1122 handle is used, its original seek position will not be preserved.
1117 1123
1118 1124 Requests for data may be returned from a cache.
1119 1125
1120 1126 Returns a str or a buffer instance of raw byte data.
1121 1127 """
1122 1128 o, d = self._chunkcache
1123 1129 l = len(d)
1124 1130
1125 1131 # is it in the cache?
1126 1132 cachestart = offset - o
1127 1133 cacheend = cachestart + length
1128 1134 if cachestart >= 0 and cacheend <= l:
1129 1135 if cachestart == 0 and cacheend == l:
1130 1136 return d # avoid a copy
1131 1137 return util.buffer(d, cachestart, cacheend - cachestart)
1132 1138
1133 1139 return self._loadchunk(offset, length, df=df)
1134 1140
1135 1141 def _chunkraw(self, startrev, endrev, df=None):
1136 1142 """Obtain a segment of raw data corresponding to a range of revisions.
1137 1143
1138 1144 Accepts the start and end revisions and an optional already-open
1139 1145 file handle to be used for reading. If the file handle is used, its
1140 1146 seek position will not be preserved.
1141 1147
1142 1148 Requests for data may be satisfied by a cache.
1143 1149
1144 1150 Returns a 2-tuple of (offset, data) for the requested range of
1145 1151 revisions. Offset is the integer offset from the beginning of the
1146 1152 revlog and data is a str or buffer of the raw byte data.
1147 1153
1148 1154 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1149 1155 to determine where each revision's data begins and ends.
1150 1156 """
1151 1157 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1152 1158 # (functions are expensive).
1153 1159 index = self.index
1154 1160 istart = index[startrev]
1155 1161 start = int(istart[0] >> 16)
1156 1162 if startrev == endrev:
1157 1163 end = start + istart[1]
1158 1164 else:
1159 1165 iend = index[endrev]
1160 1166 end = int(iend[0] >> 16) + iend[1]
1161 1167
1162 1168 if self._inline:
1163 1169 start += (startrev + 1) * self._io.size
1164 1170 end += (endrev + 1) * self._io.size
1165 1171 length = end - start
1166 1172
1167 1173 return start, self._getchunk(start, length, df=df)
1168 1174
1169 1175 def _chunk(self, rev, df=None):
1170 1176 """Obtain a single decompressed chunk for a revision.
1171 1177
1172 1178 Accepts an integer revision and an optional already-open file handle
1173 1179 to be used for reading. If used, the seek position of the file will not
1174 1180 be preserved.
1175 1181
1176 1182 Returns a str holding uncompressed data for the requested revision.
1177 1183 """
1178 1184 return self.decompress(self._chunkraw(rev, rev, df=df)[1])
1179 1185
1180 1186 def _chunks(self, revs, df=None):
1181 1187 """Obtain decompressed chunks for the specified revisions.
1182 1188
1183 1189 Accepts an iterable of numeric revisions that are assumed to be in
1184 1190 ascending order. Also accepts an optional already-open file handle
1185 1191 to be used for reading. If used, the seek position of the file will
1186 1192 not be preserved.
1187 1193
1188 1194 This function is similar to calling ``self._chunk()`` multiple times,
1189 1195 but is faster.
1190 1196
1191 1197 Returns a list with decompressed data for each requested revision.
1192 1198 """
1193 1199 if not revs:
1194 1200 return []
1195 1201 start = self.start
1196 1202 length = self.length
1197 1203 inline = self._inline
1198 1204 iosize = self._io.size
1199 1205 buffer = util.buffer
1200 1206
1201 1207 l = []
1202 1208 ladd = l.append
1203 1209
1204 1210 try:
1205 1211 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1206 1212 except OverflowError:
1207 1213 # issue4215 - we can't cache a run of chunks greater than
1208 1214 # 2G on Windows
1209 1215 return [self._chunk(rev, df=df) for rev in revs]
1210 1216
1211 1217 decomp = self.decompress
1212 1218 for rev in revs:
1213 1219 chunkstart = start(rev)
1214 1220 if inline:
1215 1221 chunkstart += (rev + 1) * iosize
1216 1222 chunklength = length(rev)
1217 1223 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1218 1224
1219 1225 return l
1220 1226
1221 1227 def _chunkclear(self):
1222 1228 """Clear the raw chunk cache."""
1223 1229 self._chunkcache = (0, '')
1224 1230
1225 1231 def deltaparent(self, rev):
1226 1232 """return deltaparent of the given revision"""
1227 1233 base = self.index[rev][3]
1228 1234 if base == rev:
1229 1235 return nullrev
1230 1236 elif self._generaldelta:
1231 1237 return base
1232 1238 else:
1233 1239 return rev - 1
1234 1240
1235 1241 def revdiff(self, rev1, rev2):
1236 1242 """return or calculate a delta between two revisions"""
1237 1243 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1238 1244 return bytes(self._chunk(rev2))
1239 1245
1240 1246 return mdiff.textdiff(self.revision(rev1),
1241 1247 self.revision(rev2))
1242 1248
1243 1249 def revision(self, nodeorrev, _df=None, raw=False):
1244 1250 """return an uncompressed revision of a given node or revision
1245 1251 number.
1246 1252
1247 1253 _df - an existing file handle to read from. (internal-only)
1248 1254 raw - an optional argument specifying whether the revision data is to be
1249 1255 treated as raw data when applying flag transforms. 'raw' should be set
1250 1256 to True when generating changegroups or in debug commands.
1251 1257 """
1252 1258 if isinstance(nodeorrev, int):
1253 1259 rev = nodeorrev
1254 1260 node = self.node(rev)
1255 1261 else:
1256 1262 node = nodeorrev
1257 1263 rev = None
1258 1264
1259 1265 cachedrev = None
1260 1266 if node == nullid:
1261 1267 return ""
1262 1268 if self._cache:
1263 1269 if self._cache[0] == node:
1264 1270 return self._cache[2]
1265 1271 cachedrev = self._cache[1]
1266 1272
1267 1273 # look up what we need to read
1268 1274 text = None
1269 1275 if rev is None:
1270 1276 rev = self.rev(node)
1271 1277
1272 1278 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1273 1279 if stopped:
1274 1280 text = self._cache[2]
1275 1281
1276 1282 # drop cache to save memory
1277 1283 self._cache = None
1278 1284
1279 1285 bins = self._chunks(chain, df=_df)
1280 1286 if text is None:
1281 1287 text = bytes(bins[0])
1282 1288 bins = bins[1:]
1283 1289
1284 1290 text = mdiff.patches(text, bins)
1285 1291
1286 1292 text, validatehash = self._processflags(text, self.flags(rev), 'read',
1287 1293 raw=raw)
1288 1294 if validatehash:
1289 1295 self.checkhash(text, node, rev=rev)
1290 1296
1291 1297 self._cache = (node, rev, text)
1292 1298 return text
1293 1299
1294 1300 def hash(self, text, p1, p2):
1295 1301 """Compute a node hash.
1296 1302
1297 1303 Available as a function so that subclasses can replace the hash
1298 1304 as needed.
1299 1305 """
1300 1306 return hash(text, p1, p2)
1301 1307
1302 1308 def _processflags(self, text, flags, operation, raw=False):
1303 1309 """Inspect revision data flags and applies transforms defined by
1304 1310 registered flag processors.
1305 1311
1306 1312 ``text`` - the revision data to process
1307 1313 ``flags`` - the revision flags
1308 1314 ``operation`` - the operation being performed (read or write)
1309 1315 ``raw`` - an optional argument indicating whether the raw transform
1310 1316 should be applied.
1311 1317
1312 1318 This method processes the flags in the order (or reverse order if
1313 1319 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1314 1320 flag processors registered for present flags. The order of flags defined
1315 1321 in REVIDX_FLAGS_ORDER needs to be stable since transforms do not commute.
1316 1322
1317 1323 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1318 1324 processed text and ``validatehash`` is a bool indicating whether the
1319 1325 returned text should be checked for hash integrity.
1320 1326
1321 1327 Note: If the ``raw`` argument is set, it has precedence over the
1322 1328 operation and will only update the value of ``validatehash``.
1323 1329 """
1324 1330 if operation not in ('read', 'write'):
1325 1331 raise ProgrammingError(_("invalid '%s' operation") % operation)
1326 1332 # Check all flags are known.
1327 1333 if flags & ~REVIDX_KNOWN_FLAGS:
1328 1334 raise RevlogError(_("incompatible revision flag '%#x'") %
1329 1335 (flags & ~REVIDX_KNOWN_FLAGS))
1330 1336 validatehash = True
1331 1337 # Depending on the operation (read or write), the order might be
1332 1338 # reversed due to non-commutative transforms.
1333 1339 orderedflags = REVIDX_FLAGS_ORDER
1334 1340 if operation == 'write':
1335 1341 orderedflags = reversed(orderedflags)
1336 1342
1337 1343 for flag in orderedflags:
1338 1344 # If a flagprocessor has been registered for a known flag, apply the
1339 1345 # related operation transform and update result tuple.
1340 1346 if flag & flags:
1341 1347 vhash = True
1342 1348
1343 1349 if flag not in _flagprocessors:
1344 1350 message = _("missing processor for flag '%#x'") % (flag)
1345 1351 raise RevlogError(message)
1346 1352
1347 1353 processor = _flagprocessors[flag]
1348 1354 if processor is not None:
1349 1355 readtransform, writetransform, rawtransform = processor
1350 1356
1351 1357 if raw:
1352 1358 vhash = rawtransform(self, text)
1353 1359 elif operation == 'read':
1354 1360 text, vhash = readtransform(self, text)
1355 1361 else: # write operation
1356 1362 text, vhash = writetransform(self, text)
1357 1363 validatehash = validatehash and vhash
1358 1364
1359 1365 return text, validatehash
1360 1366
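# A sketch of a round trip through the transforms, assuming a processor
# is registered for every flag present:
#   rawtext, vhash = self._processflags(text, flags, 'write')
#   text2, vhash2 = self._processflags(rawtext, flags, 'read')
# should yield text2 == text; 'write' walks the flag order reversed so
# that 'read' undoes the transforms in last-applied-first order.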
1361 1367 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1362 1368 """Check node hash integrity.
1363 1369
1364 1370 Available as a function so that subclasses can extend hash mismatch
1365 1371 behaviors as needed.
1366 1372 """
1367 1373 if p1 is None and p2 is None:
1368 1374 p1, p2 = self.parents(node)
1369 1375 if node != self.hash(text, p1, p2):
1370 1376 revornode = rev
1371 1377 if revornode is None:
1372 1378 revornode = templatefilters.short(hex(node))
1373 1379 raise RevlogError(_("integrity check failed on %s:%s")
1374 1380 % (self.indexfile, revornode))
1375 1381
1376 1382 def checkinlinesize(self, tr, fp=None):
1377 1383 """Check if the revlog is too big for inline and convert if so.
1378 1384
1379 1385 This should be called after revisions are added to the revlog. If the
1380 1386 revlog has grown too large to be an inline revlog, it will convert it
1381 1387 to use multiple index and data files.
1382 1388 """
1383 1389 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1384 1390 return
1385 1391
1386 1392 trinfo = tr.find(self.indexfile)
1387 1393 if trinfo is None:
1388 1394 raise RevlogError(_("%s not found in the transaction")
1389 1395 % self.indexfile)
1390 1396
1391 1397 trindex = trinfo[2]
1392 1398 if trindex is not None:
1393 1399 dataoff = self.start(trindex)
1394 1400 else:
1395 1401 # revlog was stripped at start of transaction, use all leftover data
1396 1402 trindex = len(self) - 1
1397 1403 dataoff = self.end(-2)
1398 1404
1399 1405 tr.add(self.datafile, dataoff)
1400 1406
1401 1407 if fp:
1402 1408 fp.flush()
1403 1409 fp.close()
1404 1410
1405 1411 df = self.opener(self.datafile, 'w')
1406 1412 try:
1407 1413 for r in self:
1408 1414 df.write(self._chunkraw(r, r)[1])
1409 1415 finally:
1410 1416 df.close()
1411 1417
1412 1418 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1413 1419 checkambig=self._checkambig)
1414 1420 self.version &= ~(REVLOGNGINLINEDATA)
1415 1421 self._inline = False
1416 1422 for i in self:
1417 1423 e = self._io.packentry(self.index[i], self.node, self.version, i)
1418 1424 fp.write(e)
1419 1425
1420 1426 # if we don't call close, the temp file will never replace the
1421 1427 # real index
1422 1428 fp.close()
1423 1429
1424 1430 tr.replace(self.indexfile, trindex * self._io.size)
1425 1431 self._chunkclear()
1426 1432
1427 1433 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1428 1434 node=None, flags=REVIDX_DEFAULT_FLAGS):
1429 1435 """add a revision to the log
1430 1436
1431 1437 text - the revision data to add
1432 1438 transaction - the transaction object used for rollback
1433 1439 link - the linkrev data to add
1434 1440 p1, p2 - the parent nodeids of the revision
1435 1441 cachedelta - an optional precomputed delta
1436 1442 node - nodeid of revision; typically node is not specified, and it is
1437 1443 computed by default as hash(text, p1, p2), however subclasses might
1438 1444 use a different hashing method (and override checkhash() in that case)
1439 1445 flags - the known flags to set on the revision
1440 1446 """
1441 1447 if link == nullrev:
1442 1448 raise RevlogError(_("attempted to add linkrev -1 to %s")
1443 1449 % self.indexfile)
1444 1450
1445 1451 if flags:
1446 1452 node = node or self.hash(text, p1, p2)
1447 1453
1448 1454 newtext, validatehash = self._processflags(text, flags, 'write')
1449 1455
1450 1456 # If the flag processor modifies the revision data, ignore any provided
1451 1457 # cachedelta.
1452 1458 if newtext != text:
1453 1459 cachedelta = None
1454 1460 text = newtext
1455 1461
1456 1462 if len(text) > _maxentrysize:
1457 1463 raise RevlogError(
1458 1464 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1459 1465 % (self.indexfile, len(text)))
1460 1466
1461 1467 node = node or self.hash(text, p1, p2)
1462 1468 if node in self.nodemap:
1463 1469 return node
1464 1470
1465 1471 if validatehash:
1466 1472 self.checkhash(text, node, p1=p1, p2=p2)
1467 1473
1468 1474 dfh = None
1469 1475 if not self._inline:
1470 1476 dfh = self.opener(self.datafile, "a+")
1471 1477 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1472 1478 try:
1473 1479 return self._addrevision(node, text, transaction, link, p1, p2,
1474 1480 flags, cachedelta, ifh, dfh)
1475 1481 finally:
1476 1482 if dfh:
1477 1483 dfh.close()
1478 1484 ifh.close()
1479 1485
1480 1486 def compress(self, data):
1481 1487 """Generate a possibly-compressed representation of data."""
1482 1488 if not data:
1483 1489 return '', data
1484 1490
1485 1491 compressed = self._compressor.compress(data)
1486 1492
1487 1493 if compressed:
1488 1494 # The revlog compressor added the header in the returned data.
1489 1495 return '', compressed
1490 1496
1491 1497 if data[0:1] == '\0':
1492 1498 return '', data
1493 1499 return 'u', data
1494 1500
1495 1501 def decompress(self, data):
1496 1502 """Decompress a revlog chunk.
1497 1503
1498 1504 The chunk is expected to begin with a header identifying the
1499 1505 format type so it can be routed to an appropriate decompressor.
1500 1506 """
1501 1507 if not data:
1502 1508 return data
1503 1509
1504 1510 # Revlogs are read much more frequently than they are written and many
1505 1511 # chunks only take microseconds to decompress, so performance is
1506 1512 # important here.
1507 1513 #
1508 1514 # We can make a few assumptions about revlogs:
1509 1515 #
1510 1516 # 1) the majority of chunks will be compressed (as opposed to inline
1511 1517 # raw data).
1512 1518 # 2) decompressing *any* data will likely be at least 10x slower than
1513 1519 # returning raw inline data.
1514 1520 # 3) we want to prioritize common and officially supported compression
1515 1521 # engines
1516 1522 #
1517 1523 # It follows that we want to optimize for the "decompress compressed data
1518 1524 # when encoded with common and officially supported compression engines"
1519 1525 # case over "raw data" and "data encoded by less common or non-official
1520 1526 # compression engines." That is why we have the inline lookup first
1521 1527 # followed by the compengines lookup.
1522 1528 #
1523 1529 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1524 1530 # compressed chunks. And this matters for changelog and manifest reads.
1525 1531 t = data[0:1]
1526 1532
1527 1533 if t == 'x':
1528 1534 try:
1529 1535 return _zlibdecompress(data)
1530 1536 except zlib.error as e:
1531 1537 raise RevlogError(_('revlog decompress error: %s') % str(e))
1532 1538 # '\0' is more common than 'u' so it goes first.
1533 1539 elif t == '\0':
1534 1540 return data
1535 1541 elif t == 'u':
1536 1542 return util.buffer(data, 1)
1537 1543
1538 1544 try:
1539 1545 compressor = self._decompressors[t]
1540 1546 except KeyError:
1541 1547 try:
1542 1548 engine = util.compengines.forrevlogheader(t)
1543 1549 compressor = engine.revlogcompressor()
1544 1550 self._decompressors[t] = compressor
1545 1551 except KeyError:
1546 1552 raise RevlogError(_('unknown compression type %r') % t)
1547 1553
1548 1554 return compressor.decompress(data)
1549 1555
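# In brief, this mirrors compress() above:
#   'u' + data  -> stored verbatim; return a buffer past the 'u' byte
#   '\0' + ...  -> uncompressed data that already begins with '\0'
#   'x' + ...   -> zlib; any other first byte is resolved through
#                  util.compengines.forrevlogheader().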
1550 1556 def _isgooddelta(self, d, textlen):
1551 1557 """Returns True if the given delta is good. Good means that it is within
1552 1558 the disk span, disk size, and chain length bounds that we know to be
1553 1559 performant."""
1554 1560 if d is None:
1555 1561 return False
1556 1562
1557 1563 # - 'dist' is the distance from the base revision -- bounding it limits
1558 1564 # the amount of I/O we need to do.
1559 1565 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1560 1566 # to apply -- bounding it limits the amount of CPU we consume.
1561 1567 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1562 1568 if (dist > textlen * 4 or l > textlen or
1563 1569 compresseddeltalen > textlen * 2 or
1564 1570 (self._maxchainlen and chainlen > self._maxchainlen)):
1565 1571 return False
1566 1572
1567 1573 return True
1568 1574
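# The bounds _isgooddelta() enforces, restated with made-up numbers
# (sketch only): a delta is rejected when reaching its base requires too
# much I/O, when it is larger than the fulltext, when replaying its chain
# costs too much CPU, or when the chain is too long.
def sketch_isgooddelta(dist, l, compresseddeltalen, chainlen,
                       textlen, maxchainlen=None):
    if dist > textlen * 4:                  # too much disk span to read
        return False
    if l > textlen:                         # bigger than the fulltext
        return False
    if compresseddeltalen > textlen * 2:    # chain too expensive to apply
        return False
    if maxchainlen and chainlen > maxchainlen:
        return False
    return True

# a 100-byte text with a 30-byte delta 250 bytes past its chain base:
assert sketch_isgooddelta(dist=250, l=30, compresseddeltalen=150,
                          chainlen=3, textlen=100)
# ...rejected once the chain carries 300 bytes of deltas (> 2 * 100):
assert not sketch_isgooddelta(dist=250, l=30, compresseddeltalen=300,
                              chainlen=3, textlen=100)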
1569 1575 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1570 1576 cachedelta, ifh, dfh, alwayscache=False, raw=False):
1571 1577 """internal function to add revisions to the log
1572 1578
1573 1579 see addrevision for argument descriptions.
1574 1580 invariants:
1575 1581 - text is optional (can be None); if not set, cachedelta must be set.
1576 1582 If both are set, they must correspond to each other.
1577 1583 - raw is optional; if set to True, it indicates the revision data is to
1578 1584 be treated by _processflags() as raw. It is usually set by changegroup
1579 1585 generation and debug commands.
1580 1586 """
1581 1587 btext = [text]
1582 1588 def buildtext():
1583 1589 if btext[0] is not None:
1584 1590 return btext[0]
1585 1591 baserev = cachedelta[0]
1586 1592 delta = cachedelta[1]
1587 1593 # special case deltas which replace the entire base; no need to decode
1588 1594 # the base revision. This neatly avoids censored bases, which throw
1589 1595 # when they're decoded.
1590 1596 hlen = struct.calcsize(">lll")
1591 1597 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1592 1598 len(delta) - hlen):
1593 1599 btext[0] = delta[hlen:]
1594 1600 else:
1595 1601 if self._inline:
1596 1602 fh = ifh
1597 1603 else:
1598 1604 fh = dfh
1599 1605 basetext = self.revision(baserev, _df=fh, raw=raw)
1600 1606 btext[0] = mdiff.patch(basetext, delta)
1601 1607
1602 1608 try:
1603 1609 res = self._processflags(btext[0], flags, 'read', raw=raw)
1604 1610 btext[0], validatehash = res
1605 1611 if validatehash:
1606 1612 self.checkhash(btext[0], node, p1=p1, p2=p2)
1607 1613 if flags & REVIDX_ISCENSORED:
1608 1614 raise RevlogError(_('node %s is not censored') % node)
1609 1615 except CensoredNodeError:
1610 1616 # must pass the censored index flag to add censored revisions
1611 1617 if not flags & REVIDX_ISCENSORED:
1612 1618 raise
1613 1619 return btext[0]
1614 1620
1615 1621 def builddelta(rev):
1616 1622 # can we use the cached delta?
1617 1623 if cachedelta and cachedelta[0] == rev:
1618 1624 delta = cachedelta[1]
1619 1625 else:
1620 1626 t = buildtext()
1621 1627 if self.iscensored(rev):
1622 1628 # deltas based on a censored revision must replace the
1623 1629 # full content in one patch, so delta works everywhere
1624 1630 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1625 1631 delta = header + t
1626 1632 else:
1627 1633 if self._inline:
1628 1634 fh = ifh
1629 1635 else:
1630 1636 fh = dfh
1631 1637 ptext = self.revision(rev, _df=fh)
1632 1638 delta = mdiff.textdiff(ptext, t)
1633 1639 header, data = self.compress(delta)
1634 1640 deltalen = len(header) + len(data)
1635 1641 chainbase = self.chainbase(rev)
1636 1642 dist = deltalen + offset - self.start(chainbase)
1637 1643 if self._generaldelta:
1638 1644 base = rev
1639 1645 else:
1640 1646 base = chainbase
1641 1647 chainlen, compresseddeltalen = self._chaininfo(rev)
1642 1648 chainlen += 1
1643 1649 compresseddeltalen += deltalen
1644 1650 return (dist, deltalen, (header, data), base,
1645 1651 chainbase, chainlen, compresseddeltalen)
1646 1652
1647 1653 curr = len(self)
1648 1654 prev = curr - 1
1649 1655 offset = self.end(prev)
1650 1656 delta = None
1651 1657 p1r, p2r = self.rev(p1), self.rev(p2)
1652 1658
1653 1659 # full versions are inserted when the needed deltas
1654 1660 # become comparable to the uncompressed text
1655 1661 if text is None:
1656 1662 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1657 1663 cachedelta[1])
1658 1664 else:
1659 1665 textlen = len(text)
1660 1666
1661 1667 # should we try to build a delta?
1662 1668 if prev != nullrev and self.storedeltachains:
1663 1669 tested = set()
1664 1670 # This condition is true most of the time when processing
1665 1671 # changegroup data into a generaldelta repo. The only time it
1666 1672 # isn't true is if this is the first revision in a delta chain
1667 1673 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1668 1674 if cachedelta and self._generaldelta and self._lazydeltabase:
1669 1675 # Assume what we received from the server is a good choice;
1670 1676 # builddelta() will reuse the cached delta
1671 1677 candidatedelta = builddelta(cachedelta[0])
1672 1678 tested.add(cachedelta[0])
1673 1679 if self._isgooddelta(candidatedelta, textlen):
1674 1680 delta = candidatedelta
1675 1681 if delta is None and self._generaldelta:
1676 1682 # exclude the base already tested via lazydeltabase, if any
1677 1683 parents = [p for p in (p1r, p2r)
1678 1684 if p != nullrev and p not in tested]
1679 1685 if parents and not self._aggressivemergedeltas:
1680 1686 # Pick whichever parent is closer to us (to minimize the
1681 1687 # chance of having to build a fulltext).
1682 1688 parents = [max(parents)]
1683 1689 tested.update(parents)
1684 1690 pdeltas = []
1685 1691 for p in parents:
1686 1692 pd = builddelta(p)
1687 1693 if self._isgooddelta(pd, textlen):
1688 1694 pdeltas.append(pd)
1689 1695 if pdeltas:
1690 1696 delta = min(pdeltas, key=lambda x: x[1])
1691 1697 if delta is None and prev not in tested:
1692 1698 # other approaches failed; try against prev to hopefully save
1693 1699 # us a fulltext.
1694 1700 candidatedelta = builddelta(prev)
1695 1701 if self._isgooddelta(candidatedelta, textlen):
1696 1702 delta = candidatedelta
1697 1703 if delta is not None:
1698 1704 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1699 1705 else:
1700 1706 text = buildtext()
1701 1707 data = self.compress(text)
1702 1708 l = len(data[1]) + len(data[0])
1703 1709 base = chainbase = curr
1704 1710
1705 1711 e = (offset_type(offset, flags), l, textlen,
1706 1712 base, link, p1r, p2r, node)
1707 1713 self.index.insert(-1, e)
1708 1714 self.nodemap[node] = curr
1709 1715
1710 1716 entry = self._io.packentry(e, self.node, self.version, curr)
1711 1717 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1712 1718
1713 1719 if alwayscache and text is None:
1714 1720 text = buildtext()
1715 1721
1716 1722 if type(text) == str: # only accept immutable objects
1717 1723 self._cache = (node, curr, text)
1718 1724 self._chainbasecache[curr] = chainbase
1719 1725 return node
1720 1726
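# Sketch of the candidate order _addrevision() tries before falling back
# to a fulltext: the cached delta base (lazydeltabase), then the selected
# parent(s), then the previous revision. The function and argument names
# here are illustrative stand-ins, not revlog API.
def sketch_choosedelta(builddelta, isgood, cachedbase, parents, prev):
    tested = set()
    if cachedbase is not None:          # trust the server's choice first
        d = builddelta(cachedbase)
        tested.add(cachedbase)
        if isgood(d):
            return d
    candidates = [builddelta(p) for p in parents if p not in tested]
    tested.update(parents)
    good = [d for d in candidates if isgood(d)]
    if good:
        return min(good, key=lambda d: d[1])    # smallest delta wins
    if prev not in tested:              # last resort before a fulltext
        d = builddelta(prev)
        if isgood(d):
            return d
    return None                         # caller stores a full revision

# toy run with (base, deltalen) pairs standing in for real deltas:
pick = sketch_choosedelta(builddelta=lambda rev: (rev, 40 + rev),
                          isgood=lambda d: d[1] < 100,
                          cachedbase=None, parents=[5], prev=6)
assert pick == (5, 45)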
1721 1727 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1722 1728 # Files opened in a+ mode have inconsistent behavior on various
1723 1729 # platforms. Windows requires that a file positioning call be made
1724 1730 # when the file handle transitions between reads and writes. See
1725 1731 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1726 1732 # platforms, Python or the platform itself can be buggy. Some versions
1727 1733 # of Solaris have been observed to not append at the end of the file
1728 1734 # if the file was seeked to before the end. See issue4943 for more.
1729 1735 #
1730 1736 # We work around this issue by inserting a seek() before writing.
1731 1737 # Note: This is likely not necessary on Python 3.
1732 1738 ifh.seek(0, os.SEEK_END)
1733 1739 if dfh:
1734 1740 dfh.seek(0, os.SEEK_END)
1735 1741
1736 1742 curr = len(self) - 1
1737 1743 if not self._inline:
1738 1744 transaction.add(self.datafile, offset)
1739 1745 transaction.add(self.indexfile, curr * len(entry))
1740 1746 if data[0]:
1741 1747 dfh.write(data[0])
1742 1748 dfh.write(data[1])
1743 1749 ifh.write(entry)
1744 1750 else:
1745 1751 offset += curr * self._io.size
1746 1752 transaction.add(self.indexfile, offset, curr)
1747 1753 ifh.write(entry)
1748 1754 ifh.write(data[0])
1749 1755 ifh.write(data[1])
1750 1756 self.checkinlinesize(transaction, ifh)
1751 1757
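# The a+ workaround above, demonstrated in isolation: always reposition
# to EOF before appending, since a read elsewhere may have moved the
# file offset on some platforms. Temp-file demo, illustrative only.
import os
import tempfile

fh = tempfile.TemporaryFile(mode='a+b')
fh.write('index entry 1\n')
fh.seek(0)                    # a read moves the file offset...
fh.read(5)
fh.seek(0, os.SEEK_END)       # ...so seek to EOF before every append
fh.write('index entry 2\n')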
1752 1758 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1753 1759 """
1754 1760 add a delta group
1755 1761
1756 1762 Given a set of deltas, add them to the revision log. The
1757 1763 first delta is against its parent, which should be in our
1758 1764 log; the rest are against the previous delta.
1759 1765
1760 1766 If ``addrevisioncb`` is defined, it will be called with arguments of
1761 1767 this revlog and the node that was added.
1762 1768 """
1763 1769
1764 1770 # track the base of the current delta log
1765 1771 content = []
1766 1772 node = None
1767 1773
1768 1774 r = len(self)
1769 1775 end = 0
1770 1776 if r:
1771 1777 end = self.end(r - 1)
1772 1778 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1773 1779 isize = r * self._io.size
1774 1780 if self._inline:
1775 1781 transaction.add(self.indexfile, end + isize, r)
1776 1782 dfh = None
1777 1783 else:
1778 1784 transaction.add(self.indexfile, isize, r)
1779 1785 transaction.add(self.datafile, end)
1780 1786 dfh = self.opener(self.datafile, "a+")
1781 1787 def flush():
1782 1788 if dfh:
1783 1789 dfh.flush()
1784 1790 ifh.flush()
1785 1791 try:
1786 1792 # loop through our set of deltas
1787 1793 chain = None
1788 1794 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1789 1795 node = chunkdata['node']
1790 1796 p1 = chunkdata['p1']
1791 1797 p2 = chunkdata['p2']
1792 1798 cs = chunkdata['cs']
1793 1799 deltabase = chunkdata['deltabase']
1794 1800 delta = chunkdata['delta']
1795 1801 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1796 1802
1797 1803 content.append(node)
1798 1804
1799 1805 link = linkmapper(cs)
1800 1806 if node in self.nodemap:
1801 1807 # this can happen if two branches make the same change
1802 1808 chain = node
1803 1809 continue
1804 1810
1805 1811 for p in (p1, p2):
1806 1812 if p not in self.nodemap:
1807 1813 raise LookupError(p, self.indexfile,
1808 1814 _('unknown parent'))
1809 1815
1810 1816 if deltabase not in self.nodemap:
1811 1817 raise LookupError(deltabase, self.indexfile,
1812 1818 _('unknown delta base'))
1813 1819
1814 1820 baserev = self.rev(deltabase)
1815 1821
1816 1822 if baserev != nullrev and self.iscensored(baserev):
1817 1823 # if base is censored, delta must be full replacement in a
1818 1824 # single patch operation
1819 1825 hlen = struct.calcsize(">lll")
1820 1826 oldlen = self.rawsize(baserev)
1821 1827 newlen = len(delta) - hlen
1822 1828 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1823 1829 raise error.CensoredBaseError(self.indexfile,
1824 1830 self.node(baserev))
1825 1831
1826 1832 if not flags and self._peek_iscensored(baserev, delta, flush):
1827 1833 flags |= REVIDX_ISCENSORED
1828 1834
1829 1835 # We assume consumers of addrevisioncb will want to retrieve
1830 1836 # the added revision, which will require a call to
1831 1837 # revision(). revision() will fast path if there is a cache
1832 1838 # hit. So, we tell _addrevision() to always cache in this case.
1833 1839 # We're only using addgroup() in the context of changegroup
1834 1840 # generation so the revision data can always be handled as raw
1835 1841 # by the flagprocessor.
1836 1842 chain = self._addrevision(node, None, transaction, link,
1837 1843 p1, p2, flags, (baserev, delta),
1838 1844 ifh, dfh,
1839 1845 alwayscache=bool(addrevisioncb),
1840 1846 raw=True)
1841 1847
1842 1848 if addrevisioncb:
1843 1849 addrevisioncb(self, chain)
1844 1850
1845 1851 if not dfh and not self._inline:
1846 1852 # addrevision switched from inline to conventional
1847 1853 # reopen the index
1848 1854 ifh.close()
1849 1855 dfh = self.opener(self.datafile, "a+")
1850 1856 ifh = self.opener(self.indexfile, "a+",
1851 1857 checkambig=self._checkambig)
1852 1858 finally:
1853 1859 if dfh:
1854 1860 dfh.close()
1855 1861 ifh.close()
1856 1862
1857 1863 return content
1858 1864
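# The loop in addgroup() relies on the two-argument form of iter():
# call the callable repeatedly until it returns the sentinel {}. The
# same pattern in isolation (simplified: the real callable also receives
# the previously added node as ``chain``):
chunks = iter([{'node': 'n1'}, {'node': 'n2'}, {}])

def deltachunk():
    return next(chunks)       # stand-in for cg.deltachunk

for chunkdata in iter(deltachunk, {}):
    print(chunkdata['node'])  # n1, n2; the empty dict stops the loop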
1859 1865 def iscensored(self, rev):
1860 1866 """Check if a file revision is censored."""
1861 1867 return False
1862 1868
1863 1869 def _peek_iscensored(self, baserev, delta, flush):
1864 1870 """Quickly check if a delta produces a censored revision."""
1865 1871 return False
1866 1872
1867 1873 def getstrippoint(self, minlink):
1868 1874 """find the minimum rev that must be stripped to strip the linkrev
1869 1875
1870 1876 Returns a tuple containing the minimum rev and a set of all revs that
1871 1877 have linkrevs that will be broken by this strip.
1872 1878 """
1873 1879 brokenrevs = set()
1874 1880 strippoint = len(self)
1875 1881
1876 1882 heads = {}
1877 1883 futurelargelinkrevs = set()
1878 1884 for head in self.headrevs():
1879 1885 headlinkrev = self.linkrev(head)
1880 1886 heads[head] = headlinkrev
1881 1887 if headlinkrev >= minlink:
1882 1888 futurelargelinkrevs.add(headlinkrev)
1883 1889
1884 1890 # This algorithm involves walking down the rev graph, starting at the
1885 1891 # heads. Since the revs are topologically sorted according to linkrev,
1886 1892 # once all head linkrevs are below the minlink, we know there are
1887 1893 # no more revs that could have a linkrev greater than minlink.
1888 1894 # So we can stop walking.
1889 1895 while futurelargelinkrevs:
1890 1896 strippoint -= 1
1891 1897 linkrev = heads.pop(strippoint)
1892 1898
1893 1899 if linkrev < minlink:
1894 1900 brokenrevs.add(strippoint)
1895 1901 else:
1896 1902 futurelargelinkrevs.remove(linkrev)
1897 1903
1898 1904 for p in self.parentrevs(strippoint):
1899 1905 if p != nullrev:
1900 1906 plinkrev = self.linkrev(p)
1901 1907 heads[p] = plinkrev
1902 1908 if plinkrev >= minlink:
1903 1909 futurelargelinkrevs.add(plinkrev)
1904 1910
1905 1911 return strippoint, brokenrevs
1906 1912
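# Toy version of the getstrippoint() computation for a linear chain of
# revisions (rev r's parent is r - 1), where the head walk above reduces
# to a scan. Sketch only; the real method walks an arbitrary DAG.
def sketch_getstrippoint(linkrevs, minlink):
    strippoint = len(linkrevs)
    brokenrevs = set()
    while strippoint and any(lr >= minlink
                             for lr in linkrevs[:strippoint]):
        strippoint -= 1
        if linkrevs[strippoint] < minlink:
            brokenrevs.add(strippoint)  # stripped despite a small linkrev
    return strippoint, brokenrevs

# linkrevs [0, 1, 5, 2, 6] and minlink=3: revs 2 and 4 must go, and rev 3
# (linkrev 2) sits between them, so it is stripped too and reported broken.
assert sketch_getstrippoint([0, 1, 5, 2, 6], 3) == (2, set([3]))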
1907 1913 def strip(self, minlink, transaction):
1908 1914 """truncate the revlog on the first revision with a linkrev >= minlink
1909 1915
1910 1916 This function is called when we're stripping revision minlink and
1911 1917 its descendants from the repository.
1912 1918
1913 1919 We have to remove all revisions with linkrev >= minlink, because
1914 1920 the equivalent changelog revisions will be renumbered after the
1915 1921 strip.
1916 1922
1917 1923 So we truncate the revlog on the first of these revisions, and
1918 1924 trust that the caller has saved the revisions that shouldn't be
1919 1925 removed and that it'll re-add them after this truncation.
1920 1926 """
1921 1927 if len(self) == 0:
1922 1928 return
1923 1929
1924 1930 rev, _ = self.getstrippoint(minlink)
1925 1931 if rev == len(self):
1926 1932 return
1927 1933
1928 1934 # first truncate the files on disk
1929 1935 end = self.start(rev)
1930 1936 if not self._inline:
1931 1937 transaction.add(self.datafile, end)
1932 1938 end = rev * self._io.size
1933 1939 else:
1934 1940 end += rev * self._io.size
1935 1941
1936 1942 transaction.add(self.indexfile, end)
1937 1943
1938 1944 # then reset internal state in memory to forget those revisions
1939 1945 self._cache = None
1940 1946 self._chaininfocache = {}
1941 1947 self._chunkclear()
1942 1948 for x in xrange(rev, len(self)):
1943 1949 del self.nodemap[self.node(x)]
1944 1950
1945 1951 del self.index[rev:-1]
1946 1952
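# The truncation offsets strip() computes, worked with made-up numbers:
# 64-byte index entries, strip point at rev 7, and the data for revs
# 0..6 ending at byte 9000 (i.e. self.start(7)).
iosize, rev, datastart = 64, 7, 9000
# separate .d file: truncate data at 9000, index after 7 whole entries
assert (datastart, rev * iosize) == (9000, 448)
# inline revlog: entries and data interleave, so offsets add up
assert datastart + rev * iosize == 9448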
1947 1953 def checksize(self):
1948 1954 expected = 0
1949 1955 if len(self):
1950 1956 expected = max(0, self.end(len(self) - 1))
1951 1957
1952 1958 try:
1953 1959 f = self.opener(self.datafile)
1954 1960 f.seek(0, 2)
1955 1961 actual = f.tell()
1956 1962 f.close()
1957 1963 dd = actual - expected
1958 1964 except IOError as inst:
1959 1965 if inst.errno != errno.ENOENT:
1960 1966 raise
1961 1967 dd = 0
1962 1968
1963 1969 try:
1964 1970 f = self.opener(self.indexfile)
1965 1971 f.seek(0, 2)
1966 1972 actual = f.tell()
1967 1973 f.close()
1968 1974 s = self._io.size
1969 1975 i = max(0, actual // s)
1970 1976 di = actual - (i * s)
1971 1977 if self._inline:
1972 1978 databytes = 0
1973 1979 for r in self:
1974 1980 databytes += max(0, self.length(r))
1975 1981 dd = 0
1976 1982 di = actual - len(self) * s - databytes
1977 1983 except IOError as inst:
1978 1984 if inst.errno != errno.ENOENT:
1979 1985 raise
1980 1986 di = 0
1981 1987
1982 1988 return (dd, di)
1983 1989
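# The (dd, di) diagnostics from checksize(), worked for a hypothetical
# non-inline revlog: 64-byte entries, data expected to end at byte 5000.
s, expected = 64, 5000
actualdata, actualindex = 5120, 672
dd = actualdata - expected         # 120 unexpected trailing data bytes
i = max(0, actualindex // s)       # 10 complete index entries...
di = actualindex - i * s           # ...plus 32 stray index bytes
assert (dd, i, di) == (120, 10, 32)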
1984 1990 def files(self):
1985 1991 res = [self.indexfile]
1986 1992 if not self._inline:
1987 1993 res.append(self.datafile)
1988 1994 return res
1989 1995
1990 1996 DELTAREUSEALWAYS = 'always'
1991 1997 DELTAREUSESAMEREVS = 'samerevs'
1992 1998 DELTAREUSENEVER = 'never'
1993 1999
1994 2000 DELTAREUSEALL = set(['always', 'samerevs', 'never'])
1995 2001
1996 2002 def clone(self, tr, destrevlog, addrevisioncb=None,
1997 2003 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
1998 2004 """Copy this revlog to another, possibly with format changes.
1999 2005
2000 2006 The destination revlog will contain the same revisions and nodes.
2001 2007 However, it may not be bit-for-bit identical due to e.g. delta encoding
2002 2008 differences.
2003 2009
2004 2010 The ``deltareuse`` argument controls how deltas from the existing revlog
2005 2011 are preserved in the destination revlog. The argument can have the
2006 2012 following values:
2007 2013
2008 2014 DELTAREUSEALWAYS
2009 2015 Deltas will always be reused (if possible), even if the destination
2010 2016 revlog would not select the same revisions for the delta. This is the
2011 2017 fastest mode of operation.
2012 2018 DELTAREUSESAMEREVS
2013 2019 Deltas will be reused if the destination revlog would pick the same
2014 2020 revisions for the delta. This mode strikes a balance between speed
2015 2021 and optimization.
2016 2022 DELTAREUSENEVER
2017 2023 Deltas will never be reused. This is the slowest mode of execution.
2018 2024 This mode can be used to recompute deltas (e.g. if the diff/delta
2019 2025 algorithm changes).
2020 2026
2021 2027 Delta computation can be slow, so the choice of delta reuse policy can
2022 2028 significantly affect run time.
2023 2029
2024 2030 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2025 2031 two extremes. Deltas will be reused when appropriate, but recomputed
2026 2032 when a better base revision is available. This means that if you
2027 2033 are converting a non-generaldelta revlog to a generaldelta revlog,
2028 2034 deltas will be recomputed if the delta's parent isn't a parent of the
2029 2035 revision.
2030 2036
2031 2037 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2032 2038 controls whether to compute deltas against both parents for merges.
2033 2039 If not specified, the destination revlog's current setting is used.
2034 2040 """
2035 2041 if deltareuse not in self.DELTAREUSEALL:
2036 2042 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2037 2043
2038 2044 if len(destrevlog):
2039 2045 raise ValueError(_('destination revlog is not empty'))
2040 2046
2041 2047 if getattr(self, 'filteredrevs', None):
2042 2048 raise ValueError(_('source revlog has filtered revisions'))
2043 2049 if getattr(destrevlog, 'filteredrevs', None):
2044 2050 raise ValueError(_('destination revlog has filtered revisions'))
2045 2051
2046 2052 # lazydeltabase controls whether to reuse a cached delta, if possible.
2047 2053 oldlazydeltabase = destrevlog._lazydeltabase
2048 2054 oldamd = destrevlog._aggressivemergedeltas
2049 2055
2050 2056 try:
2051 2057 if deltareuse == self.DELTAREUSEALWAYS:
2052 2058 destrevlog._lazydeltabase = True
2053 2059 elif deltareuse == self.DELTAREUSESAMEREVS:
2054 2060 destrevlog._lazydeltabase = False
2055 2061
2056 2062 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2057 2063
2058 2064 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2059 2065 self.DELTAREUSESAMEREVS)
2060 2066
2061 2067 index = self.index
2062 2068 for rev in self:
2063 2069 entry = index[rev]
2064 2070
2065 2071 # Some classes override linkrev to take filtered revs into
2066 2072 # account. Use raw entry from index.
2067 2073 flags = entry[0] & 0xffff
2068 2074 linkrev = entry[4]
2069 2075 p1 = index[entry[5]][7]
2070 2076 p2 = index[entry[6]][7]
2071 2077 node = entry[7]
2072 2078
2073 2079 # (Possibly) reuse the delta from the revlog if allowed and
2074 2080 # the revlog chunk is a delta.
2075 2081 cachedelta = None
2076 2082 text = None
2077 2083 if populatecachedelta:
2078 2084 dp = self.deltaparent(rev)
2079 2085 if dp != nullrev:
2080 2086 cachedelta = (dp, str(self._chunk(rev)))
2081 2087
2082 2088 if not cachedelta:
2083 2089 text = self.revision(rev)
2084 2090
2085 2091 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2086 2092 checkambig=False)
2087 2093 dfh = None
2088 2094 if not destrevlog._inline:
2089 2095 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2090 2096 try:
2091 2097 destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
2092 2098 flags, cachedelta, ifh, dfh)
2093 2099 finally:
2094 2100 if dfh:
2095 2101 dfh.close()
2096 2102 ifh.close()
2097 2103
2098 2104 if addrevisioncb:
2099 2105 addrevisioncb(self, rev, node)
2100 2106 finally:
2101 2107 destrevlog._lazydeltabase = oldlazydeltabase
2102 2108 destrevlog._aggressivemergedeltas = oldamd
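# Hypothetical use of clone() to recompute every delta, e.g. after the
# diff/delta algorithm changes. The helper and its ``opener``/``tr``
# arguments are assumptions for illustration, not part of this file.
from mercurial import revlog as revlogmod

def recompute_deltas(opener, tr, srcindex, dstindex):
    src = revlogmod.revlog(opener, srcindex)
    dst = revlogmod.revlog(opener, dstindex)
    # DELTAREUSENEVER forces every delta in dst to be recomputed
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER,
              aggressivemergedeltas=True)
    return dst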