revlog: make _addrevision only accept rawtext...
Jun Wu
r31755:ec48d57d default
@@ -1,2113 +1,2112
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 pycompat,
37 37 templatefilters,
38 38 util,
39 39 )
40 40
41 41 _pack = struct.pack
42 42 _unpack = struct.unpack
43 43 # Aliased for performance.
44 44 _zlibdecompress = zlib.decompress
45 45
46 46 # revlog header flags
47 47 REVLOGV0 = 0
48 48 REVLOGNG = 1
49 49 REVLOGNGINLINEDATA = (1 << 16)
50 50 REVLOGGENERALDELTA = (1 << 17)
51 51 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
52 52 REVLOG_DEFAULT_FORMAT = REVLOGNG
53 53 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
54 54 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
55 55
56 56 # revlog index flags
57 57 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
58 58 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
59 59 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
60 60 REVIDX_DEFAULT_FLAGS = 0
61 61 # stable order in which flags need to be processed and their processors applied
62 62 REVIDX_FLAGS_ORDER = [
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_ELLIPSIS,
65 65 REVIDX_EXTSTORED,
66 66 ]
67 67 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
68 68
69 69 # max size of revlog with inline data
70 70 _maxinline = 131072
71 71 _chunksize = 1048576
72 72
73 73 RevlogError = error.RevlogError
74 74 LookupError = error.LookupError
75 75 CensoredNodeError = error.CensoredNodeError
76 76 ProgrammingError = error.ProgrammingError
77 77
78 78 # Store flag processors (cf. 'addflagprocessor()' to register)
79 79 _flagprocessors = {
80 80 REVIDX_ISCENSORED: None,
81 81 }
82 82
83 83 def addflagprocessor(flag, processor):
84 84 """Register a flag processor on a revision data flag.
85 85
86 86 Invariant:
87 87 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
88 88 - Only one flag processor can be registered on a specific flag.
89 89 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
90 90 following signatures:
91 91 - (read) f(self, rawtext) -> text, bool
92 92 - (write) f(self, text) -> rawtext, bool
93 93 - (raw) f(self, rawtext) -> bool
94 94 "text" is presented to the user. "rawtext" is stored in revlog data, not
95 95 directly visible to the user.
96 96 The boolean returned by these transforms is used to determine whether
97 97 the returned text can be used for hash integrity checking. For example,
98 98 if "write" returns False, then "text" is used to generate hash. If
99 99 "write" returns True, that basically means "rawtext" returned by "write"
100 100 should be used to generate hash. Usually, "write" and "read" return
101 101 different booleans. And "raw" returns a same boolean as "write".
102 102
103 103 Note: The 'raw' transform is used for changegroup generation and in some
104 104 debug commands. In this case the transform only indicates whether the
105 105 contents can be used for hash integrity checks.
106 106 """
107 107 if not flag & REVIDX_KNOWN_FLAGS:
108 108 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
109 109 raise ProgrammingError(msg)
110 110 if flag not in REVIDX_FLAGS_ORDER:
111 111 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
112 112 raise ProgrammingError(msg)
113 113 if flag in _flagprocessors:
114 114 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
115 115 raise error.Abort(msg)
116 116 _flagprocessors[flag] = processor
117 117
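# --- editor's example (not part of revlog.py) -------------------------------
# A sketch of registering a processor on REVIDX_EXTSTORED, following the
# contract documented above: "write" stores the content externally and keeps
# only a key in the revlog, "read" resolves the key back to content. The toy
# in-memory blobstore below is illustrative only.
def _demo_addflagprocessor():
    blobstore = {}                        # hypothetical "external" store
    def _write(self, text):
        key = hashlib.sha1(text).hexdigest()
        blobstore[key] = text
        return key, False                 # hash the original text, not the key
    def _read(self, rawtext):
        return blobstore[rawtext], True   # resolved content is safe to hash
    def _raw(self, rawtext):
        return False                      # a bare key cannot be hash-checked
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))
# -----------------------------------------------------------------------------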
118 118 def getoffset(q):
119 119 return int(q >> 16)
120 120
121 121 def gettype(q):
122 122 return int(q & 0xFFFF)
123 123
124 124 def offset_type(offset, type):
125 125 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
126 126 raise ValueError('unknown revlog index flags')
127 127 return int(int(offset) << 16 | type)
128 128
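# --- editor's example (not part of revlog.py) -------------------------------
# Round trip through the packing helpers above: 48 bits of data-file offset
# plus 16 bits of index flags in a single integer.
def _demo_offset_type():
    packed = offset_type(123456, REVIDX_ISCENSORED)
    assert getoffset(packed) == 123456
    assert gettype(packed) == REVIDX_ISCENSORED
# -----------------------------------------------------------------------------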
129 129 _nullhash = hashlib.sha1(nullid)
130 130
131 131 def hash(text, p1, p2):
132 132 """generate a hash from the given text and its parent hashes
133 133
134 134 This hash combines both the current file contents and its history
135 135 in a manner that makes it easy to distinguish nodes with the same
136 136 content in the revision graph.
137 137 """
138 138 # As of now, if one of the parent nodes is null, p2 is null
139 139 if p2 == nullid:
140 140 # deep copy of a hash is faster than creating one
141 141 s = _nullhash.copy()
142 142 s.update(p1)
143 143 else:
144 144 # neither of the parent nodes is nullid
145 145 l = [p1, p2]
146 146 l.sort()
147 147 s = hashlib.sha1(l[0])
148 148 s.update(l[1])
149 149 s.update(text)
150 150 return s.digest()
151 151
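# --- editor's example (not part of revlog.py) -------------------------------
# For a root revision both parents are nullid; since nullid (20 zero bytes)
# always sorts first, the node is sha1(lower_parent + higher_parent + text)
# in every case.
def _demo_hash():
    text = 'hello\n'
    expected = hashlib.sha1(nullid + nullid + text).digest()
    assert hash(text, nullid, nullid) == expected
# -----------------------------------------------------------------------------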
152 152 # index v0:
153 153 # 4 bytes: offset
154 154 # 4 bytes: compressed length
155 155 # 4 bytes: base rev
156 156 # 4 bytes: link rev
157 157 # 20 bytes: parent 1 nodeid
158 158 # 20 bytes: parent 2 nodeid
159 159 # 20 bytes: nodeid
160 160 indexformatv0 = ">4l20s20s20s"
161 161
162 162 class revlogoldio(object):
163 163 def __init__(self):
164 164 self.size = struct.calcsize(indexformatv0)
165 165
166 166 def parseindex(self, data, inline):
167 167 s = self.size
168 168 index = []
169 169 nodemap = {nullid: nullrev}
170 170 n = off = 0
171 171 l = len(data)
172 172 while off + s <= l:
173 173 cur = data[off:off + s]
174 174 off += s
175 175 e = _unpack(indexformatv0, cur)
176 176 # transform to revlogv1 format
177 177 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
178 178 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
179 179 index.append(e2)
180 180 nodemap[e[6]] = n
181 181 n += 1
182 182
183 183 # add the magic null revision at -1
184 184 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
185 185
186 186 return index, nodemap, None
187 187
188 188 def packentry(self, entry, node, version, rev):
189 189 if gettype(entry[0]):
190 190 raise RevlogError(_("index entry flags need RevlogNG"))
191 191 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
192 192 node(entry[5]), node(entry[6]), entry[7])
193 193 return _pack(indexformatv0, *e2)
194 194
195 195 # index ng:
196 196 # 6 bytes: offset
197 197 # 2 bytes: flags
198 198 # 4 bytes: compressed length
199 199 # 4 bytes: uncompressed length
200 200 # 4 bytes: base rev
201 201 # 4 bytes: link rev
202 202 # 4 bytes: parent 1 rev
203 203 # 4 bytes: parent 2 rev
204 204 # 32 bytes: nodeid
205 205 indexformatng = ">Qiiiiii20s12x"
206 206 versionformat = ">I"
207 207
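# --- editor's example (not part of revlog.py) -------------------------------
# Sanity check of the entry layout documented above: 8 (offset + flags) +
# 6 * 4 (lengths, base rev, link rev, parent revs) + 20 (nodeid) + 12 bytes
# of padding = 64 bytes per index entry.
def _demo_entrysize():
    assert struct.calcsize(indexformatng) == 64
# -----------------------------------------------------------------------------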
208 208 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
209 209 # signed integer)
210 210 _maxentrysize = 0x7fffffff
211 211
212 212 class revlogio(object):
213 213 def __init__(self):
214 214 self.size = struct.calcsize(indexformatng)
215 215
216 216 def parseindex(self, data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline)
219 219 return index, getattr(index, 'nodemap', None), cache
220 220
221 221 def packentry(self, entry, node, version, rev):
222 222 p = _pack(indexformatng, *entry)
223 223 if rev == 0:
224 224 p = _pack(versionformat, version) + p[4:]
225 225 return p
226 226
227 227 class revlog(object):
228 228 """
229 229 the underlying revision storage object
230 230
231 231 A revlog consists of two parts, an index and the revision data.
232 232
233 233 The index is a file with a fixed record size containing
234 234 information on each revision, including its nodeid (hash), the
235 235 nodeids of its parents, the position and offset of its data within
236 236 the data file, and the revision it's based on. Finally, each entry
237 237 contains a linkrev entry that can serve as a pointer to external
238 238 data.
239 239
240 240 The revision data itself is a linear collection of data chunks.
241 241 Each chunk represents a revision and is usually represented as a
242 242 delta against the previous chunk. To bound lookup time, runs of
243 243 deltas are limited to about 2 times the length of the original
244 244 version data. This makes retrieval of a version proportional to
245 245 its size, or O(1) relative to the number of revisions.
246 246
247 247 Both pieces of the revlog are written to in an append-only
248 248 fashion, which means we never need to rewrite a file to insert or
249 249 remove data, and can use some simple techniques to avoid the need
250 250 for locking while reading.
251 251
252 252 If checkambig, indexfile is opened with checkambig=True at
253 253 writing, to avoid file stat ambiguity.
254 254 """
255 255 def __init__(self, opener, indexfile, checkambig=False):
256 256 """
257 257 create a revlog object
258 258
259 259 opener is a function that abstracts the file opening operation
260 260 and can be used to implement COW semantics or the like.
261 261 """
262 262 self.indexfile = indexfile
263 263 self.datafile = indexfile[:-2] + ".d"
264 264 self.opener = opener
265 265 # When True, indexfile is opened with checkambig=True at writing, to
266 266 # avoid file stat ambiguity.
267 267 self._checkambig = checkambig
268 268 # 3-tuple of (node, rev, text) for a raw revision.
269 269 self._cache = None
270 270 # Maps rev to chain base rev.
271 271 self._chainbasecache = util.lrucachedict(100)
272 272 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
273 273 self._chunkcache = (0, '')
274 274 # How much data to read and cache into the raw revlog data cache.
275 275 self._chunkcachesize = 65536
276 276 self._maxchainlen = None
277 277 self._aggressivemergedeltas = False
278 278 self.index = []
279 279 # Mapping of partial identifiers to full nodes.
280 280 self._pcache = {}
281 281 # Mapping of revision integer to full node.
282 282 self._nodecache = {nullid: nullrev}
283 283 self._nodepos = None
284 284 self._compengine = 'zlib'
285 285
286 286 v = REVLOG_DEFAULT_VERSION
287 287 opts = getattr(opener, 'options', None)
288 288 if opts is not None:
289 289 if 'revlogv1' in opts:
290 290 if 'generaldelta' in opts:
291 291 v |= REVLOGGENERALDELTA
292 292 else:
293 293 v = 0
294 294 if 'chunkcachesize' in opts:
295 295 self._chunkcachesize = opts['chunkcachesize']
296 296 if 'maxchainlen' in opts:
297 297 self._maxchainlen = opts['maxchainlen']
298 298 if 'aggressivemergedeltas' in opts:
299 299 self._aggressivemergedeltas = opts['aggressivemergedeltas']
300 300 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
301 301 if 'compengine' in opts:
302 302 self._compengine = opts['compengine']
303 303
304 304 if self._chunkcachesize <= 0:
305 305 raise RevlogError(_('revlog chunk cache size %r is not greater '
306 306 'than 0') % self._chunkcachesize)
307 307 elif self._chunkcachesize & (self._chunkcachesize - 1):
308 308 raise RevlogError(_('revlog chunk cache size %r is not a power '
309 309 'of 2') % self._chunkcachesize)
310 310
311 311 indexdata = ''
312 312 self._initempty = True
313 313 try:
314 314 f = self.opener(self.indexfile)
315 315 indexdata = f.read()
316 316 f.close()
317 317 if len(indexdata) > 0:
318 318 v = struct.unpack(versionformat, indexdata[:4])[0]
319 319 self._initempty = False
320 320 except IOError as inst:
321 321 if inst.errno != errno.ENOENT:
322 322 raise
323 323
324 324 self.version = v
325 325 self._inline = v & REVLOGNGINLINEDATA
326 326 self._generaldelta = v & REVLOGGENERALDELTA
327 327 flags = v & ~0xFFFF
328 328 fmt = v & 0xFFFF
329 329 if fmt == REVLOGV0 and flags:
330 330 raise RevlogError(_("index %s unknown flags %#04x for format v0")
331 331 % (self.indexfile, flags >> 16))
332 332 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
333 333 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
334 334 % (self.indexfile, flags >> 16))
335 335 elif fmt > REVLOGNG:
336 336 raise RevlogError(_("index %s unknown format %d")
337 337 % (self.indexfile, fmt))
338 338
339 339 self.storedeltachains = True
340 340
341 341 self._io = revlogio()
342 342 if self.version == REVLOGV0:
343 343 self._io = revlogoldio()
344 344 try:
345 345 d = self._io.parseindex(indexdata, self._inline)
346 346 except (ValueError, IndexError):
347 347 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
348 348 self.index, nodemap, self._chunkcache = d
349 349 if nodemap is not None:
350 350 self.nodemap = self._nodecache = nodemap
351 351 if not self._chunkcache:
352 352 self._chunkclear()
353 353 # revnum -> (chain-length, sum-delta-length)
354 354 self._chaininfocache = {}
355 355 # revlog header -> revlog compressor
356 356 self._decompressors = {}
357 357
358 358 @util.propertycache
359 359 def _compressor(self):
360 360 return util.compengines[self._compengine].revlogcompressor()
361 361
362 362 def tip(self):
363 363 return self.node(len(self.index) - 2)
364 364 def __contains__(self, rev):
365 365 return 0 <= rev < len(self)
366 366 def __len__(self):
367 367 return len(self.index) - 1
368 368 def __iter__(self):
369 369 return iter(xrange(len(self)))
370 370 def revs(self, start=0, stop=None):
371 371 """iterate over all rev in this revlog (from start to stop)"""
372 372 step = 1
373 373 if stop is not None:
374 374 if start > stop:
375 375 step = -1
376 376 stop += step
377 377 else:
378 378 stop = len(self)
379 379 return xrange(start, stop, step)
380 380
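# --- editor's example (not part of revlog.py) -------------------------------
# revs() treats ``stop`` as inclusive and iterates backwards when
# start > stop; ``rl`` is assumed to be any revlog with at least six
# revisions.
def _demo_revs(rl):
    assert list(rl.revs(2, 5)) == [2, 3, 4, 5]
    assert list(rl.revs(5, 2)) == [5, 4, 3, 2]
# -----------------------------------------------------------------------------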
381 381 @util.propertycache
382 382 def nodemap(self):
383 383 self.rev(self.node(0))
384 384 return self._nodecache
385 385
386 386 def hasnode(self, node):
387 387 try:
388 388 self.rev(node)
389 389 return True
390 390 except KeyError:
391 391 return False
392 392
393 393 def clearcaches(self):
394 394 self._cache = None
395 395 self._chainbasecache.clear()
396 396 self._chunkcache = (0, '')
397 397 self._pcache = {}
398 398
399 399 try:
400 400 self._nodecache.clearcaches()
401 401 except AttributeError:
402 402 self._nodecache = {nullid: nullrev}
403 403 self._nodepos = None
404 404
405 405 def rev(self, node):
406 406 try:
407 407 return self._nodecache[node]
408 408 except TypeError:
409 409 raise
410 410 except RevlogError:
411 411 # parsers.c radix tree lookup failed
412 412 raise LookupError(node, self.indexfile, _('no node'))
413 413 except KeyError:
414 414 # pure python cache lookup failed
415 415 n = self._nodecache
416 416 i = self.index
417 417 p = self._nodepos
418 418 if p is None:
419 419 p = len(i) - 2
420 420 for r in xrange(p, -1, -1):
421 421 v = i[r][7]
422 422 n[v] = r
423 423 if v == node:
424 424 self._nodepos = r - 1
425 425 return r
426 426 raise LookupError(node, self.indexfile, _('no node'))
427 427
428 428 # Accessors for index entries.
429 429
430 430 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
431 431 # are flags.
432 432 def start(self, rev):
433 433 return int(self.index[rev][0] >> 16)
434 434
435 435 def flags(self, rev):
436 436 return self.index[rev][0] & 0xFFFF
437 437
438 438 def length(self, rev):
439 439 return self.index[rev][1]
440 440
441 441 def rawsize(self, rev):
442 442 """return the length of the uncompressed text for a given revision"""
443 443 l = self.index[rev][2]
444 444 if l >= 0:
445 445 return l
446 446
447 447 t = self.revision(rev)
448 448 return len(t)
449 449 size = rawsize
450 450
451 451 def chainbase(self, rev):
452 452 base = self._chainbasecache.get(rev)
453 453 if base is not None:
454 454 return base
455 455
456 456 index = self.index
457 457 base = index[rev][3]
458 458 while base != rev:
459 459 rev = base
460 460 base = index[rev][3]
461 461
462 462 self._chainbasecache[rev] = base
463 463 return base
464 464
465 465 def linkrev(self, rev):
466 466 return self.index[rev][4]
467 467
468 468 def parentrevs(self, rev):
469 469 return self.index[rev][5:7]
470 470
471 471 def node(self, rev):
472 472 return self.index[rev][7]
473 473
474 474 # Derived from index values.
475 475
476 476 def end(self, rev):
477 477 return self.start(rev) + self.length(rev)
478 478
479 479 def parents(self, node):
480 480 i = self.index
481 481 d = i[self.rev(node)]
482 482 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
483 483
484 484 def chainlen(self, rev):
485 485 return self._chaininfo(rev)[0]
486 486
487 487 def _chaininfo(self, rev):
488 488 chaininfocache = self._chaininfocache
489 489 if rev in chaininfocache:
490 490 return chaininfocache[rev]
491 491 index = self.index
492 492 generaldelta = self._generaldelta
493 493 iterrev = rev
494 494 e = index[iterrev]
495 495 clen = 0
496 496 compresseddeltalen = 0
497 497 while iterrev != e[3]:
498 498 clen += 1
499 499 compresseddeltalen += e[1]
500 500 if generaldelta:
501 501 iterrev = e[3]
502 502 else:
503 503 iterrev -= 1
504 504 if iterrev in chaininfocache:
505 505 t = chaininfocache[iterrev]
506 506 clen += t[0]
507 507 compresseddeltalen += t[1]
508 508 break
509 509 e = index[iterrev]
510 510 else:
511 511 # Add text length of base since decompressing that also takes
512 512 # work. For cache hits the length is already included.
513 513 compresseddeltalen += e[1]
514 514 r = (clen, compresseddeltalen)
515 515 chaininfocache[rev] = r
516 516 return r
517 517
518 518 def _deltachain(self, rev, stoprev=None):
519 519 """Obtain the delta chain for a revision.
520 520
521 521 ``stoprev`` specifies a revision to stop at. If not specified, we
522 522 stop at the base of the chain.
523 523
524 524 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
525 525 revs in ascending order and ``stopped`` is a bool indicating whether
526 526 ``stoprev`` was hit.
527 527 """
528 528 chain = []
529 529
530 530 # Alias to prevent attribute lookup in tight loop.
531 531 index = self.index
532 532 generaldelta = self._generaldelta
533 533
534 534 iterrev = rev
535 535 e = index[iterrev]
536 536 while iterrev != e[3] and iterrev != stoprev:
537 537 chain.append(iterrev)
538 538 if generaldelta:
539 539 iterrev = e[3]
540 540 else:
541 541 iterrev -= 1
542 542 e = index[iterrev]
543 543
544 544 if iterrev == stoprev:
545 545 stopped = True
546 546 else:
547 547 chain.append(iterrev)
548 548 stopped = False
549 549
550 550 chain.reverse()
551 551 return chain, stopped
552 552
553 553 def ancestors(self, revs, stoprev=0, inclusive=False):
554 554 """Generate the ancestors of 'revs' in reverse topological order.
555 555 Does not generate revs lower than stoprev.
556 556
557 557 See the documentation for ancestor.lazyancestors for more details."""
558 558
559 559 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
560 560 inclusive=inclusive)
561 561
562 562 def descendants(self, revs):
563 563 """Generate the descendants of 'revs' in revision order.
564 564
565 565 Yield a sequence of revision numbers starting with a child of
566 566 some rev in revs, i.e., each revision is *not* considered a
567 567 descendant of itself. Results are ordered by revision number (a
568 568 topological sort)."""
569 569 first = min(revs)
570 570 if first == nullrev:
571 571 for i in self:
572 572 yield i
573 573 return
574 574
575 575 seen = set(revs)
576 576 for i in self.revs(start=first + 1):
577 577 for x in self.parentrevs(i):
578 578 if x != nullrev and x in seen:
579 579 seen.add(i)
580 580 yield i
581 581 break
582 582
583 583 def findcommonmissing(self, common=None, heads=None):
584 584 """Return a tuple of the ancestors of common and the ancestors of heads
585 585 that are not ancestors of common. In revset terminology, we return the
586 586 tuple:
587 587
588 588 ::common, (::heads) - (::common)
589 589
590 590 The list is sorted by revision number, meaning it is
591 591 topologically sorted.
592 592
593 593 'heads' and 'common' are both lists of node IDs. If heads is
594 594 not supplied, uses all of the revlog's heads. If common is not
595 595 supplied, uses nullid."""
596 596 if common is None:
597 597 common = [nullid]
598 598 if heads is None:
599 599 heads = self.heads()
600 600
601 601 common = [self.rev(n) for n in common]
602 602 heads = [self.rev(n) for n in heads]
603 603
604 604 # we want the ancestors, but inclusive
605 605 class lazyset(object):
606 606 def __init__(self, lazyvalues):
607 607 self.addedvalues = set()
608 608 self.lazyvalues = lazyvalues
609 609
610 610 def __contains__(self, value):
611 611 return value in self.addedvalues or value in self.lazyvalues
612 612
613 613 def __iter__(self):
614 614 added = self.addedvalues
615 615 for r in added:
616 616 yield r
617 617 for r in self.lazyvalues:
618 618 if not r in added:
619 619 yield r
620 620
621 621 def add(self, value):
622 622 self.addedvalues.add(value)
623 623
624 624 def update(self, values):
625 625 self.addedvalues.update(values)
626 626
627 627 has = lazyset(self.ancestors(common))
628 628 has.add(nullrev)
629 629 has.update(common)
630 630
631 631 # take all ancestors from heads that aren't in has
632 632 missing = set()
633 633 visit = collections.deque(r for r in heads if r not in has)
634 634 while visit:
635 635 r = visit.popleft()
636 636 if r in missing:
637 637 continue
638 638 else:
639 639 missing.add(r)
640 640 for p in self.parentrevs(r):
641 641 if p not in has:
642 642 visit.append(p)
643 643 missing = list(missing)
644 644 missing.sort()
645 645 return has, [self.node(miss) for miss in missing]
646 646
647 647 def incrementalmissingrevs(self, common=None):
648 648 """Return an object that can be used to incrementally compute the
649 649 revision numbers of the ancestors of arbitrary sets that are not
650 650 ancestors of common. This is an ancestor.incrementalmissingancestors
651 651 object.
652 652
653 653 'common' is a list of revision numbers. If common is not supplied, uses
654 654 nullrev.
655 655 """
656 656 if common is None:
657 657 common = [nullrev]
658 658
659 659 return ancestor.incrementalmissingancestors(self.parentrevs, common)
660 660
661 661 def findmissingrevs(self, common=None, heads=None):
662 662 """Return the revision numbers of the ancestors of heads that
663 663 are not ancestors of common.
664 664
665 665 More specifically, return a list of revision numbers corresponding to
666 666 nodes N such that every N satisfies the following constraints:
667 667
668 668 1. N is an ancestor of some node in 'heads'
669 669 2. N is not an ancestor of any node in 'common'
670 670
671 671 The list is sorted by revision number, meaning it is
672 672 topologically sorted.
673 673
674 674 'heads' and 'common' are both lists of revision numbers. If heads is
675 675 not supplied, uses all of the revlog's heads. If common is not
676 676 supplied, uses nullid."""
677 677 if common is None:
678 678 common = [nullrev]
679 679 if heads is None:
680 680 heads = self.headrevs()
681 681
682 682 inc = self.incrementalmissingrevs(common=common)
683 683 return inc.missingancestors(heads)
684 684
685 685 def findmissing(self, common=None, heads=None):
686 686 """Return the ancestors of heads that are not ancestors of common.
687 687
688 688 More specifically, return a list of nodes N such that every N
689 689 satisfies the following constraints:
690 690
691 691 1. N is an ancestor of some node in 'heads'
692 692 2. N is not an ancestor of any node in 'common'
693 693
694 694 The list is sorted by revision number, meaning it is
695 695 topologically sorted.
696 696
697 697 'heads' and 'common' are both lists of node IDs. If heads is
698 698 not supplied, uses all of the revlog's heads. If common is not
699 699 supplied, uses nullid."""
700 700 if common is None:
701 701 common = [nullid]
702 702 if heads is None:
703 703 heads = self.heads()
704 704
705 705 common = [self.rev(n) for n in common]
706 706 heads = [self.rev(n) for n in heads]
707 707
708 708 inc = self.incrementalmissingrevs(common=common)
709 709 return [self.node(r) for r in inc.missingancestors(heads)]
710 710
711 711 def nodesbetween(self, roots=None, heads=None):
712 712 """Return a topological path from 'roots' to 'heads'.
713 713
714 714 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
715 715 topologically sorted list of all nodes N that satisfy both of
716 716 these constraints:
717 717
718 718 1. N is a descendant of some node in 'roots'
719 719 2. N is an ancestor of some node in 'heads'
720 720
721 721 Every node is considered to be both a descendant and an ancestor
722 722 of itself, so every reachable node in 'roots' and 'heads' will be
723 723 included in 'nodes'.
724 724
725 725 'outroots' is the list of reachable nodes in 'roots', i.e., the
726 726 subset of 'roots' that is returned in 'nodes'. Likewise,
727 727 'outheads' is the subset of 'heads' that is also in 'nodes'.
728 728
729 729 'roots' and 'heads' are both lists of node IDs. If 'roots' is
730 730 unspecified, uses nullid as the only root. If 'heads' is
731 731 unspecified, uses list of all of the revlog's heads."""
732 732 nonodes = ([], [], [])
733 733 if roots is not None:
734 734 roots = list(roots)
735 735 if not roots:
736 736 return nonodes
737 737 lowestrev = min([self.rev(n) for n in roots])
738 738 else:
739 739 roots = [nullid] # Everybody's a descendant of nullid
740 740 lowestrev = nullrev
741 741 if (lowestrev == nullrev) and (heads is None):
742 742 # We want _all_ the nodes!
743 743 return ([self.node(r) for r in self], [nullid], list(self.heads()))
744 744 if heads is None:
745 745 # All nodes are ancestors, so the latest ancestor is the last
746 746 # node.
747 747 highestrev = len(self) - 1
748 748 # Set ancestors to None to signal that every node is an ancestor.
749 749 ancestors = None
750 750 # Set heads to an empty dictionary for later discovery of heads
751 751 heads = {}
752 752 else:
753 753 heads = list(heads)
754 754 if not heads:
755 755 return nonodes
756 756 ancestors = set()
757 757 # Turn heads into a dictionary so we can remove 'fake' heads.
758 758 # Also, later we will be using it to filter out the heads we can't
759 759 # find from roots.
760 760 heads = dict.fromkeys(heads, False)
761 761 # Start at the top and keep marking parents until we're done.
762 762 nodestotag = set(heads)
763 763 # Remember where the top was so we can use it as a limit later.
764 764 highestrev = max([self.rev(n) for n in nodestotag])
765 765 while nodestotag:
766 766 # grab a node to tag
767 767 n = nodestotag.pop()
768 768 # Never tag nullid
769 769 if n == nullid:
770 770 continue
771 771 # A node's revision number represents its place in a
772 772 # topologically sorted list of nodes.
773 773 r = self.rev(n)
774 774 if r >= lowestrev:
775 775 if n not in ancestors:
776 776 # If we are possibly a descendant of one of the roots
777 777 # and we haven't already been marked as an ancestor
778 778 ancestors.add(n) # Mark as ancestor
779 779 # Add non-nullid parents to list of nodes to tag.
780 780 nodestotag.update([p for p in self.parents(n) if
781 781 p != nullid])
782 782 elif n in heads: # We've seen it before, is it a fake head?
783 783 # So it is, real heads should not be the ancestors of
784 784 # any other heads.
785 785 heads.pop(n)
786 786 if not ancestors:
787 787 return nonodes
788 788 # Now that we have our set of ancestors, we want to remove any
789 789 # roots that are not ancestors.
790 790
791 791 # If one of the roots was nullid, everything is included anyway.
792 792 if lowestrev > nullrev:
793 793 # But, since we weren't, let's recompute the lowest rev to not
794 794 # include roots that aren't ancestors.
795 795
796 796 # Filter out roots that aren't ancestors of heads
797 797 roots = [root for root in roots if root in ancestors]
798 798 # Recompute the lowest revision
799 799 if roots:
800 800 lowestrev = min([self.rev(root) for root in roots])
801 801 else:
802 802 # No more roots? Return empty list
803 803 return nonodes
804 804 else:
805 805 # We are descending from nullid, and don't need to care about
806 806 # any other roots.
807 807 lowestrev = nullrev
808 808 roots = [nullid]
809 809 # Transform our roots list into a set.
810 810 descendants = set(roots)
811 811 # Also, keep the original roots so we can filter out roots that aren't
812 812 # 'real' roots (i.e. are descended from other roots).
813 813 roots = descendants.copy()
814 814 # Our topologically sorted list of output nodes.
815 815 orderedout = []
816 816 # Don't start at nullid since we don't want nullid in our output list,
817 817 # and if nullid shows up in descendants, empty parents will look like
818 818 # they're descendants.
819 819 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
820 820 n = self.node(r)
821 821 isdescendant = False
822 822 if lowestrev == nullrev: # Everybody is a descendant of nullid
823 823 isdescendant = True
824 824 elif n in descendants:
825 825 # n is already a descendant
826 826 isdescendant = True
827 827 # This check only needs to be done here because all the roots
828 828 # will start being marked as descendants before the loop.
829 829 if n in roots:
830 830 # If n was a root, check if it's a 'real' root.
831 831 p = tuple(self.parents(n))
832 832 # If any of its parents are descendants, it's not a root.
833 833 if (p[0] in descendants) or (p[1] in descendants):
834 834 roots.remove(n)
835 835 else:
836 836 p = tuple(self.parents(n))
837 837 # A node is a descendant if either of its parents is a
838 838 # descendant. (We seeded the descendants set with the roots
839 839 # up there, remember?)
840 840 if (p[0] in descendants) or (p[1] in descendants):
841 841 descendants.add(n)
842 842 isdescendant = True
843 843 if isdescendant and ((ancestors is None) or (n in ancestors)):
844 844 # Only include nodes that are both descendants and ancestors.
845 845 orderedout.append(n)
846 846 if (ancestors is not None) and (n in heads):
847 847 # We're trying to figure out which heads are reachable
848 848 # from roots.
849 849 # Mark this head as having been reached
850 850 heads[n] = True
851 851 elif ancestors is None:
852 852 # Otherwise, we're trying to discover the heads.
853 853 # Assume this is a head because if it isn't, the next step
854 854 # will eventually remove it.
855 855 heads[n] = True
856 856 # But, obviously its parents aren't.
857 857 for p in self.parents(n):
858 858 heads.pop(p, None)
859 859 heads = [head for head, flag in heads.iteritems() if flag]
860 860 roots = list(roots)
861 861 assert orderedout
862 862 assert roots
863 863 assert heads
864 864 return (orderedout, roots, heads)
865 865
866 866 def headrevs(self):
867 867 try:
868 868 return self.index.headrevs()
869 869 except AttributeError:
870 870 return self._headrevs()
871 871
872 872 def computephases(self, roots):
873 873 return self.index.computephasesmapsets(roots)
874 874
875 875 def _headrevs(self):
876 876 count = len(self)
877 877 if not count:
878 878 return [nullrev]
879 879 # we won't iter over filtered revs so nobody is a head at start
880 880 ishead = [0] * (count + 1)
881 881 index = self.index
882 882 for r in self:
883 883 ishead[r] = 1 # I may be a head
884 884 e = index[r]
885 885 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
886 886 return [r for r, val in enumerate(ishead) if val]
887 887
888 888 def heads(self, start=None, stop=None):
889 889 """return the list of all nodes that have no children
890 890
891 891 if start is specified, only heads that are descendants of
892 892 start will be returned
893 893 if stop is specified, it will consider all the revs from stop
894 894 as if they had no children
895 895 """
896 896 if start is None and stop is None:
897 897 if not len(self):
898 898 return [nullid]
899 899 return [self.node(r) for r in self.headrevs()]
900 900
901 901 if start is None:
902 902 start = nullid
903 903 if stop is None:
904 904 stop = []
905 905 stoprevs = set([self.rev(n) for n in stop])
906 906 startrev = self.rev(start)
907 907 reachable = set((startrev,))
908 908 heads = set((startrev,))
909 909
910 910 parentrevs = self.parentrevs
911 911 for r in self.revs(start=startrev + 1):
912 912 for p in parentrevs(r):
913 913 if p in reachable:
914 914 if r not in stoprevs:
915 915 reachable.add(r)
916 916 heads.add(r)
917 917 if p in heads and p not in stoprevs:
918 918 heads.remove(p)
919 919
920 920 return [self.node(r) for r in heads]
921 921
922 922 def children(self, node):
923 923 """find the children of a given node"""
924 924 c = []
925 925 p = self.rev(node)
926 926 for r in self.revs(start=p + 1):
927 927 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
928 928 if prevs:
929 929 for pr in prevs:
930 930 if pr == p:
931 931 c.append(self.node(r))
932 932 elif p == nullrev:
933 933 c.append(self.node(r))
934 934 return c
935 935
936 936 def descendant(self, start, end):
937 937 if start == nullrev:
938 938 return True
939 939 for i in self.descendants([start]):
940 940 if i == end:
941 941 return True
942 942 elif i > end:
943 943 break
944 944 return False
945 945
946 946 def commonancestorsheads(self, a, b):
947 947 """calculate all the heads of the common ancestors of nodes a and b"""
948 948 a, b = self.rev(a), self.rev(b)
949 949 try:
950 950 ancs = self.index.commonancestorsheads(a, b)
951 951 except (AttributeError, OverflowError): # C implementation failed
952 952 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
953 953 return pycompat.maplist(self.node, ancs)
954 954
955 955 def isancestor(self, a, b):
956 956 """return True if node a is an ancestor of node b
957 957
958 958 The implementation of this is trivial but the use of
959 959 commonancestorsheads is not."""
960 960 return a in self.commonancestorsheads(a, b)
961 961
962 962 def ancestor(self, a, b):
963 963 """calculate the "best" common ancestor of nodes a and b"""
964 964
965 965 a, b = self.rev(a), self.rev(b)
966 966 try:
967 967 ancs = self.index.ancestors(a, b)
968 968 except (AttributeError, OverflowError):
969 969 ancs = ancestor.ancestors(self.parentrevs, a, b)
970 970 if ancs:
971 971 # choose a consistent winner when there's a tie
972 972 return min(map(self.node, ancs))
973 973 return nullid
974 974
975 975 def _match(self, id):
976 976 if isinstance(id, int):
977 977 # rev
978 978 return self.node(id)
979 979 if len(id) == 20:
980 980 # possibly a binary node
981 981 # odds of a binary node being all hex in ASCII are 1 in 10**25
982 982 try:
983 983 node = id
984 984 self.rev(node) # quick search the index
985 985 return node
986 986 except LookupError:
987 987 pass # may be partial hex id
988 988 try:
989 989 # str(rev)
990 990 rev = int(id)
991 991 if str(rev) != id:
992 992 raise ValueError
993 993 if rev < 0:
994 994 rev = len(self) + rev
995 995 if rev < 0 or rev >= len(self):
996 996 raise ValueError
997 997 return self.node(rev)
998 998 except (ValueError, OverflowError):
999 999 pass
1000 1000 if len(id) == 40:
1001 1001 try:
1002 1002 # a full hex nodeid?
1003 1003 node = bin(id)
1004 1004 self.rev(node)
1005 1005 return node
1006 1006 except (TypeError, LookupError):
1007 1007 pass
1008 1008
1009 1009 def _partialmatch(self, id):
1010 1010 try:
1011 1011 partial = self.index.partialmatch(id)
1012 1012 if partial and self.hasnode(partial):
1013 1013 return partial
1014 1014 return None
1015 1015 except RevlogError:
1016 1016 # parsers.c radix tree lookup gave multiple matches
1017 1017 # fast path: for unfiltered changelog, radix tree is accurate
1018 1018 if not getattr(self, 'filteredrevs', None):
1019 1019 raise LookupError(id, self.indexfile,
1020 1020 _('ambiguous identifier'))
1021 1021 # fall through to slow path that filters hidden revisions
1022 1022 except (AttributeError, ValueError):
1023 1023 # we are pure python, or key was too short to search radix tree
1024 1024 pass
1025 1025
1026 1026 if id in self._pcache:
1027 1027 return self._pcache[id]
1028 1028
1029 1029 if len(id) < 40:
1030 1030 try:
1031 1031 # hex(node)[:...]
1032 1032 l = len(id) // 2 # grab an even number of digits
1033 1033 prefix = bin(id[:l * 2])
1034 1034 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1035 1035 nl = [n for n in nl if hex(n).startswith(id) and
1036 1036 self.hasnode(n)]
1037 1037 if len(nl) > 0:
1038 1038 if len(nl) == 1:
1039 1039 self._pcache[id] = nl[0]
1040 1040 return nl[0]
1041 1041 raise LookupError(id, self.indexfile,
1042 1042 _('ambiguous identifier'))
1043 1043 return None
1044 1044 except TypeError:
1045 1045 pass
1046 1046
1047 1047 def lookup(self, id):
1048 1048 """locate a node based on:
1049 1049 - revision number or str(revision number)
1050 1050 - nodeid or subset of hex nodeid
1051 1051 """
1052 1052 n = self._match(id)
1053 1053 if n is not None:
1054 1054 return n
1055 1055 n = self._partialmatch(id)
1056 1056 if n:
1057 1057 return n
1058 1058
1059 1059 raise LookupError(id, self.indexfile, _('no match found'))
1060 1060
1061 1061 def cmp(self, node, text):
1062 1062 """compare text with a given file revision
1063 1063
1064 1064 returns True if text is different than what is stored.
1065 1065 """
1066 1066 p1, p2 = self.parents(node)
1067 1067 return hash(text, p1, p2) != node
1068 1068
1069 1069 def _addchunk(self, offset, data):
1070 1070 """Add a segment to the revlog cache.
1071 1071
1072 1072 Accepts an absolute offset and the data that is at that location.
1073 1073 """
1074 1074 o, d = self._chunkcache
1075 1075 # try to add to existing cache
1076 1076 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1077 1077 self._chunkcache = o, d + data
1078 1078 else:
1079 1079 self._chunkcache = offset, data
1080 1080
1081 1081 def _loadchunk(self, offset, length, df=None):
1082 1082 """Load a segment of raw data from the revlog.
1083 1083
1084 1084 Accepts an absolute offset, length to read, and an optional existing
1085 1085 file handle to read from.
1086 1086
1087 1087 If an existing file handle is passed, it will be seeked and the
1088 1088 original seek position will NOT be restored.
1089 1089
1090 1090 Returns a str or buffer of raw byte data.
1091 1091 """
1092 1092 if df is not None:
1093 1093 closehandle = False
1094 1094 else:
1095 1095 if self._inline:
1096 1096 df = self.opener(self.indexfile)
1097 1097 else:
1098 1098 df = self.opener(self.datafile)
1099 1099 closehandle = True
1100 1100
1101 1101 # Cache data both forward and backward around the requested
1102 1102 # data, in a fixed size window. This helps speed up operations
1103 1103 # involving reading the revlog backwards.
1104 1104 cachesize = self._chunkcachesize
1105 1105 realoffset = offset & ~(cachesize - 1)
1106 1106 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1107 1107 - realoffset)
1108 1108 df.seek(realoffset)
1109 1109 d = df.read(reallength)
1110 1110 if closehandle:
1111 1111 df.close()
1112 1112 self._addchunk(realoffset, d)
1113 1113 if offset != realoffset or reallength != length:
1114 1114 return util.buffer(d, offset - realoffset, length)
1115 1115 return d
1116 1116
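# --- editor's example (not part of revlog.py) -------------------------------
# Worked instance of the read-window rounding in _loadchunk(): with the
# default 64k cache size, a 100-byte read at offset 70000 is widened to the
# enclosing aligned window, so one 64k block is read and cached.
def _demo_cachewindow():
    offset, length, cachesize = 70000, 100, 65536
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert (realoffset, reallength) == (65536, 65536)
# -----------------------------------------------------------------------------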
1117 1117 def _getchunk(self, offset, length, df=None):
1118 1118 """Obtain a segment of raw data from the revlog.
1119 1119
1120 1120 Accepts an absolute offset, length of bytes to obtain, and an
1121 1121 optional file handle to the already-opened revlog. If the file
1122 1122 handle is used, its original seek position will not be preserved.
1123 1123
1124 1124 Requests for data may be returned from a cache.
1125 1125
1126 1126 Returns a str or a buffer instance of raw byte data.
1127 1127 """
1128 1128 o, d = self._chunkcache
1129 1129 l = len(d)
1130 1130
1131 1131 # is it in the cache?
1132 1132 cachestart = offset - o
1133 1133 cacheend = cachestart + length
1134 1134 if cachestart >= 0 and cacheend <= l:
1135 1135 if cachestart == 0 and cacheend == l:
1136 1136 return d # avoid a copy
1137 1137 return util.buffer(d, cachestart, cacheend - cachestart)
1138 1138
1139 1139 return self._loadchunk(offset, length, df=df)
1140 1140
1141 1141 def _chunkraw(self, startrev, endrev, df=None):
1142 1142 """Obtain a segment of raw data corresponding to a range of revisions.
1143 1143
1144 1144 Accepts the start and end revisions and an optional already-open
1145 1145 file handle to be used for reading. If the file handle is read, its
1146 1146 seek position will not be preserved.
1147 1147
1148 1148 Requests for data may be satisfied by a cache.
1149 1149
1150 1150 Returns a 2-tuple of (offset, data) for the requested range of
1151 1151 revisions. Offset is the integer offset from the beginning of the
1152 1152 revlog and data is a str or buffer of the raw byte data.
1153 1153
1154 1154 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1155 1155 to determine where each revision's data begins and ends.
1156 1156 """
1157 1157 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1158 1158 # (functions are expensive).
1159 1159 index = self.index
1160 1160 istart = index[startrev]
1161 1161 start = int(istart[0] >> 16)
1162 1162 if startrev == endrev:
1163 1163 end = start + istart[1]
1164 1164 else:
1165 1165 iend = index[endrev]
1166 1166 end = int(iend[0] >> 16) + iend[1]
1167 1167
1168 1168 if self._inline:
1169 1169 start += (startrev + 1) * self._io.size
1170 1170 end += (endrev + 1) * self._io.size
1171 1171 length = end - start
1172 1172
1173 1173 return start, self._getchunk(start, length, df=df)
1174 1174
1175 1175 def _chunk(self, rev, df=None):
1176 1176 """Obtain a single decompressed chunk for a revision.
1177 1177
1178 1178 Accepts an integer revision and an optional already-open file handle
1179 1179 to be used for reading. If used, the seek position of the file will not
1180 1180 be preserved.
1181 1181
1182 1182 Returns a str holding uncompressed data for the requested revision.
1183 1183 """
1184 1184 return self.decompress(self._chunkraw(rev, rev, df=df)[1])
1185 1185
1186 1186 def _chunks(self, revs, df=None):
1187 1187 """Obtain decompressed chunks for the specified revisions.
1188 1188
1189 1189 Accepts an iterable of numeric revisions that are assumed to be in
1190 1190 ascending order. Also accepts an optional already-open file handle
1191 1191 to be used for reading. If used, the seek position of the file will
1192 1192 not be preserved.
1193 1193
1194 1194 This function is similar to calling ``self._chunk()`` multiple times,
1195 1195 but is faster.
1196 1196
1197 1197 Returns a list with decompressed data for each requested revision.
1198 1198 """
1199 1199 if not revs:
1200 1200 return []
1201 1201 start = self.start
1202 1202 length = self.length
1203 1203 inline = self._inline
1204 1204 iosize = self._io.size
1205 1205 buffer = util.buffer
1206 1206
1207 1207 l = []
1208 1208 ladd = l.append
1209 1209
1210 1210 try:
1211 1211 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1212 1212 except OverflowError:
1213 1213 # issue4215 - we can't cache a run of chunks greater than
1214 1214 # 2G on Windows
1215 1215 return [self._chunk(rev, df=df) for rev in revs]
1216 1216
1217 1217 decomp = self.decompress
1218 1218 for rev in revs:
1219 1219 chunkstart = start(rev)
1220 1220 if inline:
1221 1221 chunkstart += (rev + 1) * iosize
1222 1222 chunklength = length(rev)
1223 1223 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1224 1224
1225 1225 return l
1226 1226
1227 1227 def _chunkclear(self):
1228 1228 """Clear the raw chunk cache."""
1229 1229 self._chunkcache = (0, '')
1230 1230
1231 1231 def deltaparent(self, rev):
1232 1232 """return deltaparent of the given revision"""
1233 1233 base = self.index[rev][3]
1234 1234 if base == rev:
1235 1235 return nullrev
1236 1236 elif self._generaldelta:
1237 1237 return base
1238 1238 else:
1239 1239 return rev - 1
1240 1240
1241 1241 def revdiff(self, rev1, rev2):
1242 1242 """return or calculate a delta between two revisions
1243 1243
1244 1244 The delta calculated is in binary form and is intended to be written to
1245 1245 revlog data directly. So this function needs raw revision data.
1246 1246 """
1247 1247 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1248 1248 return bytes(self._chunk(rev2))
1249 1249
1250 1250 return mdiff.textdiff(self.revision(rev1, raw=True),
1251 1251 self.revision(rev2, raw=True))
1252 1252
1253 1253 def revision(self, nodeorrev, _df=None, raw=False):
1254 1254 """return an uncompressed revision of a given node or revision
1255 1255 number.
1256 1256
1257 1257 _df - an existing file handle to read from. (internal-only)
1258 1258 raw - an optional argument specifying if the revision data is to be
1259 1259 treated as raw data when applying flag transforms. 'raw' should be set
1260 1260 to True when generating changegroups or in debug commands.
1261 1261 """
1262 1262 if isinstance(nodeorrev, int):
1263 1263 rev = nodeorrev
1264 1264 node = self.node(rev)
1265 1265 else:
1266 1266 node = nodeorrev
1267 1267 rev = None
1268 1268
1269 1269 cachedrev = None
1270 1270 if node == nullid:
1271 1271 return ""
1272 1272 if self._cache:
1273 1273 if self._cache[0] == node:
1274 1274 # _cache only stores rawtext
1275 1275 if raw:
1276 1276 return self._cache[2]
1277 1277 cachedrev = self._cache[1]
1278 1278
1279 1279 # look up what we need to read
1280 1280 rawtext = None
1281 1281 if rev is None:
1282 1282 rev = self.rev(node)
1283 1283
1284 1284 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1285 1285 if stopped:
1286 1286 rawtext = self._cache[2]
1287 1287
1288 1288 # drop cache to save memory
1289 1289 self._cache = None
1290 1290
1291 1291 bins = self._chunks(chain, df=_df)
1292 1292 if rawtext is None:
1293 1293 rawtext = bytes(bins[0])
1294 1294 bins = bins[1:]
1295 1295
1296 1296 rawtext = mdiff.patches(rawtext, bins)
1297 1297
1298 1298 text, validatehash = self._processflags(rawtext, self.flags(rev),
1299 1299 'read', raw=raw)
1300 1300 if validatehash:
1301 1301 self.checkhash(text, node, rev=rev)
1302 1302
1303 1303 self._cache = (node, rev, rawtext)
1304 1304 return text
1305 1305
1306 1306 def hash(self, text, p1, p2):
1307 1307 """Compute a node hash.
1308 1308
1309 1309 Available as a function so that subclasses can replace the hash
1310 1310 as needed.
1311 1311 """
1312 1312 return hash(text, p1, p2)
1313 1313
1314 1314 def _processflags(self, text, flags, operation, raw=False):
1315 1315 """Inspect revision data flags and applies transforms defined by
1316 1316 registered flag processors.
1317 1317
1318 1318 ``text`` - the revision data to process
1319 1319 ``flags`` - the revision flags
1320 1320 ``operation`` - the operation being performed (read or write)
1321 1321 ``raw`` - an optional argument describing if the raw transform should be
1322 1322 applied.
1323 1323
1324 1324 This method processes the flags in the order (or reverse order if
1325 1325 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1326 1326 flag processors registered for present flags. The order of flags defined
1327 1327 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1328 1328
1329 1329 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1330 1330 processed text and ``validatehash`` is a bool indicating whether the
1331 1331 returned text should be checked for hash integrity.
1332 1332
1333 1333 Note: If the ``raw`` argument is set, it has precedence over the
1334 1334 operation and will only update the value of ``validatehash``.
1335 1335 """
1336 1336 if not operation in ('read', 'write'):
1337 1337 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1338 1338 # Check all flags are known.
1339 1339 if flags & ~REVIDX_KNOWN_FLAGS:
1340 1340 raise RevlogError(_("incompatible revision flag '%#x'") %
1341 1341 (flags & ~REVIDX_KNOWN_FLAGS))
1342 1342 validatehash = True
1343 1343 # Depending on the operation (read or write), the order might be
1344 1344 # reversed due to non-commutative transforms.
1345 1345 orderedflags = REVIDX_FLAGS_ORDER
1346 1346 if operation == 'write':
1347 1347 orderedflags = reversed(orderedflags)
1348 1348
1349 1349 for flag in orderedflags:
1350 1350 # If a flagprocessor has been registered for a known flag, apply the
1351 1351 # related operation transform and update result tuple.
1352 1352 if flag & flags:
1353 1353 vhash = True
1354 1354
1355 1355 if flag not in _flagprocessors:
1356 1356 message = _("missing processor for flag '%#x'") % (flag)
1357 1357 raise RevlogError(message)
1358 1358
1359 1359 processor = _flagprocessors[flag]
1360 1360 if processor is not None:
1361 1361 readtransform, writetransform, rawtransform = processor
1362 1362
1363 1363 if raw:
1364 1364 vhash = rawtransform(self, text)
1365 1365 elif operation == 'read':
1366 1366 text, vhash = readtransform(self, text)
1367 1367 else: # write operation
1368 1368 text, vhash = writetransform(self, text)
1369 1369 validatehash = validatehash and vhash
1370 1370
1371 1371 return text, validatehash
1372 1372
1373 1373 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1374 1374 """Check node hash integrity.
1375 1375
1376 1376 Available as a function so that subclasses can extend hash mismatch
1377 1377 behaviors as needed.
1378 1378 """
1379 1379 if p1 is None and p2 is None:
1380 1380 p1, p2 = self.parents(node)
1381 1381 if node != self.hash(text, p1, p2):
1382 1382 revornode = rev
1383 1383 if revornode is None:
1384 1384 revornode = templatefilters.short(hex(node))
1385 1385 raise RevlogError(_("integrity check failed on %s:%s")
1386 1386 % (self.indexfile, revornode))
1387 1387
1388 1388 def checkinlinesize(self, tr, fp=None):
1389 1389 """Check if the revlog is too big for inline and convert if so.
1390 1390
1391 1391 This should be called after revisions are added to the revlog. If the
1392 1392 revlog has grown too large to be an inline revlog, it will convert it
1393 1393 to use multiple index and data files.
1394 1394 """
1395 1395 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1396 1396 return
1397 1397
1398 1398 trinfo = tr.find(self.indexfile)
1399 1399 if trinfo is None:
1400 1400 raise RevlogError(_("%s not found in the transaction")
1401 1401 % self.indexfile)
1402 1402
1403 1403 trindex = trinfo[2]
1404 1404 if trindex is not None:
1405 1405 dataoff = self.start(trindex)
1406 1406 else:
1407 1407 # revlog was stripped at start of transaction, use all leftover data
1408 1408 trindex = len(self) - 1
1409 1409 dataoff = self.end(-2)
1410 1410
1411 1411 tr.add(self.datafile, dataoff)
1412 1412
1413 1413 if fp:
1414 1414 fp.flush()
1415 1415 fp.close()
1416 1416
1417 1417 df = self.opener(self.datafile, 'w')
1418 1418 try:
1419 1419 for r in self:
1420 1420 df.write(self._chunkraw(r, r)[1])
1421 1421 finally:
1422 1422 df.close()
1423 1423
1424 1424 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1425 1425 checkambig=self._checkambig)
1426 1426 self.version &= ~(REVLOGNGINLINEDATA)
1427 1427 self._inline = False
1428 1428 for i in self:
1429 1429 e = self._io.packentry(self.index[i], self.node, self.version, i)
1430 1430 fp.write(e)
1431 1431
1432 1432 # if we don't call close, the temp file will never replace the
1433 1433 # real index
1434 1434 fp.close()
1435 1435
1436 1436 tr.replace(self.indexfile, trindex * self._io.size)
1437 1437 self._chunkclear()
1438 1438
1439 1439 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1440 1440 node=None, flags=REVIDX_DEFAULT_FLAGS):
1441 1441 """add a revision to the log
1442 1442
1443 1443 text - the revision data to add
1444 1444 transaction - the transaction object used for rollback
1445 1445 link - the linkrev data to add
1446 1446 p1, p2 - the parent nodeids of the revision
1447 1447 cachedelta - an optional precomputed delta
1448 1448 node - nodeid of revision; typically node is not specified, and it is
1449 1449 computed by default as hash(text, p1, p2), however subclasses might
1450 1450 use a different hashing method (and override checkhash() in that case)
1451 1451 flags - the known flags to set on the revision
1452 1452 """
1453 1453 if link == nullrev:
1454 1454 raise RevlogError(_("attempted to add linkrev -1 to %s")
1455 1455 % self.indexfile)
1456 1456
1457 1457 if flags:
1458 1458 node = node or self.hash(text, p1, p2)
1459 1459
1460 1460 rawtext, validatehash = self._processflags(text, flags, 'write')
1461 1461
1462 1462 # If the flag processor modifies the revision data, ignore any provided
1463 1463 # cachedelta.
1464 1464 if rawtext != text:
1465 1465 cachedelta = None
1466 1466
1467 1467 if len(rawtext) > _maxentrysize:
1468 1468 raise RevlogError(
1469 1469 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1470 1470 % (self.indexfile, len(rawtext)))
1471 1471
1472 1472 node = node or self.hash(rawtext, p1, p2)
1473 1473 if node in self.nodemap:
1474 1474 return node
1475 1475
1476 1476 if validatehash:
1477 1477 self.checkhash(rawtext, node, p1=p1, p2=p2)
1478 1478
1479 1479 dfh = None
1480 1480 if not self._inline:
1481 1481 dfh = self.opener(self.datafile, "a+")
1482 1482 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1483 1483 try:
1484 1484 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1485 1485 flags, cachedelta, ifh, dfh)
1486 1486 finally:
1487 1487 if dfh:
1488 1488 dfh.close()
1489 1489 ifh.close()
1490 1490
1491 1491 def compress(self, data):
1492 1492 """Generate a possibly-compressed representation of data."""
1493 1493 if not data:
1494 1494 return '', data
1495 1495
1496 1496 compressed = self._compressor.compress(data)
1497 1497
1498 1498 if compressed:
1499 1499 # The revlog compressor added the header in the returned data.
1500 1500 return '', compressed
1501 1501
1502 1502 if data[0:1] == '\0':
1503 1503 return '', data
1504 1504 return 'u', data
1505 1505
1506 1506 def decompress(self, data):
1507 1507 """Decompress a revlog chunk.
1508 1508
1509 1509 The chunk is expected to begin with a header identifying the
1510 1510 format type so it can be routed to an appropriate decompressor.
1511 1511 """
1512 1512 if not data:
1513 1513 return data
1514 1514
1515 1515 # Revlogs are read much more frequently than they are written and many
1516 1516 # chunks only take microseconds to decompress, so performance is
1517 1517 # important here.
1518 1518 #
1519 1519 # We can make a few assumptions about revlogs:
1520 1520 #
1521 1521 # 1) the majority of chunks will be compressed (as opposed to inline
1522 1522 # raw data).
1523 1523 # 2) decompressing *any* data will likely be at least 10x slower than
1524 1524 # returning raw inline data.
1525 1525 # 3) we want to prioritize common and officially supported compression
1526 1526 # engines
1527 1527 #
1528 1528 # It follows that we want to optimize for "decompress compressed data
1529 1529 # when encoded with common and officially supported compression engines"
1530 1530 # case over "raw data" and "data encoded by less common or non-official
1531 1531 # compression engines." That is why we have the inline lookup first
1532 1532 # followed by the compengines lookup.
1533 1533 #
1534 1534 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1535 1535 # compressed chunks. And this matters for changelog and manifest reads.
1536 1536 t = data[0:1]
1537 1537
1538 1538 if t == 'x':
1539 1539 try:
1540 1540 return _zlibdecompress(data)
1541 1541 except zlib.error as e:
1542 1542 raise RevlogError(_('revlog decompress error: %s') % str(e))
1543 1543 # '\0' is more common than 'u' so it goes first.
1544 1544 elif t == '\0':
1545 1545 return data
1546 1546 elif t == 'u':
1547 1547 return util.buffer(data, 1)
1548 1548
1549 1549 try:
1550 1550 compressor = self._decompressors[t]
1551 1551 except KeyError:
1552 1552 try:
1553 1553 engine = util.compengines.forrevlogheader(t)
1554 1554 compressor = engine.revlogcompressor()
1555 1555 self._decompressors[t] = compressor
1556 1556 except KeyError:
1557 1557 raise RevlogError(_('unknown compression type %r') % t)
1558 1558
1559 1559 return compressor.decompress(data)
1560 1560
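# --- editor's example (not part of revlog.py) -------------------------------
# The header-byte routing above, exercised directly; ``rl`` is assumed to be
# any zlib-backed revlog instance. zlib output always begins with 'x', and an
# explicit 'u' marks a chunk stored uncompressed.
def _demo_decompress(rl):
    data = zlib.compress('some text')
    assert data[0:1] == 'x'
    assert rl.decompress(data) == 'some text'
    assert bytes(rl.decompress('usome text')) == 'some text'
# -----------------------------------------------------------------------------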
1561 1561 def _isgooddelta(self, d, textlen):
1562 1562 """Returns True if the given delta is good. Good means that it is within
1563 1563 the disk span, disk size, and chain length bounds that we know to be
1564 1564 performant."""
1565 1565 if d is None:
1566 1566 return False
1567 1567
1568 1568 # - 'dist' is the distance from the base revision -- bounding it limits
1569 1569 # the amount of I/O we need to do.
1570 1570 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1571 1571 # to apply -- bounding it limits the amount of CPU we consume.
1572 1572 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1573 1573 if (dist > textlen * 4 or l > textlen or
1574 1574 compresseddeltalen > textlen * 2 or
1575 1575 (self._maxchainlen and chainlen > self._maxchainlen)):
1576 1576 return False
1577 1577
1578 1578 return True
1579 1579
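To make the bounds concrete, a worked example with made-up numbers: for a revision whose fulltext is 1000 bytes, a candidate delta is rejected if reading from the chain base would span more than 4000 bytes (dist), if the delta itself exceeds 1000 bytes (l), if replaying the whole chain would cost more than 2000 bytes of deltas (compresseddeltalen), or if the chain would grow past a configured maxchainlen.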
1580 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1581 cachedelta, ifh, dfh, alwayscache=False, raw=False):
1580 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1581 cachedelta, ifh, dfh, alwayscache=False):
1582 1582 """internal function to add revisions to the log
1583 1583
1584 1584 see addrevision for argument descriptions.
1585
1586 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1587
1585 1588 invariants:
1586 - text is optional (can be None); if not set, cachedelta must be set.
1589 - rawtext is optional (can be None); if not set, cachedelta must be set.
1587 1590 if both are set, they must correspond to each other.
1588 - raw is optional; if set to True, it indicates the revision data is to
1589 be treated by _processflags() as raw. It is usually set by changegroup
1590 generation and debug commands.
1591 1591 """
1592 btext = [text]
1592 btext = [rawtext]
1593 1593 def buildtext():
1594 1594 if btext[0] is not None:
1595 1595 return btext[0]
1596 1596 baserev = cachedelta[0]
1597 1597 delta = cachedelta[1]
1598 1598 # special case deltas which replace the entire base; no need to decode
1599 1599 # base revision. this neatly avoids censored bases, which throw when
1600 1600 # they're decoded.
1601 1601 hlen = struct.calcsize(">lll")
1602 1602 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1603 1603 len(delta) - hlen):
1604 1604 btext[0] = delta[hlen:]
1605 1605 else:
1606 1606 if self._inline:
1607 1607 fh = ifh
1608 1608 else:
1609 1609 fh = dfh
1610 basetext = self.revision(baserev, _df=fh, raw=raw)
1610 basetext = self.revision(baserev, _df=fh, raw=True)
1611 1611 btext[0] = mdiff.patch(basetext, delta)
1612 1612
1613 1613 try:
1614 res = self._processflags(btext[0], flags, 'read', raw=raw)
1614 res = self._processflags(btext[0], flags, 'read', raw=True)
1615 1615 btext[0], validatehash = res
1616 1616 if validatehash:
1617 1617 self.checkhash(btext[0], node, p1=p1, p2=p2)
1618 1618 if flags & REVIDX_ISCENSORED:
1619 1619 raise RevlogError(_('node %s is not censored') % node)
1620 1620 except CensoredNodeError:
1621 1621 # must pass the censored index flag to add censored revisions
1622 1622 if not flags & REVIDX_ISCENSORED:
1623 1623 raise
1624 1624 return btext[0]
1625 1625
1626 1626 def builddelta(rev):
1627 1627 # can we use the cached delta?
1628 1628 if cachedelta and cachedelta[0] == rev:
1629 1629 delta = cachedelta[1]
1630 1630 else:
1631 1631 t = buildtext()
1632 1632 if self.iscensored(rev):
1633 1633 # deltas based on a censored revision must replace the
1634 1634 # full content in one patch, so delta works everywhere
1635 1635 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1636 1636 delta = header + t
1637 1637 else:
1638 1638 if self._inline:
1639 1639 fh = ifh
1640 1640 else:
1641 1641 fh = dfh
1642 1642 ptext = self.revision(rev, _df=fh, raw=True)
1643 1643 delta = mdiff.textdiff(ptext, t)
1644 1644 header, data = self.compress(delta)
1645 1645 deltalen = len(header) + len(data)
1646 1646 chainbase = self.chainbase(rev)
1647 1647 dist = deltalen + offset - self.start(chainbase)
1648 1648 if self._generaldelta:
1649 1649 base = rev
1650 1650 else:
1651 1651 base = chainbase
1652 1652 chainlen, compresseddeltalen = self._chaininfo(rev)
1653 1653 chainlen += 1
1654 1654 compresseddeltalen += deltalen
1655 1655 return (dist, deltalen, (header, data), base,
1656 1656 chainbase, chainlen, compresseddeltalen)
1657 1657
1658 1658 curr = len(self)
1659 1659 prev = curr - 1
1660 1660 offset = self.end(prev)
1661 1661 delta = None
1662 1662 p1r, p2r = self.rev(p1), self.rev(p2)
1663 1663
1664 1664 # full versions are inserted when the needed deltas
1665 1665 # become comparable to the uncompressed text
1666 if text is None:
1666 if rawtext is None:
1667 1667 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1668 1668 cachedelta[1])
1669 1669 else:
1670 textlen = len(text)
1670 textlen = len(rawtext)
1671 1671
1672 1672 # should we try to build a delta?
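# Candidate order, summarized (the first one passing _isgooddelta wins):
#   1. the delta received over the wire, when lazydeltabase applies
#   2. the closer parent, or both parents with aggressivemergedeltas
#   3. the previous revision
# If every candidate fails the check, a full text is stored below.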
1673 1673 if prev != nullrev and self.storedeltachains:
1674 1674 tested = set()
1675 1675 # This condition is true most of the time when processing
1676 1676 # changegroup data into a generaldelta repo. The only time it
1677 1677 # isn't true is if this is the first revision in a delta chain
1678 1678 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1679 1679 if cachedelta and self._generaldelta and self._lazydeltabase:
1680 1680 # Assume what we received from the server is a good choice
1681 1681 # build delta will reuse the cache
1682 1682 candidatedelta = builddelta(cachedelta[0])
1683 1683 tested.add(cachedelta[0])
1684 1684 if self._isgooddelta(candidatedelta, textlen):
1685 1685 delta = candidatedelta
1686 1686 if delta is None and self._generaldelta:
1687 1687 # exclude already lazy tested base if any
1688 1688 parents = [p for p in (p1r, p2r)
1689 1689 if p != nullrev and p not in tested]
1690 1690 if parents and not self._aggressivemergedeltas:
1691 1691 # Pick whichever parent is closer to us (to minimize the
1692 1692 # chance of having to build a fulltext).
1693 1693 parents = [max(parents)]
1694 1694 tested.update(parents)
1695 1695 pdeltas = []
1696 1696 for p in parents:
1697 1697 pd = builddelta(p)
1698 1698 if self._isgooddelta(pd, textlen):
1699 1699 pdeltas.append(pd)
1700 1700 if pdeltas:
1701 1701 delta = min(pdeltas, key=lambda x: x[1])
1702 1702 if delta is None and prev not in tested:
1703 1703 # other approaches failed, so try against prev to hopefully save us a
1704 1704 # fulltext.
1705 1705 candidatedelta = builddelta(prev)
1706 1706 if self._isgooddelta(candidatedelta, textlen):
1707 1707 delta = candidatedelta
1708 1708 if delta is not None:
1709 1709 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1710 1710 else:
1711 text = buildtext()
1712 data = self.compress(text)
1711 rawtext = buildtext()
1712 data = self.compress(rawtext)
1713 1713 l = len(data[1]) + len(data[0])
1714 1714 base = chainbase = curr
1715 1715
1716 1716 e = (offset_type(offset, flags), l, textlen,
1717 1717 base, link, p1r, p2r, node)
1718 1718 self.index.insert(-1, e)
1719 1719 self.nodemap[node] = curr
1720 1720
1721 1721 entry = self._io.packentry(e, self.node, self.version, curr)
1722 1722 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1723 1723
1724 if alwayscache and text is None:
1725 text = buildtext()
1724 if alwayscache and rawtext is None:
1725 rawtext = buildtext()
1726 1726
1727 if type(text) == str: # only accept immutable objects
1728 self._cache = (node, curr, text)
1727 if type(rawtext) == str: # only accept immutable objects
1728 self._cache = (node, curr, rawtext)
1729 1729 self._chainbasecache[curr] = chainbase
1730 1730 return node
1731 1731
1732 1732 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1733 1733 # Files opened in a+ mode have inconsistent behavior on various
1734 1734 # platforms. Windows requires that a file positioning call be made
1735 1735 # when the file handle transitions between reads and writes. See
1736 1736 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1737 1737 # platforms, Python or the platform itself can be buggy. Some versions
1738 1738 # of Solaris have been observed to not append at the end of the file
1739 1739 # if the file was seeked to before the end. See issue4943 for more.
1740 1740 #
1741 1741 # We work around this issue by inserting a seek() before writing.
1742 1742 # Note: This is likely not necessary on Python 3.
1743 1743 ifh.seek(0, os.SEEK_END)
1744 1744 if dfh:
1745 1745 dfh.seek(0, os.SEEK_END)
1746 1746
1747 1747 curr = len(self) - 1
1748 1748 if not self._inline:
1749 1749 transaction.add(self.datafile, offset)
1750 1750 transaction.add(self.indexfile, curr * len(entry))
1751 1751 if data[0]:
1752 1752 dfh.write(data[0])
1753 1753 dfh.write(data[1])
1754 1754 ifh.write(entry)
1755 1755 else:
1756 1756 offset += curr * self._io.size
1757 1757 transaction.add(self.indexfile, offset, curr)
1758 1758 ifh.write(entry)
1759 1759 ifh.write(data[0])
1760 1760 ifh.write(data[1])
1761 1761 self.checkinlinesize(transaction, ifh)
1762 1762
1763 1763 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1764 1764 """
1765 1765 add a delta group
1766 1766
1767 1767 given a set of deltas, add them to the revision log. the
1768 1768 first delta is against its parent, which should be in our
1769 1769 log, the rest are against the previous delta.
1770 1770
1771 1771 If ``addrevisioncb`` is defined, it will be called with arguments of
1772 1772 this revlog and the node that was added.
1773 1773 """
1774 1774
1775 1775 # track the base of the current delta log
1776 1776 content = []
1777 1777 node = None
1778 1778
1779 1779 r = len(self)
1780 1780 end = 0
1781 1781 if r:
1782 1782 end = self.end(r - 1)
1783 1783 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1784 1784 isize = r * self._io.size
1785 1785 if self._inline:
1786 1786 transaction.add(self.indexfile, end + isize, r)
1787 1787 dfh = None
1788 1788 else:
1789 1789 transaction.add(self.indexfile, isize, r)
1790 1790 transaction.add(self.datafile, end)
1791 1791 dfh = self.opener(self.datafile, "a+")
1792 1792 def flush():
1793 1793 if dfh:
1794 1794 dfh.flush()
1795 1795 ifh.flush()
1796 1796 try:
1797 1797 # loop through our set of deltas
1798 1798 chain = None
1799 1799 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1800 1800 node = chunkdata['node']
1801 1801 p1 = chunkdata['p1']
1802 1802 p2 = chunkdata['p2']
1803 1803 cs = chunkdata['cs']
1804 1804 deltabase = chunkdata['deltabase']
1805 1805 delta = chunkdata['delta']
1806 1806 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1807 1807
1808 1808 content.append(node)
1809 1809
1810 1810 link = linkmapper(cs)
1811 1811 if node in self.nodemap:
1812 1812 # this can happen if two branches make the same change
1813 1813 chain = node
1814 1814 continue
1815 1815
1816 1816 for p in (p1, p2):
1817 1817 if p not in self.nodemap:
1818 1818 raise LookupError(p, self.indexfile,
1819 1819 _('unknown parent'))
1820 1820
1821 1821 if deltabase not in self.nodemap:
1822 1822 raise LookupError(deltabase, self.indexfile,
1823 1823 _('unknown delta base'))
1824 1824
1825 1825 baserev = self.rev(deltabase)
1826 1826
1827 1827 if baserev != nullrev and self.iscensored(baserev):
1828 1828 # if base is censored, delta must be full replacement in a
1829 1829 # single patch operation
1830 1830 hlen = struct.calcsize(">lll")
1831 1831 oldlen = self.rawsize(baserev)
1832 1832 newlen = len(delta) - hlen
1833 1833 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1834 1834 raise error.CensoredBaseError(self.indexfile,
1835 1835 self.node(baserev))
1836 1836
1837 1837 if not flags and self._peek_iscensored(baserev, delta, flush):
1838 1838 flags |= REVIDX_ISCENSORED
1839 1839
1840 1840 # We assume consumers of addrevisioncb will want to retrieve
1841 1841 # the added revision, which will require a call to
1842 1842 # revision(). revision() will fast path if there is a cache
1843 1843 # hit. So, we tell _addrevision() to always cache in this case.
1844 1844 # We're only using addgroup() in the context of changegroup
1845 1845 # generation so the revision data can always be handled as raw
1846 1846 # by the flagprocessor.
1847 1847 chain = self._addrevision(node, None, transaction, link,
1848 1848 p1, p2, flags, (baserev, delta),
1849 1849 ifh, dfh,
1850 alwayscache=bool(addrevisioncb),
1851 raw=True)
1850 alwayscache=bool(addrevisioncb))
1852 1851
1853 1852 if addrevisioncb:
1854 1853 addrevisioncb(self, chain)
1855 1854
1856 1855 if not dfh and not self._inline:
1857 1856 # addrevision switched from inline to conventional
1858 1857 # reopen the index
1859 1858 ifh.close()
1860 1859 dfh = self.opener(self.datafile, "a+")
1861 1860 ifh = self.opener(self.indexfile, "a+",
1862 1861 checkambig=self._checkambig)
1863 1862 finally:
1864 1863 if dfh:
1865 1864 dfh.close()
1866 1865 ifh.close()
1867 1866
1868 1867 return content
1869 1868
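A hypothetical caller-side sketch of the addrevisioncb hook documented above; the callback receives this revlog and each newly added node:

    added = []                     # illustrative names, not revlog API
    def onadd(rl, node):
        added.append(node)
    # revlog.addgroup(cg, linkmapper, tr, addrevisioncb=onadd)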
1870 1869 def iscensored(self, rev):
1871 1870 """Check if a file revision is censored."""
1872 1871 return False
1873 1872
1874 1873 def _peek_iscensored(self, baserev, delta, flush):
1875 1874 """Quickly check if a delta produces a censored revision."""
1876 1875 return False
1877 1876
1878 1877 def getstrippoint(self, minlink):
1879 1878 """find the minimum rev that must be stripped to strip the linkrev
1880 1879
1881 1880 Returns a tuple containing the minimum rev and a set of all revs that
1882 1881 have linkrevs that will be broken by this strip.
1883 1882 """
1884 1883 brokenrevs = set()
1885 1884 strippoint = len(self)
1886 1885
1887 1886 heads = {}
1888 1887 futurelargelinkrevs = set()
1889 1888 for head in self.headrevs():
1890 1889 headlinkrev = self.linkrev(head)
1891 1890 heads[head] = headlinkrev
1892 1891 if headlinkrev >= minlink:
1893 1892 futurelargelinkrevs.add(headlinkrev)
1894 1893
1895 1894 # This algorithm involves walking down the rev graph, starting at the
1896 1895 # heads. Since the revs are topologically sorted according to linkrev,
1897 1896 # once all head linkrevs are below the minlink, we know there are
1898 1897 # no more revs that could have a linkrev greater than minlink.
1899 1898 # So we can stop walking.
1900 1899 while futurelargelinkrevs:
1901 1900 strippoint -= 1
1902 1901 linkrev = heads.pop(strippoint)
1903 1902
1904 1903 if linkrev < minlink:
1905 1904 brokenrevs.add(strippoint)
1906 1905 else:
1907 1906 futurelargelinkrevs.remove(linkrev)
1908 1907
1909 1908 for p in self.parentrevs(strippoint):
1910 1909 if p != nullrev:
1911 1910 plinkrev = self.linkrev(p)
1912 1911 heads[p] = plinkrev
1913 1912 if plinkrev >= minlink:
1914 1913 futurelargelinkrevs.add(plinkrev)
1915 1914
1916 1915 return strippoint, brokenrevs
1917 1916
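A worked example with made-up linkrevs: in a three-revision log where rev 0 carries linkrev 0 and revs 1 and 2 are both heads on top of rev 0, carrying linkrevs 2 and 1 respectively, getstrippoint(2) returns (1, set([2])). Truncating at rev 1 removes every rev with linkrev >= 2, but it also takes rev 2 (linkrev 1) with it, so rev 2 is reported as broken and must be saved and re-added by the caller.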
1918 1917 def strip(self, minlink, transaction):
1919 1918 """truncate the revlog on the first revision with a linkrev >= minlink
1920 1919
1921 1920 This function is called when we're stripping revision minlink and
1922 1921 its descendants from the repository.
1923 1922
1924 1923 We have to remove all revisions with linkrev >= minlink, because
1925 1924 the equivalent changelog revisions will be renumbered after the
1926 1925 strip.
1927 1926
1928 1927 So we truncate the revlog on the first of these revisions, and
1929 1928 trust that the caller has saved the revisions that shouldn't be
1930 1929 removed and that it'll re-add them after this truncation.
1931 1930 """
1932 1931 if len(self) == 0:
1933 1932 return
1934 1933
1935 1934 rev, _ = self.getstrippoint(minlink)
1936 1935 if rev == len(self):
1937 1936 return
1938 1937
1939 1938 # first truncate the files on disk
1940 1939 end = self.start(rev)
1941 1940 if not self._inline:
1942 1941 transaction.add(self.datafile, end)
1943 1942 end = rev * self._io.size
1944 1943 else:
1945 1944 end += rev * self._io.size
1946 1945
1947 1946 transaction.add(self.indexfile, end)
1948 1947
1949 1948 # then reset internal state in memory to forget those revisions
1950 1949 self._cache = None
1951 1950 self._chaininfocache = {}
1952 1951 self._chunkclear()
1953 1952 for x in xrange(rev, len(self)):
1954 1953 del self.nodemap[self.node(x)]
1955 1954
1956 1955 del self.index[rev:-1]
1957 1956
1958 1957 def checksize(self):
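# Returns (dd, di): the size discrepancy, in bytes, detected in the data
# file and in the index file respectively; (0, 0) means both files match
# the lengths implied by the index entries.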
1959 1958 expected = 0
1960 1959 if len(self):
1961 1960 expected = max(0, self.end(len(self) - 1))
1962 1961
1963 1962 try:
1964 1963 f = self.opener(self.datafile)
1965 1964 f.seek(0, 2)
1966 1965 actual = f.tell()
1967 1966 f.close()
1968 1967 dd = actual - expected
1969 1968 except IOError as inst:
1970 1969 if inst.errno != errno.ENOENT:
1971 1970 raise
1972 1971 dd = 0
1973 1972
1974 1973 try:
1975 1974 f = self.opener(self.indexfile)
1976 1975 f.seek(0, 2)
1977 1976 actual = f.tell()
1978 1977 f.close()
1979 1978 s = self._io.size
1980 1979 i = max(0, actual // s)
1981 1980 di = actual - (i * s)
1982 1981 if self._inline:
1983 1982 databytes = 0
1984 1983 for r in self:
1985 1984 databytes += max(0, self.length(r))
1986 1985 dd = 0
1987 1986 di = actual - len(self) * s - databytes
1988 1987 except IOError as inst:
1989 1988 if inst.errno != errno.ENOENT:
1990 1989 raise
1991 1990 di = 0
1992 1991
1993 1992 return (dd, di)
1994 1993
1995 1994 def files(self):
1996 1995 res = [self.indexfile]
1997 1996 if not self._inline:
1998 1997 res.append(self.datafile)
1999 1998 return res
2000 1999
2001 2000 DELTAREUSEALWAYS = 'always'
2002 2001 DELTAREUSESAMEREVS = 'samerevs'
2003 2002 DELTAREUSENEVER = 'never'
2004 2003
2005 2004 DELTAREUSEALL = set(['always', 'samerevs', 'never'])
2006 2005
2007 2006 def clone(self, tr, destrevlog, addrevisioncb=None,
2008 2007 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2009 2008 """Copy this revlog to another, possibly with format changes.
2010 2009
2011 2010 The destination revlog will contain the same revisions and nodes.
2012 2011 However, it may not be bit-for-bit identical due to e.g. delta encoding
2013 2012 differences.
2014 2013
2015 2014 The ``deltareuse`` argument controls how deltas from the existing revlog
2016 2015 are preserved in the destination revlog. The argument can have the
2017 2016 following values:
2018 2017
2019 2018 DELTAREUSEALWAYS
2020 2019 Deltas will always be reused (if possible), even if the destination
2021 2020 revlog would not select the same revisions for the delta. This is the
2022 2021 fastest mode of operation.
2023 2022 DELTAREUSESAMEREVS
2024 2023 Deltas will be reused if the destination revlog would pick the same
2025 2024 revisions for the delta. This mode strikes a balance between speed
2026 2025 and optimization.
2027 2026 DELTAREUSENEVER
2028 2027 Deltas will never be reused. This is the slowest mode of execution.
2029 2028 This mode can be used to recompute deltas (e.g. if the diff/delta
2030 2029 algorithm changes).
2031 2030
2032 2031 Delta computation can be slow, so the choice of delta reuse policy can
2033 2032 significantly affect run time.
2034 2033
2035 2034 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2036 2035 two extremes. Deltas will be reused when they are appropriate. But if a
2037 2036 better delta base is available, it will be chosen. This means if you
2038 2037 are converting a non-generaldelta revlog to a generaldelta revlog,
2039 2038 deltas will be recomputed if the delta's parent isn't a parent of the
2040 2039 revision.
2041 2040
2042 2041 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2043 2042 controls whether to compute deltas against both parents for merges.
2044 2043 If it is not set, the destination revlog's existing setting is kept.
2045 2044 """
2046 2045 if deltareuse not in self.DELTAREUSEALL:
2047 2046 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2048 2047
2049 2048 if len(destrevlog):
2050 2049 raise ValueError(_('destination revlog is not empty'))
2051 2050
2052 2051 if getattr(self, 'filteredrevs', None):
2053 2052 raise ValueError(_('source revlog has filtered revisions'))
2054 2053 if getattr(destrevlog, 'filteredrevs', None):
2055 2054 raise ValueError(_('destination revlog has filtered revisions'))
2056 2055
2057 2056 # lazydeltabase controls whether to reuse a cached delta, if possible.
2058 2057 oldlazydeltabase = destrevlog._lazydeltabase
2059 2058 oldamd = destrevlog._aggressivemergedeltas
2060 2059
2061 2060 try:
2062 2061 if deltareuse == self.DELTAREUSEALWAYS:
2063 2062 destrevlog._lazydeltabase = True
2064 2063 elif deltareuse == self.DELTAREUSESAMEREVS:
2065 2064 destrevlog._lazydeltabase = False
2066 2065
2067 2066 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2068 2067
2069 2068 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2070 2069 self.DELTAREUSESAMEREVS)
2071 2070
2072 2071 index = self.index
2073 2072 for rev in self:
2074 2073 entry = index[rev]
2075 2074
2076 2075 # Some classes override linkrev to take filtered revs into
2077 2076 # account. Use raw entry from index.
2078 2077 flags = entry[0] & 0xffff
2079 2078 linkrev = entry[4]
2080 2079 p1 = index[entry[5]][7]
2081 2080 p2 = index[entry[6]][7]
2082 2081 node = entry[7]
2083 2082
2084 2083 # (Possibly) reuse the delta from the revlog if allowed and
2085 2084 # the revlog chunk is a delta.
2086 2085 cachedelta = None
2087 2086 rawtext = None
2088 2087 if populatecachedelta:
2089 2088 dp = self.deltaparent(rev)
2090 2089 if dp != nullrev:
2091 2090 cachedelta = (dp, str(self._chunk(rev)))
2092 2091
2093 2092 if not cachedelta:
2094 2093 rawtext = self.revision(rev, raw=True)
2095 2094
2096 2095 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2097 2096 checkambig=False)
2098 2097 dfh = None
2099 2098 if not destrevlog._inline:
2100 2099 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2101 2100 try:
2102 2101 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2103 2102 flags, cachedelta, ifh, dfh)
2104 2103 finally:
2105 2104 if dfh:
2106 2105 dfh.close()
2107 2106 ifh.close()
2108 2107
2109 2108 if addrevisioncb:
2110 2109 addrevisioncb(self, rev, node)
2111 2110 finally:
2112 2111 destrevlog._lazydeltabase = oldlazydeltabase
2113 2112 destrevlog._aggressivemergedeltas = oldamd
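A hypothetical invocation for illustration (tr is an open transaction, dest an empty revlog): recomputing every delta while converting to a generaldelta revlog could look like

    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER,
              aggressivemergedeltas=True)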
@@ -1,4 +1,4
1 1 local test passed
2 2 addgroupcopy test passed
3 3 clone test passed
4 abort: crashed: integrity check failed on _destrevlog.i:5
4 lowlevelcopy test passed