shortest: make pure code also disambiguate against revnums at end...
Martin von Zweigbergk
r37988:0db7fe7c default
@@ -1,2623 +1,2614 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
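Editor's note: a minimal round-trip sketch of the "\1\n" metadata framing that parsemeta() and packmeta() handle above, assuming the module is importable as mercurial.revlog and using py2-style bytes-as-str strings like the surrounding code; 'copy'/'copyrev' are the usual filelog metadata keys.

from mercurial import revlog

blob = revlog.packmeta({'copy': 'a.txt', 'copyrev': '0' * 40}, 'file body\n')
# blob is '\1\n' + 'copy: a.txt\ncopyrev: 00...00\n' + '\1\n' + 'file body\n'
meta, size = revlog.parsemeta(blob)
assert meta == {'copy': 'a.txt', 'copyrev': '0' * 40}
assert blob[size:] == 'file body\n'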
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate hash. If
142 142 "write" returns True, that basically means "rawtext" returned by "write"
143 143 should be used to generate hash. Usually, "write" and "read" return
144 144 different booleans. And "raw" returns a same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
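Editor's note: a hedged sketch of the registration contract documented above, written as if at module level here. The three transforms are hypothetical identity functions (real users of REVIDX_EXTSTORED, such as the lfs extension, do actual work); the booleans follow the hash-checking convention spelled out in the docstring.

def _readext(self, rawtext):
    return rawtext, True   # (text, text may be used for hash checking)

def _writeext(self, text):
    return text, True      # (rawtext, hash should be computed on rawtext)

def _rawext(self, rawtext):
    return True            # rawtext is usable for hash checking as-is

addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))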
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
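Editor's note: a quick illustration of the 48/16-bit packing above. The byte offset lives in the high bits of the index's first field and the flags in the low 16 bits, so a known (offset, flags) pair survives the round trip; unknown flag bits are rejected.

q = offset_type(1048576, REVIDX_ISCENSORED)
assert getoffset(q) == 1048576
assert gettype(q) == REVIDX_ISCENSORED
# offset_type(0, 1 << 1) would raise ValueError: 1 << 1 is not a known flag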
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
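Editor's note: a sanity check of the parent-ordering rule above, using the module-level hash() just defined. nullid is twenty zero bytes and therefore always sorts first, so the one-null-parent fast path yields the same digest as the generic sorted-parents path would.

import hashlib

p1 = '\x01' * 20
expected = hashlib.sha1('\x00' * 20 + p1 + 'some text').digest()
assert hash('some text', p1, '\x00' * 20) == expected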
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should each be read in one go.
217 217 Assume that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # the gap sizes are stored as negatives to be sorted decreasingly
266 266 # by the heap
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
287 287
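Editor's note: a toy run of the slicing above, with a hypothetical stub standing in for a revlog (only the attributes _slicechunk() reads are provided). The chain's payload is 80 bytes over a 140-byte span, a density of about 0.57; that is below the 0.75 threshold, so the 60-byte hole between revs 1 and 2 is skipped and the chain is read as two groups.

class fakelog(object):
    _srdensitythreshold = 0.75
    _srmingapsize = 10
    def start(self, rev):
        # byte offset of each rev's chunk (hypothetical layout)
        return {0: 0, 1: 20, 2: 100, 3: 120}[rev]
    def length(self, rev):
        return 20  # every rev stores a 20-byte chunk

assert list(_slicechunk(fakelog(), [0, 1, 2, 3])) == [[0, 1], [2, 3]]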
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides candidate revisions that are worth diffing against,
305 305 grouped by level of ease.
306 306 """
307 307 revlog = self.revlog
308 308 curr = len(revlog)
309 309 prev = curr - 1
310 310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
311 311
312 312 # should we try to build a delta?
313 313 if prev != nullrev and revlog.storedeltachains:
314 314 tested = set()
315 315 # This condition is true most of the time when processing
316 316 # changegroup data into a generaldelta repo. The only time it
317 317 # isn't true is if this is the first revision in a delta chain
318 318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
319 319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
320 320 # Assume what we received from the server is a good choice
321 321 # build delta will reuse the cache
322 322 yield (cachedelta[0],)
323 323 tested.add(cachedelta[0])
324 324
325 325 if revlog._generaldelta:
326 326 # exclude already lazy tested base if any
327 327 parents = [p for p in (p1r, p2r)
328 328 if p != nullrev and p not in tested]
329 329 if parents and not revlog._aggressivemergedeltas:
330 330 # Pick whichever parent is closer to us (to minimize the
331 331 # chance of having to build a fulltext).
332 332 parents = [max(parents)]
333 333 tested.update(parents)
334 334 yield parents
335 335
336 336 if prev not in tested:
337 337 # other approaches failed; try against prev to hopefully save us a
338 338 # fulltext.
339 339 yield (prev,)
340 340
341 341 def buildtext(self, revinfo, fh):
342 342 """Builds a fulltext version of a revision
343 343
344 344 revinfo: _revisioninfo instance that contains all needed info
345 345 fh: file handle to either the .i or the .d revlog file,
346 346 depending on whether it is inlined or not
347 347 """
348 348 btext = revinfo.btext
349 349 if btext[0] is not None:
350 350 return btext[0]
351 351
352 352 revlog = self.revlog
353 353 cachedelta = revinfo.cachedelta
354 354 flags = revinfo.flags
355 355 node = revinfo.node
356 356
357 357 baserev = cachedelta[0]
358 358 delta = cachedelta[1]
359 359 # special case deltas which replace entire base; no need to decode
360 360 # base revision. this neatly avoids censored bases, which throw when
361 361 # they're decoded.
362 362 hlen = struct.calcsize(">lll")
363 363 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
364 364 len(delta) - hlen):
365 365 btext[0] = delta[hlen:]
366 366 else:
367 367 # deltabase is rawtext before being changed by flag processors, which is
368 368 # equivalent to non-raw text
369 369 basetext = revlog.revision(baserev, _df=fh, raw=False)
370 370 btext[0] = mdiff.patch(basetext, delta)
371 371
372 372 try:
373 373 res = revlog._processflags(btext[0], flags, 'read', raw=True)
374 374 btext[0], validatehash = res
375 375 if validatehash:
376 376 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
377 377 if flags & REVIDX_ISCENSORED:
378 378 raise RevlogError(_('node %s is not censored') % node)
379 379 except CensoredNodeError:
380 380 # must pass the censored index flag to add censored revisions
381 381 if not flags & REVIDX_ISCENSORED:
382 382 raise
383 383 return btext[0]
384 384
385 385 def _builddeltadiff(self, base, revinfo, fh):
386 386 revlog = self.revlog
387 387 t = self.buildtext(revinfo, fh)
388 388 if revlog.iscensored(base):
389 389 # deltas based on a censored revision must replace the
390 390 # full content in one patch, so delta works everywhere
391 391 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
392 392 delta = header + t
393 393 else:
394 394 ptext = revlog.revision(base, _df=fh, raw=True)
395 395 delta = mdiff.textdiff(ptext, t)
396 396
397 397 return delta
398 398
399 399 def _builddeltainfo(self, revinfo, base, fh):
400 400 # can we use the cached delta?
401 401 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
402 402 delta = revinfo.cachedelta[1]
403 403 else:
404 404 delta = self._builddeltadiff(base, revinfo, fh)
405 405 revlog = self.revlog
406 406 header, data = revlog.compress(delta)
407 407 deltalen = len(header) + len(data)
408 408 chainbase = revlog.chainbase(base)
409 409 offset = revlog.end(len(revlog) - 1)
410 410 dist = deltalen + offset - revlog.start(chainbase)
411 411 if revlog._generaldelta:
412 412 deltabase = base
413 413 else:
414 414 deltabase = chainbase
415 415 chainlen, compresseddeltalen = revlog._chaininfo(base)
416 416 chainlen += 1
417 417 compresseddeltalen += deltalen
418 418 return _deltainfo(dist, deltalen, (header, data), deltabase,
419 419 chainbase, chainlen, compresseddeltalen)
420 420
421 421 def finddeltainfo(self, revinfo, fh):
422 422 """Find an acceptable delta against a candidate revision
423 423
424 424 revinfo: information about the revision (instance of _revisioninfo)
425 425 fh: file handle to either the .i or the .d revlog file,
426 426 depending on whether it is inlined or not
427 427
428 428 Returns the first acceptable candidate revision, as ordered by
429 429 _getcandidaterevs
430 430 """
431 431 cachedelta = revinfo.cachedelta
432 432 p1 = revinfo.p1
433 433 p2 = revinfo.p2
434 434 revlog = self.revlog
435 435
436 436 deltainfo = None
437 437 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
438 438 nominateddeltas = []
439 439 for candidaterev in candidaterevs:
440 440 # no delta for rawtext-changing revs (see "candelta" for why)
441 441 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
442 442 continue
443 443 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
444 444 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
445 445 nominateddeltas.append(candidatedelta)
446 446 if nominateddeltas:
447 447 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
448 448 break
449 449
450 450 return deltainfo
451 451
452 452 @attr.s(slots=True, frozen=True)
453 453 class _revisioninfo(object):
454 454 """Information about a revision that allows building its fulltext
455 455 node: expected hash of the revision
456 456 p1, p2: parent revs of the revision
457 457 btext: built text cache consisting of a one-element list
458 458 cachedelta: (baserev, uncompressed_delta) or None
459 459 flags: flags associated to the revision storage
460 460
461 461 One of btext[0] or cachedelta must be set.
462 462 """
463 463 node = attr.ib()
464 464 p1 = attr.ib()
465 465 p2 = attr.ib()
466 466 btext = attr.ib()
467 467 textlen = attr.ib()
468 468 cachedelta = attr.ib()
469 469 flags = attr.ib()
470 470
471 471 # index v0:
472 472 # 4 bytes: offset
473 473 # 4 bytes: compressed length
474 474 # 4 bytes: base rev
475 475 # 4 bytes: link rev
476 476 # 20 bytes: parent 1 nodeid
477 477 # 20 bytes: parent 2 nodeid
478 478 # 20 bytes: nodeid
479 479 indexformatv0 = struct.Struct(">4l20s20s20s")
480 480 indexformatv0_pack = indexformatv0.pack
481 481 indexformatv0_unpack = indexformatv0.unpack
482 482
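Editor's note: the v0 record layout from the comment above, exercised with dummy values; four big-endian 4-byte integers followed by three raw 20-byte nodeids, 76 bytes in all.

entry = indexformatv0_pack(0, 11, 0, 0,   # offset, length, base rev, link rev
                           '\x00' * 20,   # p1 nodeid
                           '\x00' * 20,   # p2 nodeid
                           '\xaa' * 20)   # this revision's nodeid
assert len(entry) == indexformatv0.size == 76
assert indexformatv0_unpack(entry)[1] == 11   # compressed length round-trips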
483 483 class revlogoldio(object):
484 484 def __init__(self):
485 485 self.size = indexformatv0.size
486 486
487 487 def parseindex(self, data, inline):
488 488 s = self.size
489 489 index = []
490 490 nodemap = {nullid: nullrev}
491 491 n = off = 0
492 492 l = len(data)
493 493 while off + s <= l:
494 494 cur = data[off:off + s]
495 495 off += s
496 496 e = indexformatv0_unpack(cur)
497 497 # transform to revlogv1 format
498 498 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
499 499 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
500 500 index.append(e2)
501 501 nodemap[e[6]] = n
502 502 n += 1
503 503
504 504 # add the magic null revision at -1
505 505 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
506 506
507 507 return index, nodemap, None
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 if gettype(entry[0]):
511 511 raise RevlogError(_('index entry flags need revlog version 1'))
512 512 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
513 513 node(entry[5]), node(entry[6]), entry[7])
514 514 return indexformatv0_pack(*e2)
515 515
516 516 # index ng:
517 517 # 6 bytes: offset
518 518 # 2 bytes: flags
519 519 # 4 bytes: compressed length
520 520 # 4 bytes: uncompressed length
521 521 # 4 bytes: base rev
522 522 # 4 bytes: link rev
523 523 # 4 bytes: parent 1 rev
524 524 # 4 bytes: parent 2 rev
525 525 # 32 bytes: nodeid
526 526 indexformatng = struct.Struct(">Qiiiiii20s12x")
527 527 indexformatng_pack = indexformatng.pack
528 528 versionformat = struct.Struct(">I")
529 529 versionformat_pack = versionformat.pack
530 530 versionformat_unpack = versionformat.unpack
531 531
532 532 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
533 533 # signed integer)
534 534 _maxentrysize = 0x7fffffff
535 535
536 536 class revlogio(object):
537 537 def __init__(self):
538 538 self.size = indexformatng.size
539 539
540 540 def parseindex(self, data, inline):
541 541 # call the C implementation to parse the index data
542 542 index, cache = parsers.parse_index2(data, inline)
543 543 return index, getattr(index, 'nodemap', None), cache
544 544
545 545 def packentry(self, entry, node, version, rev):
546 546 p = indexformatng_pack(*entry)
547 547 if rev == 0:
548 548 p = versionformat_pack(version) + p[4:]
549 549 return p
550 550
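Editor's note: a sketch of how packentry() above smuggles the revlog header into the index. Rev 0 always has offset 0, so the high four bytes of its ">Q" offset+flags field are guaranteed zero and can carry the version word without losing information (the node argument is unused by this io class, hence None).

io = revlogio()
entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, '\xaa' * 20)
packed = io.packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
assert len(packed) == indexformatng.size
assert versionformat_unpack(packed[:4])[0] == REVLOG_DEFAULT_VERSION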
551 551 class revlog(object):
552 552 """
553 553 the underlying revision storage object
554 554
555 555 A revlog consists of two parts, an index and the revision data.
556 556
557 557 The index is a file with a fixed record size containing
558 558 information on each revision, including its nodeid (hash), the
559 559 nodeids of its parents, the position and offset of its data within
560 560 the data file, and the revision it's based on. Finally, each entry
561 561 contains a linkrev entry that can serve as a pointer to external
562 562 data.
563 563
564 564 The revision data itself is a linear collection of data chunks.
565 565 Each chunk represents a revision and is usually represented as a
566 566 delta against the previous chunk. To bound lookup time, runs of
567 567 deltas are limited to about 2 times the length of the original
568 568 version data. This makes retrieval of a version proportional to
569 569 its size, or O(1) relative to the number of revisions.
570 570
571 571 Both pieces of the revlog are written to in an append-only
572 572 fashion, which means we never need to rewrite a file to insert or
573 573 remove data, and can use some simple techniques to avoid the need
574 574 for locking while reading.
575 575
576 576 If checkambig, indexfile is opened with checkambig=True at
577 577 writing, to avoid file stat ambiguity.
578 578
579 579 If mmaplargeindex is True, and an mmapindexthreshold is set, the
580 580 index will be mmapped rather than read if it is larger than the
581 581 configured threshold.
582 582
583 583 If censorable is True, the revlog can have censored revisions.
584 584 """
585 585 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
586 586 mmaplargeindex=False, censorable=False):
587 587 """
588 588 create a revlog object
589 589
590 590 opener is a function that abstracts the file opening operation
591 591 and can be used to implement COW semantics or the like.
592 592 """
593 593 self.indexfile = indexfile
594 594 self.datafile = datafile or (indexfile[:-2] + ".d")
595 595 self.opener = opener
596 596 # When True, indexfile is opened with checkambig=True at writing, to
597 597 # avoid file stat ambiguity.
598 598 self._checkambig = checkambig
599 599 self._censorable = censorable
600 600 # 3-tuple of (node, rev, text) for a raw revision.
601 601 self._cache = None
602 602 # Maps rev to chain base rev.
603 603 self._chainbasecache = util.lrucachedict(100)
604 604 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
605 605 self._chunkcache = (0, '')
606 606 # How much data to read and cache into the raw revlog data cache.
607 607 self._chunkcachesize = 65536
608 608 self._maxchainlen = None
609 609 self._aggressivemergedeltas = False
610 610 self.index = []
611 611 # Mapping of partial identifiers to full nodes.
612 612 self._pcache = {}
613 613 # Mapping of revision integer to full node.
614 614 self._nodecache = {nullid: nullrev}
615 615 self._nodepos = None
616 616 self._compengine = 'zlib'
617 617 self._maxdeltachainspan = -1
618 618 self._withsparseread = False
619 619 self._srdensitythreshold = 0.25
620 620 self._srmingapsize = 262144
621 621
622 622 mmapindexthreshold = None
623 623 v = REVLOG_DEFAULT_VERSION
624 624 opts = getattr(opener, 'options', None)
625 625 if opts is not None:
626 626 if 'revlogv2' in opts:
627 627 # version 2 revlogs always use generaldelta.
628 628 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
629 629 elif 'revlogv1' in opts:
630 630 if 'generaldelta' in opts:
631 631 v |= FLAG_GENERALDELTA
632 632 else:
633 633 v = 0
634 634 if 'chunkcachesize' in opts:
635 635 self._chunkcachesize = opts['chunkcachesize']
636 636 if 'maxchainlen' in opts:
637 637 self._maxchainlen = opts['maxchainlen']
638 638 if 'aggressivemergedeltas' in opts:
639 639 self._aggressivemergedeltas = opts['aggressivemergedeltas']
640 640 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
641 641 if 'compengine' in opts:
642 642 self._compengine = opts['compengine']
643 643 if 'maxdeltachainspan' in opts:
644 644 self._maxdeltachainspan = opts['maxdeltachainspan']
645 645 if mmaplargeindex and 'mmapindexthreshold' in opts:
646 646 mmapindexthreshold = opts['mmapindexthreshold']
647 647 self._withsparseread = bool(opts.get('with-sparse-read', False))
648 648 if 'sparse-read-density-threshold' in opts:
649 649 self._srdensitythreshold = opts['sparse-read-density-threshold']
650 650 if 'sparse-read-min-gap-size' in opts:
651 651 self._srmingapsize = opts['sparse-read-min-gap-size']
652 652
653 653 if self._chunkcachesize <= 0:
654 654 raise RevlogError(_('revlog chunk cache size %r is not greater '
655 655 'than 0') % self._chunkcachesize)
656 656 elif self._chunkcachesize & (self._chunkcachesize - 1):
657 657 raise RevlogError(_('revlog chunk cache size %r is not a power '
658 658 'of 2') % self._chunkcachesize)
659 659
660 660 indexdata = ''
661 661 self._initempty = True
662 662 try:
663 663 with self._indexfp() as f:
664 664 if (mmapindexthreshold is not None and
665 665 self.opener.fstat(f).st_size >= mmapindexthreshold):
666 666 indexdata = util.buffer(util.mmapread(f))
667 667 else:
668 668 indexdata = f.read()
669 669 if len(indexdata) > 0:
670 670 v = versionformat_unpack(indexdata[:4])[0]
671 671 self._initempty = False
672 672 except IOError as inst:
673 673 if inst.errno != errno.ENOENT:
674 674 raise
675 675
676 676 self.version = v
677 677 self._inline = v & FLAG_INLINE_DATA
678 678 self._generaldelta = v & FLAG_GENERALDELTA
679 679 flags = v & ~0xFFFF
680 680 fmt = v & 0xFFFF
681 681 if fmt == REVLOGV0:
682 682 if flags:
683 683 raise RevlogError(_('unknown flags (%#04x) in version %d '
684 684 'revlog %s') %
685 685 (flags >> 16, fmt, self.indexfile))
686 686 elif fmt == REVLOGV1:
687 687 if flags & ~REVLOGV1_FLAGS:
688 688 raise RevlogError(_('unknown flags (%#04x) in version %d '
689 689 'revlog %s') %
690 690 (flags >> 16, fmt, self.indexfile))
691 691 elif fmt == REVLOGV2:
692 692 if flags & ~REVLOGV2_FLAGS:
693 693 raise RevlogError(_('unknown flags (%#04x) in version %d '
694 694 'revlog %s') %
695 695 (flags >> 16, fmt, self.indexfile))
696 696 else:
697 697 raise RevlogError(_('unknown version (%d) in revlog %s') %
698 698 (fmt, self.indexfile))
699 699
700 700 self.storedeltachains = True
701 701
702 702 self._io = revlogio()
703 703 if self.version == REVLOGV0:
704 704 self._io = revlogoldio()
705 705 try:
706 706 d = self._io.parseindex(indexdata, self._inline)
707 707 except (ValueError, IndexError):
708 708 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
709 709 self.index, nodemap, self._chunkcache = d
710 710 if nodemap is not None:
711 711 self.nodemap = self._nodecache = nodemap
712 712 if not self._chunkcache:
713 713 self._chunkclear()
714 714 # revnum -> (chain-length, sum-delta-length)
715 715 self._chaininfocache = {}
716 716 # revlog header -> revlog compressor
717 717 self._decompressors = {}
718 718
719 719 @util.propertycache
720 720 def _compressor(self):
721 721 return util.compengines[self._compengine].revlogcompressor()
722 722
723 723 def _indexfp(self, mode='r'):
724 724 """file object for the revlog's index file"""
725 725 args = {r'mode': mode}
726 726 if mode != 'r':
727 727 args[r'checkambig'] = self._checkambig
728 728 if mode == 'w':
729 729 args[r'atomictemp'] = True
730 730 return self.opener(self.indexfile, **args)
731 731
732 732 def _datafp(self, mode='r'):
733 733 """file object for the revlog's data file"""
734 734 return self.opener(self.datafile, mode=mode)
735 735
736 736 @contextlib.contextmanager
737 737 def _datareadfp(self, existingfp=None):
738 738 """file object suitable to read data"""
739 739 if existingfp is not None:
740 740 yield existingfp
741 741 else:
742 742 if self._inline:
743 743 func = self._indexfp
744 744 else:
745 745 func = self._datafp
746 746 with func() as fp:
747 747 yield fp
748 748
749 749 def tip(self):
750 750 return self.node(len(self.index) - 2)
751 751 def __contains__(self, rev):
752 752 return 0 <= rev < len(self)
753 753 def __len__(self):
754 754 return len(self.index) - 1
755 755 def __iter__(self):
756 756 return iter(xrange(len(self)))
757 757 def revs(self, start=0, stop=None):
758 758 """iterate over all rev in this revlog (from start to stop)"""
759 759 step = 1
760 760 if stop is not None:
761 761 if start > stop:
762 762 step = -1
763 763 stop += step
764 764 else:
765 765 stop = len(self)
766 766 return xrange(start, stop, step)
767 767
768 768 @util.propertycache
769 769 def nodemap(self):
770 770 self.rev(self.node(0))
771 771 return self._nodecache
772 772
773 773 def hasnode(self, node):
774 774 try:
775 775 self.rev(node)
776 776 return True
777 777 except KeyError:
778 778 return False
779 779
780 780 def candelta(self, baserev, rev):
781 781 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 782 # Disable delta if either rev requires a content-changing flag
783 783 # processor (ex. LFS). This is because such flag processor can alter
784 784 # the rawtext content that the delta will be based on, and two clients
785 785 # could have a same revlog node with different flags (i.e. different
786 786 # rawtext contents) and the delta could be incompatible.
787 787 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
788 788 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
789 789 return False
790 790 return True
791 791
792 792 def clearcaches(self):
793 793 self._cache = None
794 794 self._chainbasecache.clear()
795 795 self._chunkcache = (0, '')
796 796 self._pcache = {}
797 797
798 798 try:
799 799 self._nodecache.clearcaches()
800 800 except AttributeError:
801 801 self._nodecache = {nullid: nullrev}
802 802 self._nodepos = None
803 803
804 804 def rev(self, node):
805 805 try:
806 806 return self._nodecache[node]
807 807 except TypeError:
808 808 raise
809 809 except RevlogError:
810 810 # parsers.c radix tree lookup failed
811 811 if node == wdirid or node in wdirfilenodeids:
812 812 raise error.WdirUnsupported
813 813 raise LookupError(node, self.indexfile, _('no node'))
814 814 except KeyError:
815 815 # pure python cache lookup failed
816 816 n = self._nodecache
817 817 i = self.index
818 818 p = self._nodepos
819 819 if p is None:
820 820 p = len(i) - 2
821 821 else:
822 822 assert p < len(i)
823 823 for r in xrange(p, -1, -1):
824 824 v = i[r][7]
825 825 n[v] = r
826 826 if v == node:
827 827 self._nodepos = r - 1
828 828 return r
829 829 if node == wdirid or node in wdirfilenodeids:
830 830 raise error.WdirUnsupported
831 831 raise LookupError(node, self.indexfile, _('no node'))
832 832
833 833 # Accessors for index entries.
834 834
835 835 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
836 836 # are flags.
837 837 def start(self, rev):
838 838 return int(self.index[rev][0] >> 16)
839 839
840 840 def flags(self, rev):
841 841 return self.index[rev][0] & 0xFFFF
842 842
843 843 def length(self, rev):
844 844 return self.index[rev][1]
845 845
846 846 def rawsize(self, rev):
847 847 """return the length of the uncompressed text for a given revision"""
848 848 l = self.index[rev][2]
849 849 if l >= 0:
850 850 return l
851 851
852 852 t = self.revision(rev, raw=True)
853 853 return len(t)
854 854
855 855 def size(self, rev):
856 856 """length of non-raw text (processed by a "read" flag processor)"""
857 857 # fast path: if no "read" flag processor could change the content,
858 858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 859 flags = self.flags(rev)
860 860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 861 return self.rawsize(rev)
862 862
863 863 return len(self.revision(rev, raw=False))
864 864
865 865 def chainbase(self, rev):
866 866 base = self._chainbasecache.get(rev)
867 867 if base is not None:
868 868 return base
869 869
870 870 index = self.index
871 871 base = index[rev][3]
872 872 while base != rev:
873 873 rev = base
874 874 base = index[rev][3]
875 875
876 876 self._chainbasecache[rev] = base
877 877 return base
878 878
879 879 def linkrev(self, rev):
880 880 return self.index[rev][4]
881 881
882 882 def parentrevs(self, rev):
883 883 try:
884 884 entry = self.index[rev]
885 885 except IndexError:
886 886 if rev == wdirrev:
887 887 raise error.WdirUnsupported
888 888 raise
889 889
890 890 return entry[5], entry[6]
891 891
892 892 def node(self, rev):
893 893 try:
894 894 return self.index[rev][7]
895 895 except IndexError:
896 896 if rev == wdirrev:
897 897 raise error.WdirUnsupported
898 898 raise
899 899
900 900 # Derived from index values.
901 901
902 902 def end(self, rev):
903 903 return self.start(rev) + self.length(rev)
904 904
905 905 def parents(self, node):
906 906 i = self.index
907 907 d = i[self.rev(node)]
908 908 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
909 909
910 910 def chainlen(self, rev):
911 911 return self._chaininfo(rev)[0]
912 912
913 913 def _chaininfo(self, rev):
914 914 chaininfocache = self._chaininfocache
915 915 if rev in chaininfocache:
916 916 return chaininfocache[rev]
917 917 index = self.index
918 918 generaldelta = self._generaldelta
919 919 iterrev = rev
920 920 e = index[iterrev]
921 921 clen = 0
922 922 compresseddeltalen = 0
923 923 while iterrev != e[3]:
924 924 clen += 1
925 925 compresseddeltalen += e[1]
926 926 if generaldelta:
927 927 iterrev = e[3]
928 928 else:
929 929 iterrev -= 1
930 930 if iterrev in chaininfocache:
931 931 t = chaininfocache[iterrev]
932 932 clen += t[0]
933 933 compresseddeltalen += t[1]
934 934 break
935 935 e = index[iterrev]
936 936 else:
937 937 # Add text length of base since decompressing that also takes
938 938 # work. For cache hits the length is already included.
939 939 compresseddeltalen += e[1]
940 940 r = (clen, compresseddeltalen)
941 941 chaininfocache[rev] = r
942 942 return r
943 943
944 944 def _deltachain(self, rev, stoprev=None):
945 945 """Obtain the delta chain for a revision.
946 946
947 947 ``stoprev`` specifies a revision to stop at. If not specified, we
948 948 stop at the base of the chain.
949 949
950 950 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
951 951 revs in ascending order and ``stopped`` is a bool indicating whether
952 952 ``stoprev`` was hit.
953 953 """
954 954 # Try C implementation.
955 955 try:
956 956 return self.index.deltachain(rev, stoprev, self._generaldelta)
957 957 except AttributeError:
958 958 pass
959 959
960 960 chain = []
961 961
962 962 # Alias to prevent attribute lookup in tight loop.
963 963 index = self.index
964 964 generaldelta = self._generaldelta
965 965
966 966 iterrev = rev
967 967 e = index[iterrev]
968 968 while iterrev != e[3] and iterrev != stoprev:
969 969 chain.append(iterrev)
970 970 if generaldelta:
971 971 iterrev = e[3]
972 972 else:
973 973 iterrev -= 1
974 974 e = index[iterrev]
975 975
976 976 if iterrev == stoprev:
977 977 stopped = True
978 978 else:
979 979 chain.append(iterrev)
980 980 stopped = False
981 981
982 982 chain.reverse()
983 983 return chain, stopped
984 984
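Editor's note: a standalone walk of the pure-python fallback above for a generaldelta chain, with a toy rev-to-delta-base mapping in place of index field 3 (hypothetical data; the control flow mirrors the loop above with stoprev=None).

base = {0: 0, 1: 0, 2: 1}   # rev 0 is a fulltext: its base is itself
chain, iterrev = [], 2
while iterrev != base[iterrev]:   # stop at the chain base
    chain.append(iterrev)
    iterrev = base[iterrev]
chain.append(iterrev)             # stoprev was not hit, include the base
chain.reverse()
assert chain == [0, 1, 2]         # ascending order, as the docstring says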
985 985 def ancestors(self, revs, stoprev=0, inclusive=False):
986 986 """Generate the ancestors of 'revs' in reverse topological order.
987 987 Does not generate revs lower than stoprev.
988 988
989 989 See the documentation for ancestor.lazyancestors for more details."""
990 990
991 991 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
992 992 inclusive=inclusive)
993 993
994 994 def descendants(self, revs):
995 995 """Generate the descendants of 'revs' in revision order.
996 996
997 997 Yield a sequence of revision numbers starting with a child of
998 998 some rev in revs, i.e., each revision is *not* considered a
999 999 descendant of itself. Results are ordered by revision number (a
1000 1000 topological sort)."""
1001 1001 first = min(revs)
1002 1002 if first == nullrev:
1003 1003 for i in self:
1004 1004 yield i
1005 1005 return
1006 1006
1007 1007 seen = set(revs)
1008 1008 for i in self.revs(start=first + 1):
1009 1009 for x in self.parentrevs(i):
1010 1010 if x != nullrev and x in seen:
1011 1011 seen.add(i)
1012 1012 yield i
1013 1013 break
1014 1014
1015 1015 def findcommonmissing(self, common=None, heads=None):
1016 1016 """Return a tuple of the ancestors of common and the ancestors of heads
1017 1017 that are not ancestors of common. In revset terminology, we return the
1018 1018 tuple:
1019 1019
1020 1020 ::common, (::heads) - (::common)
1021 1021
1022 1022 The list is sorted by revision number, meaning it is
1023 1023 topologically sorted.
1024 1024
1025 1025 'heads' and 'common' are both lists of node IDs. If heads is
1026 1026 not supplied, uses all of the revlog's heads. If common is not
1027 1027 supplied, uses nullid."""
1028 1028 if common is None:
1029 1029 common = [nullid]
1030 1030 if heads is None:
1031 1031 heads = self.heads()
1032 1032
1033 1033 common = [self.rev(n) for n in common]
1034 1034 heads = [self.rev(n) for n in heads]
1035 1035
1036 1036 # we want the ancestors, but inclusive
1037 1037 class lazyset(object):
1038 1038 def __init__(self, lazyvalues):
1039 1039 self.addedvalues = set()
1040 1040 self.lazyvalues = lazyvalues
1041 1041
1042 1042 def __contains__(self, value):
1043 1043 return value in self.addedvalues or value in self.lazyvalues
1044 1044
1045 1045 def __iter__(self):
1046 1046 added = self.addedvalues
1047 1047 for r in added:
1048 1048 yield r
1049 1049 for r in self.lazyvalues:
1050 1050 if not r in added:
1051 1051 yield r
1052 1052
1053 1053 def add(self, value):
1054 1054 self.addedvalues.add(value)
1055 1055
1056 1056 def update(self, values):
1057 1057 self.addedvalues.update(values)
1058 1058
1059 1059 has = lazyset(self.ancestors(common))
1060 1060 has.add(nullrev)
1061 1061 has.update(common)
1062 1062
1063 1063 # take all ancestors from heads that aren't in has
1064 1064 missing = set()
1065 1065 visit = collections.deque(r for r in heads if r not in has)
1066 1066 while visit:
1067 1067 r = visit.popleft()
1068 1068 if r in missing:
1069 1069 continue
1070 1070 else:
1071 1071 missing.add(r)
1072 1072 for p in self.parentrevs(r):
1073 1073 if p not in has:
1074 1074 visit.append(p)
1075 1075 missing = list(missing)
1076 1076 missing.sort()
1077 1077 return has, [self.node(miss) for miss in missing]
1078 1078
1079 1079 def incrementalmissingrevs(self, common=None):
1080 1080 """Return an object that can be used to incrementally compute the
1081 1081 revision numbers of the ancestors of arbitrary sets that are not
1082 1082 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 1083 object.
1084 1084
1085 1085 'common' is a list of revision numbers. If common is not supplied, uses
1086 1086 nullrev.
1087 1087 """
1088 1088 if common is None:
1089 1089 common = [nullrev]
1090 1090
1091 1091 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1092 1092
1093 1093 def findmissingrevs(self, common=None, heads=None):
1094 1094 """Return the revision numbers of the ancestors of heads that
1095 1095 are not ancestors of common.
1096 1096
1097 1097 More specifically, return a list of revision numbers corresponding to
1098 1098 nodes N such that every N satisfies the following constraints:
1099 1099
1100 1100 1. N is an ancestor of some node in 'heads'
1101 1101 2. N is not an ancestor of any node in 'common'
1102 1102
1103 1103 The list is sorted by revision number, meaning it is
1104 1104 topologically sorted.
1105 1105
1106 1106 'heads' and 'common' are both lists of revision numbers. If heads is
1107 1107 not supplied, uses all of the revlog's heads. If common is not
1108 1108 supplied, uses nullid."""
1109 1109 if common is None:
1110 1110 common = [nullrev]
1111 1111 if heads is None:
1112 1112 heads = self.headrevs()
1113 1113
1114 1114 inc = self.incrementalmissingrevs(common=common)
1115 1115 return inc.missingancestors(heads)
1116 1116
1117 1117 def findmissing(self, common=None, heads=None):
1118 1118 """Return the ancestors of heads that are not ancestors of common.
1119 1119
1120 1120 More specifically, return a list of nodes N such that every N
1121 1121 satisfies the following constraints:
1122 1122
1123 1123 1. N is an ancestor of some node in 'heads'
1124 1124 2. N is not an ancestor of any node in 'common'
1125 1125
1126 1126 The list is sorted by revision number, meaning it is
1127 1127 topologically sorted.
1128 1128
1129 1129 'heads' and 'common' are both lists of node IDs. If heads is
1130 1130 not supplied, uses all of the revlog's heads. If common is not
1131 1131 supplied, uses nullid."""
1132 1132 if common is None:
1133 1133 common = [nullid]
1134 1134 if heads is None:
1135 1135 heads = self.heads()
1136 1136
1137 1137 common = [self.rev(n) for n in common]
1138 1138 heads = [self.rev(n) for n in heads]
1139 1139
1140 1140 inc = self.incrementalmissingrevs(common=common)
1141 1141 return [self.node(r) for r in inc.missingancestors(heads)]
1142 1142
1143 1143 def nodesbetween(self, roots=None, heads=None):
1144 1144 """Return a topological path from 'roots' to 'heads'.
1145 1145
1146 1146 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1147 1147 topologically sorted list of all nodes N that satisfy both of
1148 1148 these constraints:
1149 1149
1150 1150 1. N is a descendant of some node in 'roots'
1151 1151 2. N is an ancestor of some node in 'heads'
1152 1152
1153 1153 Every node is considered to be both a descendant and an ancestor
1154 1154 of itself, so every reachable node in 'roots' and 'heads' will be
1155 1155 included in 'nodes'.
1156 1156
1157 1157 'outroots' is the list of reachable nodes in 'roots', i.e., the
1158 1158 subset of 'roots' that is returned in 'nodes'. Likewise,
1159 1159 'outheads' is the subset of 'heads' that is also in 'nodes'.
1160 1160
1161 1161 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1162 1162 unspecified, uses nullid as the only root. If 'heads' is
1163 1163 unspecified, uses list of all of the revlog's heads."""
1164 1164 nonodes = ([], [], [])
1165 1165 if roots is not None:
1166 1166 roots = list(roots)
1167 1167 if not roots:
1168 1168 return nonodes
1169 1169 lowestrev = min([self.rev(n) for n in roots])
1170 1170 else:
1171 1171 roots = [nullid] # Everybody's a descendant of nullid
1172 1172 lowestrev = nullrev
1173 1173 if (lowestrev == nullrev) and (heads is None):
1174 1174 # We want _all_ the nodes!
1175 1175 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1176 1176 if heads is None:
1177 1177 # All nodes are ancestors, so the latest ancestor is the last
1178 1178 # node.
1179 1179 highestrev = len(self) - 1
1180 1180 # Set ancestors to None to signal that every node is an ancestor.
1181 1181 ancestors = None
1182 1182 # Set heads to an empty dictionary for later discovery of heads
1183 1183 heads = {}
1184 1184 else:
1185 1185 heads = list(heads)
1186 1186 if not heads:
1187 1187 return nonodes
1188 1188 ancestors = set()
1189 1189 # Turn heads into a dictionary so we can remove 'fake' heads.
1190 1190 # Also, later we will be using it to filter out the heads we can't
1191 1191 # find from roots.
1192 1192 heads = dict.fromkeys(heads, False)
1193 1193 # Start at the top and keep marking parents until we're done.
1194 1194 nodestotag = set(heads)
1195 1195 # Remember where the top was so we can use it as a limit later.
1196 1196 highestrev = max([self.rev(n) for n in nodestotag])
1197 1197 while nodestotag:
1198 1198 # grab a node to tag
1199 1199 n = nodestotag.pop()
1200 1200 # Never tag nullid
1201 1201 if n == nullid:
1202 1202 continue
1203 1203 # A node's revision number represents its place in a
1204 1204 # topologically sorted list of nodes.
1205 1205 r = self.rev(n)
1206 1206 if r >= lowestrev:
1207 1207 if n not in ancestors:
1208 1208 # If we are possibly a descendant of one of the roots
1209 1209 # and we haven't already been marked as an ancestor
1210 1210 ancestors.add(n) # Mark as ancestor
1211 1211 # Add non-nullid parents to list of nodes to tag.
1212 1212 nodestotag.update([p for p in self.parents(n) if
1213 1213 p != nullid])
1214 1214 elif n in heads: # We've seen it before, is it a fake head?
1215 1215 # So it is, real heads should not be the ancestors of
1216 1216 # any other heads.
1217 1217 heads.pop(n)
1218 1218 if not ancestors:
1219 1219 return nonodes
1220 1220 # Now that we have our set of ancestors, we want to remove any
1221 1221 # roots that are not ancestors.
1222 1222
1223 1223 # If one of the roots was nullid, everything is included anyway.
1224 1224 if lowestrev > nullrev:
1225 1225 # But, since we weren't, let's recompute the lowest rev to not
1226 1226 # include roots that aren't ancestors.
1227 1227
1228 1228 # Filter out roots that aren't ancestors of heads
1229 1229 roots = [root for root in roots if root in ancestors]
1230 1230 # Recompute the lowest revision
1231 1231 if roots:
1232 1232 lowestrev = min([self.rev(root) for root in roots])
1233 1233 else:
1234 1234 # No more roots? Return empty list
1235 1235 return nonodes
1236 1236 else:
1237 1237 # We are descending from nullid, and don't need to care about
1238 1238 # any other roots.
1239 1239 lowestrev = nullrev
1240 1240 roots = [nullid]
1241 1241 # Transform our roots list into a set.
1242 1242 descendants = set(roots)
1243 1243 # Also, keep the original roots so we can filter out roots that aren't
1244 1244 # 'real' roots (i.e. are descended from other roots).
1245 1245 roots = descendants.copy()
1246 1246 # Our topologically sorted list of output nodes.
1247 1247 orderedout = []
1248 1248 # Don't start at nullid since we don't want nullid in our output list,
1249 1249 # and if nullid shows up in descendants, empty parents will look like
1250 1250 # they're descendants.
1251 1251 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 1252 n = self.node(r)
1253 1253 isdescendant = False
1254 1254 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 1255 isdescendant = True
1256 1256 elif n in descendants:
1257 1257 # n is already a descendant
1258 1258 isdescendant = True
1259 1259 # This check only needs to be done here because all the roots
1260 1260 # will start being marked as descendants before the loop.
1261 1261 if n in roots:
1262 1262 # If n was a root, check if it's a 'real' root.
1263 1263 p = tuple(self.parents(n))
1264 1264 # If any of its parents are descendants, it's not a root.
1265 1265 if (p[0] in descendants) or (p[1] in descendants):
1266 1266 roots.remove(n)
1267 1267 else:
1268 1268 p = tuple(self.parents(n))
1269 1269 # A node is a descendant if either of its parents are
1270 1270 # descendants. (We seeded the descendants list with the roots
1271 1271 # up there, remember?)
1272 1272 if (p[0] in descendants) or (p[1] in descendants):
1273 1273 descendants.add(n)
1274 1274 isdescendant = True
1275 1275 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 1276 # Only include nodes that are both descendants and ancestors.
1277 1277 orderedout.append(n)
1278 1278 if (ancestors is not None) and (n in heads):
1279 1279 # We're trying to figure out which heads are reachable
1280 1280 # from roots.
1281 1281 # Mark this head as having been reached
1282 1282 heads[n] = True
1283 1283 elif ancestors is None:
1284 1284 # Otherwise, we're trying to discover the heads.
1285 1285 # Assume this is a head because if it isn't, the next step
1286 1286 # will eventually remove it.
1287 1287 heads[n] = True
1288 1288 # But, obviously its parents aren't.
1289 1289 for p in self.parents(n):
1290 1290 heads.pop(p, None)
1291 1291 heads = [head for head, flag in heads.iteritems() if flag]
1292 1292 roots = list(roots)
1293 1293 assert orderedout
1294 1294 assert roots
1295 1295 assert heads
1296 1296 return (orderedout, roots, heads)
1297 1297
1298 1298 def headrevs(self):
1299 1299 try:
1300 1300 return self.index.headrevs()
1301 1301 except AttributeError:
1302 1302 return self._headrevs()
1303 1303
1304 1304 def computephases(self, roots):
1305 1305 return self.index.computephasesmapsets(roots)
1306 1306
1307 1307 def _headrevs(self):
1308 1308 count = len(self)
1309 1309 if not count:
1310 1310 return [nullrev]
1311 1311 # we won't iter over filtered revs, so nobody is a head at start
1312 1312 ishead = [0] * (count + 1)
1313 1313 index = self.index
1314 1314 for r in self:
1315 1315 ishead[r] = 1 # I may be a head
1316 1316 e = index[r]
1317 1317 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1318 1318 return [r for r, val in enumerate(ishead) if val]
1319 1319
1320 1320 def heads(self, start=None, stop=None):
1321 1321 """return the list of all nodes that have no children
1322 1322
1323 1323 if start is specified, only heads that are descendants of
1324 1324 start will be returned
1325 1325 if stop is specified, it will consider all the revs from stop
1326 1326 as if they had no children
1327 1327 """
1328 1328 if start is None and stop is None:
1329 1329 if not len(self):
1330 1330 return [nullid]
1331 1331 return [self.node(r) for r in self.headrevs()]
1332 1332
1333 1333 if start is None:
1334 1334 start = nullid
1335 1335 if stop is None:
1336 1336 stop = []
1337 1337 stoprevs = set([self.rev(n) for n in stop])
1338 1338 startrev = self.rev(start)
1339 1339 reachable = {startrev}
1340 1340 heads = {startrev}
1341 1341
1342 1342 parentrevs = self.parentrevs
1343 1343 for r in self.revs(start=startrev + 1):
1344 1344 for p in parentrevs(r):
1345 1345 if p in reachable:
1346 1346 if r not in stoprevs:
1347 1347 reachable.add(r)
1348 1348 heads.add(r)
1349 1349 if p in heads and p not in stoprevs:
1350 1350 heads.remove(p)
1351 1351
1352 1352 return [self.node(r) for r in heads]
1353 1353
1354 1354 def children(self, node):
1355 1355 """find the children of a given node"""
1356 1356 c = []
1357 1357 p = self.rev(node)
1358 1358 for r in self.revs(start=p + 1):
1359 1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1360 1360 if prevs:
1361 1361 for pr in prevs:
1362 1362 if pr == p:
1363 1363 c.append(self.node(r))
1364 1364 elif p == nullrev:
1365 1365 c.append(self.node(r))
1366 1366 return c
1367 1367
1368 1368 def descendant(self, start, end):
1369 1369 if start == nullrev:
1370 1370 return True
1371 1371 for i in self.descendants([start]):
1372 1372 if i == end:
1373 1373 return True
1374 1374 elif i > end:
1375 1375 break
1376 1376 return False
1377 1377
1378 1378 def commonancestorsheads(self, a, b):
1379 1379 """calculate all the heads of the common ancestors of nodes a and b"""
1380 1380 a, b = self.rev(a), self.rev(b)
1381 1381 try:
1382 1382 ancs = self.index.commonancestorsheads(a, b)
1383 1383 except (AttributeError, OverflowError): # C implementation failed
1384 1384 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1385 1385 return pycompat.maplist(self.node, ancs)
1386 1386
1387 1387 def isancestor(self, a, b):
1388 1388 """return True if node a is an ancestor of node b
1389 1389
1390 1390 The implementation of this is trivial but the use of
1391 1391 commonancestorsheads is not."""
1392 1392 return a in self.commonancestorsheads(a, b)
1393 1393
1394 1394 def ancestor(self, a, b):
1395 1395 """calculate the "best" common ancestor of nodes a and b"""
1396 1396
1397 1397 a, b = self.rev(a), self.rev(b)
1398 1398 try:
1399 1399 ancs = self.index.ancestors(a, b)
1400 1400 except (AttributeError, OverflowError):
1401 1401 ancs = ancestor.ancestors(self.parentrevs, a, b)
1402 1402 if ancs:
1403 1403 # choose a consistent winner when there's a tie
1404 1404 return min(map(self.node, ancs))
1405 1405 return nullid
1406 1406
1407 1407 def _match(self, id):
1408 1408 if isinstance(id, int):
1409 1409 # rev
1410 1410 return self.node(id)
1411 1411 if len(id) == 20:
1412 1412 # possibly a binary node
1413 1413 # odds of a binary node being all hex in ASCII are 1 in 10**25
1414 1414 try:
1415 1415 node = id
1416 1416 self.rev(node) # quick search the index
1417 1417 return node
1418 1418 except LookupError:
1419 1419 pass # may be partial hex id
1420 1420 try:
1421 1421 # str(rev)
1422 1422 rev = int(id)
1423 1423 if "%d" % rev != id:
1424 1424 raise ValueError
1425 1425 if rev < 0:
1426 1426 rev = len(self) + rev
1427 1427 if rev < 0 or rev >= len(self):
1428 1428 raise ValueError
1429 1429 return self.node(rev)
1430 1430 except (ValueError, OverflowError):
1431 1431 pass
1432 1432 if len(id) == 40:
1433 1433 try:
1434 1434 # a full hex nodeid?
1435 1435 node = bin(id)
1436 1436 self.rev(node)
1437 1437 return node
1438 1438 except (TypeError, LookupError):
1439 1439 pass
1440 1440
1441 1441 def _partialmatch(self, id):
1442 1442 # we don't care about wdirfilenodeids as they should always be full hashes
1443 1443 maybewdir = wdirhex.startswith(id)
1444 1444 try:
1445 1445 partial = self.index.partialmatch(id)
1446 1446 if partial and self.hasnode(partial):
1447 1447 if maybewdir:
1448 1448 # single 'ff...' match in radix tree, ambiguous with wdir
1449 1449 raise RevlogError
1450 1450 return partial
1451 1451 if maybewdir:
1452 1452 # no 'ff...' match in radix tree, wdir identified
1453 1453 raise error.WdirUnsupported
1454 1454 return None
1455 1455 except RevlogError:
1456 1456 # parsers.c radix tree lookup gave multiple matches
1457 1457 # fast path: for unfiltered changelog, radix tree is accurate
1458 1458 if not getattr(self, 'filteredrevs', None):
1459 1459 raise LookupError(id, self.indexfile,
1460 1460 _('ambiguous identifier'))
1461 1461 # fall through to slow path that filters hidden revisions
1462 1462 except (AttributeError, ValueError):
1463 1463 # we are pure python, or key was too short to search radix tree
1464 1464 pass
1465 1465
1466 1466 if id in self._pcache:
1467 1467 return self._pcache[id]
1468 1468
1469 1469 if len(id) <= 40:
1470 1470 try:
1471 1471 # hex(node)[:...]
1472 1472 l = len(id) // 2 # grab an even number of digits
1473 1473 prefix = bin(id[:l * 2])
1474 1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 1475 nl = [n for n in nl if hex(n).startswith(id) and
1476 1476 self.hasnode(n)]
1477 1477 if len(nl) > 0:
1478 1478 if len(nl) == 1 and not maybewdir:
1479 1479 self._pcache[id] = nl[0]
1480 1480 return nl[0]
1481 1481 raise LookupError(id, self.indexfile,
1482 1482 _('ambiguous identifier'))
1483 1483 if maybewdir:
1484 1484 raise error.WdirUnsupported
1485 1485 return None
1486 1486 except TypeError:
1487 1487 pass
1488 1488
1489 1489 def lookup(self, id):
1490 1490 """locate a node based on:
1491 1491 - revision number or str(revision number)
1492 1492 - nodeid or subset of hex nodeid
1493 1493 """
1494 1494 n = self._match(id)
1495 1495 if n is not None:
1496 1496 return n
1497 1497 n = self._partialmatch(id)
1498 1498 if n:
1499 1499 return n
1500 1500
1501 1501 raise LookupError(id, self.indexfile, _('no match found'))
1502 1502
1503 1503 def shortest(self, node, minlength=1):
1504 1504 """Find the shortest unambiguous prefix that matches node."""
1505 1505 def isrev(prefix):
1506 1506 try:
1507 1507 i = int(prefix)
1508 1508 # if we are a pure int, then starting with zero will not be
1509 1509 # confused as a rev; nor, obviously, will an int larger
1510 1510 # than the value of the tip rev
1511 1511 if prefix[0] == '0' or i > len(self):
1512 1512 return False
1513 1513 return True
1514 1514 except ValueError:
1515 1515 return False
1516 1516
1517 1517 def isvalid(prefix):
1518 1518 try:
1519 1519 node = self._partialmatch(prefix)
1520 1520 except error.RevlogError:
1521 1521 return False
1522 1522 except error.WdirUnsupported:
1523 1523 # single 'ff...' match
1524 1524 return True
1525 1525 if node is None:
1526 1526 raise LookupError(node, self.indexfile, _('no node'))
1527 1527 return not isrev(prefix)
1528 1528
1529 1529 def maybewdir(prefix):
1530 1530 return all(c == 'f' for c in prefix)
1531 1531
1532 1532 hexnode = hex(node)
1533 1533
1534 1534 def disambiguate(hexnode, minlength):
1535 1535 for length in range(minlength, 41):
1536 1536 prefix = hexnode[:length]
1537 1537 if not isrev(prefix) and not maybewdir(prefix):
1538 1538 return prefix
1539 1539
1540 1540 if not getattr(self, 'filteredrevs', None):
1541 1541 try:
1542 1542 length = max(self.index.shortest(node), minlength)
1543 1543 return disambiguate(hexnode, length)
1544 1544 except RevlogError:
1545 if node == wdirid:
1546 for length in range(minlength, 41):
1547 prefix = hexnode[:length]
1548 if isvalid(prefix):
1549 return prefix
1550 else:
1545 if node != wdirid:
1551 1546 raise LookupError(node, self.indexfile, _('no node'))
1552 1547 except AttributeError:
1553 1548 # Fall through to pure code
1554 1549 pass
1555 1550
1556 shortest = hexnode
1557 startlength = max(6, minlength)
1558 length = startlength
1559 while True:
1551 if node == wdirid:
1552 for length in range(minlength, 41):
1553 prefix = hexnode[:length]
1554 if isvalid(prefix):
1555 return prefix
1556
1557 for length in range(minlength, 41):
1560 1558 prefix = hexnode[:length]
1561 1559 if isvalid(prefix):
1562 shortest = prefix
1563 if length == minlength or length > startlength:
1564 return shortest
1565 length -= 1
1566 else:
1567 length += 1
1568 if len(shortest) <= length:
1569 return shortest
1560 return disambiguate(hexnode, length)
1570 1561
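Editor's note: a standalone sketch of the revision-number rule that this commit makes the pure-python path apply as well. disambiguate() assumes the prefix is already unique among nodes and only lengthens it past anything that could be read as a revnum or as a wdir ('ff...') prefix; the tip rev and node below are hypothetical.

tiplen = 2001   # len(self) for a repo whose tip is rev 2000
def _isrev(prefix):
    # mirrors isrev() above: a pure int with no leading zero that does
    # not exceed len(self) would be resolved as a revision number
    return prefix.isdigit() and prefix[0] != '0' and int(prefix) <= tiplen

hexnode = '1234d' + 'f' * 35   # unique among nodes from its first digit
for length in range(1, 41):
    prefix = hexnode[:length]
    if not _isrev(prefix) and not all(c == 'f' for c in prefix):
        break
assert prefix == '1234d'   # '1', '12', '123' and '1234' all read as revnums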
1571 1562 def cmp(self, node, text):
1572 1563 """compare text with a given file revision
1573 1564
1574 1565 returns True if text is different than what is stored.
1575 1566 """
1576 1567 p1, p2 = self.parents(node)
1577 1568 return hash(text, p1, p2) != node
1578 1569
1579 1570 def _cachesegment(self, offset, data):
1580 1571 """Add a segment to the revlog cache.
1581 1572
1582 1573 Accepts an absolute offset and the data that is at that location.
1583 1574 """
1584 1575 o, d = self._chunkcache
1585 1576 # try to add to existing cache
1586 1577 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1587 1578 self._chunkcache = o, d + data
1588 1579 else:
1589 1580 self._chunkcache = offset, data
1590 1581
1591 1582 def _readsegment(self, offset, length, df=None):
1592 1583 """Load a segment of raw data from the revlog.
1593 1584
1594 1585 Accepts an absolute offset, length to read, and an optional existing
1595 1586 file handle to read from.
1596 1587
1597 1588 If an existing file handle is passed, it will be seeked and the
1598 1589 original seek position will NOT be restored.
1599 1590
1600 1591 Returns a str or buffer of raw byte data.
1601 1592 """
1602 1593 # Cache data both forward and backward around the requested
1603 1594 # data, in a fixed size window. This helps speed up operations
1604 1595 # involving reading the revlog backwards.
1605 1596 cachesize = self._chunkcachesize
1606 1597 realoffset = offset & ~(cachesize - 1)
1607 1598 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1608 1599 - realoffset)
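        # Worked example (assuming the default 64 KiB _chunkcachesize):
        # offset=130000, length=1000 gives realoffset = 130000 & ~65535
        # = 65536 and reallength = (196536 & ~65535) - 65536 = 65536, so a
        # single aligned 64 KiB window covering bytes 65536-131071 is read.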
1609 1600 with self._datareadfp(df) as df:
1610 1601 df.seek(realoffset)
1611 1602 d = df.read(reallength)
1612 1603 self._cachesegment(realoffset, d)
1613 1604 if offset != realoffset or reallength != length:
1614 1605 return util.buffer(d, offset - realoffset, length)
1615 1606 return d
1616 1607
1617 1608 def _getsegment(self, offset, length, df=None):
1618 1609 """Obtain a segment of raw data from the revlog.
1619 1610
1620 1611 Accepts an absolute offset, length of bytes to obtain, and an
1621 1612 optional file handle to the already-opened revlog. If the file
1622 1613         handle is used, its original seek position will not be preserved.
1623 1614
1624 1615 Requests for data may be returned from a cache.
1625 1616
1626 1617 Returns a str or a buffer instance of raw byte data.
1627 1618 """
1628 1619 o, d = self._chunkcache
1629 1620 l = len(d)
1630 1621
1631 1622 # is it in the cache?
1632 1623 cachestart = offset - o
1633 1624 cacheend = cachestart + length
1634 1625 if cachestart >= 0 and cacheend <= l:
1635 1626 if cachestart == 0 and cacheend == l:
1636 1627 return d # avoid a copy
1637 1628 return util.buffer(d, cachestart, cacheend - cachestart)
1638 1629
1639 1630 return self._readsegment(offset, length, df=df)
1640 1631
1641 1632 def _getsegmentforrevs(self, startrev, endrev, df=None):
1642 1633 """Obtain a segment of raw data corresponding to a range of revisions.
1643 1634
1644 1635 Accepts the start and end revisions and an optional already-open
1645 1636         file handle to be used for reading. If the file handle is used, its
1646 1637 seek position will not be preserved.
1647 1638
1648 1639 Requests for data may be satisfied by a cache.
1649 1640
1650 1641 Returns a 2-tuple of (offset, data) for the requested range of
1651 1642 revisions. Offset is the integer offset from the beginning of the
1652 1643 revlog and data is a str or buffer of the raw byte data.
1653 1644
1654 1645 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1655 1646 to determine where each revision's data begins and ends.
1656 1647 """
1657 1648 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1658 1649 # (functions are expensive).
1659 1650 index = self.index
1660 1651 istart = index[startrev]
1661 1652 start = int(istart[0] >> 16)
1662 1653 if startrev == endrev:
1663 1654 end = start + istart[1]
1664 1655 else:
1665 1656 iend = index[endrev]
1666 1657 end = int(iend[0] >> 16) + iend[1]
1667 1658
1668 1659 if self._inline:
1669 1660 start += (startrev + 1) * self._io.size
1670 1661 end += (endrev + 1) * self._io.size
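        # In the inline layout each revision's data is preceded by its index
        # entry (iosize bytes), so rev r's data begins after (r + 1) entries;
        # hence the corrections above.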
1671 1662 length = end - start
1672 1663
1673 1664 return start, self._getsegment(start, length, df=df)
1674 1665
1675 1666 def _chunk(self, rev, df=None):
1676 1667 """Obtain a single decompressed chunk for a revision.
1677 1668
1678 1669 Accepts an integer revision and an optional already-open file handle
1679 1670 to be used for reading. If used, the seek position of the file will not
1680 1671 be preserved.
1681 1672
1682 1673 Returns a str holding uncompressed data for the requested revision.
1683 1674 """
1684 1675 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1685 1676
1686 1677 def _chunks(self, revs, df=None):
1687 1678 """Obtain decompressed chunks for the specified revisions.
1688 1679
1689 1680 Accepts an iterable of numeric revisions that are assumed to be in
1690 1681 ascending order. Also accepts an optional already-open file handle
1691 1682 to be used for reading. If used, the seek position of the file will
1692 1683 not be preserved.
1693 1684
1694 1685 This function is similar to calling ``self._chunk()`` multiple times,
1695 1686 but is faster.
1696 1687
1697 1688 Returns a list with decompressed data for each requested revision.
1698 1689 """
1699 1690 if not revs:
1700 1691 return []
1701 1692 start = self.start
1702 1693 length = self.length
1703 1694 inline = self._inline
1704 1695 iosize = self._io.size
1705 1696 buffer = util.buffer
1706 1697
1707 1698 l = []
1708 1699 ladd = l.append
1709 1700
1710 1701 if not self._withsparseread:
1711 1702 slicedchunks = (revs,)
1712 1703 else:
1713 1704 slicedchunks = _slicechunk(self, revs)
1714 1705
1715 1706 for revschunk in slicedchunks:
1716 1707 firstrev = revschunk[0]
1717 1708 # Skip trailing revisions with empty diff
1718 1709 for lastrev in revschunk[::-1]:
1719 1710 if length(lastrev) != 0:
1720 1711 break
1721 1712
1722 1713 try:
1723 1714 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1724 1715 except OverflowError:
1725 1716 # issue4215 - we can't cache a run of chunks greater than
1726 1717 # 2G on Windows
1727 1718 return [self._chunk(rev, df=df) for rev in revschunk]
1728 1719
1729 1720 decomp = self.decompress
1730 1721 for rev in revschunk:
1731 1722 chunkstart = start(rev)
1732 1723 if inline:
1733 1724 chunkstart += (rev + 1) * iosize
1734 1725 chunklength = length(rev)
1735 1726 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1736 1727
1737 1728 return l
1738 1729
1739 1730 def _chunkclear(self):
1740 1731 """Clear the raw chunk cache."""
1741 1732 self._chunkcache = (0, '')
1742 1733
1743 1734 def deltaparent(self, rev):
1744 1735 """return deltaparent of the given revision"""
1745 1736 base = self.index[rev][3]
1746 1737 if base == rev:
1747 1738 return nullrev
1748 1739 elif self._generaldelta:
1749 1740 return base
1750 1741 else:
1751 1742 return rev - 1
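    # Sketch of the three cases above: index[rev][3] == rev marks a full
    # snapshot (delta parent is nullrev); with generaldelta the stored base
    # may be any earlier revision; otherwise deltas implicitly chain to
    # rev - 1.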
1752 1743
1753 1744 def revdiff(self, rev1, rev2):
1754 1745 """return or calculate a delta between two revisions
1755 1746
1756 1747 The delta calculated is in binary form and is intended to be written to
1757 1748 revlog data directly. So this function needs raw revision data.
1758 1749 """
1759 1750 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1760 1751 return bytes(self._chunk(rev2))
1761 1752
1762 1753 return mdiff.textdiff(self.revision(rev1, raw=True),
1763 1754 self.revision(rev2, raw=True))
1764 1755
1765 1756 def revision(self, nodeorrev, _df=None, raw=False):
1766 1757 """return an uncompressed revision of a given node or revision
1767 1758 number.
1768 1759
1769 1760 _df - an existing file handle to read from. (internal-only)
1770 1761 raw - an optional argument specifying if the revision data is to be
1771 1762 treated as raw data when applying flag transforms. 'raw' should be set
1772 1763 to True when generating changegroups or in debug commands.
1773 1764 """
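        # Illustrative calls (``rl`` stands for any revlog instance):
        #   rl.revision(rl.node(0))      # fulltext of rev 0, flags applied
        #   rl.revision(0, raw=True)     # rawtext, e.g. for changegroups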
1774 1765 if isinstance(nodeorrev, int):
1775 1766 rev = nodeorrev
1776 1767 node = self.node(rev)
1777 1768 else:
1778 1769 node = nodeorrev
1779 1770 rev = None
1780 1771
1781 1772 cachedrev = None
1782 1773 flags = None
1783 1774 rawtext = None
1784 1775 if node == nullid:
1785 1776 return ""
1786 1777 if self._cache:
1787 1778 if self._cache[0] == node:
1788 1779 # _cache only stores rawtext
1789 1780 if raw:
1790 1781 return self._cache[2]
1791 1782 # duplicated, but good for perf
1792 1783 if rev is None:
1793 1784 rev = self.rev(node)
1794 1785 if flags is None:
1795 1786 flags = self.flags(rev)
1796 1787 # no extra flags set, no flag processor runs, text = rawtext
1797 1788 if flags == REVIDX_DEFAULT_FLAGS:
1798 1789 return self._cache[2]
1799 1790 # rawtext is reusable. need to run flag processor
1800 1791 rawtext = self._cache[2]
1801 1792
1802 1793 cachedrev = self._cache[1]
1803 1794
1804 1795 # look up what we need to read
1805 1796 if rawtext is None:
1806 1797 if rev is None:
1807 1798 rev = self.rev(node)
1808 1799
1809 1800 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1810 1801 if stopped:
1811 1802 rawtext = self._cache[2]
1812 1803
1813 1804 # drop cache to save memory
1814 1805 self._cache = None
1815 1806
1816 1807 bins = self._chunks(chain, df=_df)
1817 1808 if rawtext is None:
1818 1809 rawtext = bytes(bins[0])
1819 1810 bins = bins[1:]
1820 1811
1821 1812 rawtext = mdiff.patches(rawtext, bins)
1822 1813 self._cache = (node, rev, rawtext)
1823 1814
1824 1815 if flags is None:
1825 1816 if rev is None:
1826 1817 rev = self.rev(node)
1827 1818 flags = self.flags(rev)
1828 1819
1829 1820 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1830 1821 if validatehash:
1831 1822 self.checkhash(text, node, rev=rev)
1832 1823
1833 1824 return text
1834 1825
1835 1826 def hash(self, text, p1, p2):
1836 1827 """Compute a node hash.
1837 1828
1838 1829 Available as a function so that subclasses can replace the hash
1839 1830 as needed.
1840 1831 """
1841 1832 return hash(text, p1, p2)
1842 1833
1843 1834 def _processflags(self, text, flags, operation, raw=False):
1844 1835 """Inspect revision data flags and applies transforms defined by
1845 1836 registered flag processors.
1846 1837
1847 1838 ``text`` - the revision data to process
1848 1839 ``flags`` - the revision flags
1849 1840 ``operation`` - the operation being performed (read or write)
1850 1841 ``raw`` - an optional argument describing if the raw transform should be
1851 1842 applied.
1852 1843
1853 1844 This method processes the flags in the order (or reverse order if
1854 1845 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1855 1846         flag processors registered for present flags. The order of flags in
1856 1847         REVIDX_FLAGS_ORDER must be stable because transforms are non-commutative.
1857 1848
1858 1849 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1859 1850 processed text and ``validatehash`` is a bool indicating whether the
1860 1851 returned text should be checked for hash integrity.
1861 1852
1862 1853 Note: If the ``raw`` argument is set, it has precedence over the
1863 1854 operation and will only update the value of ``validatehash``.
1864 1855 """
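        # Ordering sketch: for flags ISCENSORED|EXTSTORED, a 'read' applies
        # the ISCENSORED processor first and EXTSTORED second, while a
        # 'write' applies them in the opposite order, so the transforms
        # invert cleanly.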
1865 1856 # fast path: no flag processors will run
1866 1857 if flags == 0:
1867 1858 return text, True
1868 1859         if operation not in ('read', 'write'):
1869 1860             raise ProgrammingError(_("invalid '%s' operation") % operation)
1870 1861 # Check all flags are known.
1871 1862 if flags & ~REVIDX_KNOWN_FLAGS:
1872 1863 raise RevlogError(_("incompatible revision flag '%#x'") %
1873 1864 (flags & ~REVIDX_KNOWN_FLAGS))
1874 1865 validatehash = True
1875 1866 # Depending on the operation (read or write), the order might be
1876 1867 # reversed due to non-commutative transforms.
1877 1868 orderedflags = REVIDX_FLAGS_ORDER
1878 1869 if operation == 'write':
1879 1870 orderedflags = reversed(orderedflags)
1880 1871
1881 1872 for flag in orderedflags:
1882 1873 # If a flagprocessor has been registered for a known flag, apply the
1883 1874 # related operation transform and update result tuple.
1884 1875 if flag & flags:
1885 1876 vhash = True
1886 1877
1887 1878 if flag not in _flagprocessors:
1888 1879 message = _("missing processor for flag '%#x'") % (flag)
1889 1880 raise RevlogError(message)
1890 1881
1891 1882 processor = _flagprocessors[flag]
1892 1883 if processor is not None:
1893 1884 readtransform, writetransform, rawtransform = processor
1894 1885
1895 1886 if raw:
1896 1887 vhash = rawtransform(self, text)
1897 1888 elif operation == 'read':
1898 1889 text, vhash = readtransform(self, text)
1899 1890 else: # write operation
1900 1891 text, vhash = writetransform(self, text)
1901 1892 validatehash = validatehash and vhash
1902 1893
1903 1894 return text, validatehash
1904 1895
1905 1896 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1906 1897 """Check node hash integrity.
1907 1898
1908 1899 Available as a function so that subclasses can extend hash mismatch
1909 1900 behaviors as needed.
1910 1901 """
1911 1902 try:
1912 1903 if p1 is None and p2 is None:
1913 1904 p1, p2 = self.parents(node)
1914 1905 if node != self.hash(text, p1, p2):
1915 1906 revornode = rev
1916 1907 if revornode is None:
1917 1908 revornode = templatefilters.short(hex(node))
1918 1909 raise RevlogError(_("integrity check failed on %s:%s")
1919 1910 % (self.indexfile, pycompat.bytestr(revornode)))
1920 1911 except RevlogError:
1921 1912 if self._censorable and _censoredtext(text):
1922 1913 raise error.CensoredNodeError(self.indexfile, node, text)
1923 1914 raise
1924 1915
1925 1916 def _enforceinlinesize(self, tr, fp=None):
1926 1917 """Check if the revlog is too big for inline and convert if so.
1927 1918
1928 1919 This should be called after revisions are added to the revlog. If the
1929 1920 revlog has grown too large to be an inline revlog, it will convert it
1930 1921 to use multiple index and data files.
1931 1922 """
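        # e.g. once start(-2) + length(-2) reaches _maxinline (128 KiB), the
        # interleaved index+data .i file is rewritten below as a separate
        # index file plus a .d data file, all within the open transaction.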
1932 1923 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1933 1924 return
1934 1925
1935 1926 trinfo = tr.find(self.indexfile)
1936 1927 if trinfo is None:
1937 1928 raise RevlogError(_("%s not found in the transaction")
1938 1929 % self.indexfile)
1939 1930
1940 1931 trindex = trinfo[2]
1941 1932 if trindex is not None:
1942 1933 dataoff = self.start(trindex)
1943 1934 else:
1944 1935 # revlog was stripped at start of transaction, use all leftover data
1945 1936 trindex = len(self) - 1
1946 1937 dataoff = self.end(-2)
1947 1938
1948 1939 tr.add(self.datafile, dataoff)
1949 1940
1950 1941 if fp:
1951 1942 fp.flush()
1952 1943 fp.close()
1953 1944
1954 1945 with self._datafp('w') as df:
1955 1946 for r in self:
1956 1947 df.write(self._getsegmentforrevs(r, r)[1])
1957 1948
1958 1949 with self._indexfp('w') as fp:
1959 1950 self.version &= ~FLAG_INLINE_DATA
1960 1951 self._inline = False
1961 1952 io = self._io
1962 1953 for i in self:
1963 1954 e = io.packentry(self.index[i], self.node, self.version, i)
1964 1955 fp.write(e)
1965 1956
1966 1957         # the temp file replaces the real index when we exit the context
1967 1958         # manager
1968 1959
1969 1960 tr.replace(self.indexfile, trindex * self._io.size)
1970 1961 self._chunkclear()
1971 1962
1972 1963 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1973 1964 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1974 1965 """add a revision to the log
1975 1966
1976 1967 text - the revision data to add
1977 1968 transaction - the transaction object used for rollback
1978 1969 link - the linkrev data to add
1979 1970 p1, p2 - the parent nodeids of the revision
1980 1971 cachedelta - an optional precomputed delta
1981 1972 node - nodeid of revision; typically node is not specified, and it is
1982 1973 computed by default as hash(text, p1, p2), however subclasses might
1983 1974 use different hashing method (and override checkhash() in such case)
1984 1975 flags - the known flags to set on the revision
1985 1976 deltacomputer - an optional _deltacomputer instance shared between
1986 1977 multiple calls
1987 1978 """
1988 1979 if link == nullrev:
1989 1980 raise RevlogError(_("attempted to add linkrev -1 to %s")
1990 1981 % self.indexfile)
1991 1982
1992 1983 if flags:
1993 1984 node = node or self.hash(text, p1, p2)
1994 1985
1995 1986 rawtext, validatehash = self._processflags(text, flags, 'write')
1996 1987
1997 1988 # If the flag processor modifies the revision data, ignore any provided
1998 1989 # cachedelta.
1999 1990 if rawtext != text:
2000 1991 cachedelta = None
2001 1992
2002 1993 if len(rawtext) > _maxentrysize:
2003 1994 raise RevlogError(
2004 1995 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2005 1996 % (self.indexfile, len(rawtext)))
2006 1997
2007 1998 node = node or self.hash(rawtext, p1, p2)
2008 1999 if node in self.nodemap:
2009 2000 return node
2010 2001
2011 2002 if validatehash:
2012 2003 self.checkhash(rawtext, node, p1=p1, p2=p2)
2013 2004
2014 2005 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2015 2006 flags, cachedelta=cachedelta,
2016 2007 deltacomputer=deltacomputer)
2017 2008
2018 2009 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2019 2010 cachedelta=None, deltacomputer=None):
2020 2011 """add a raw revision with known flags, node and parents
2021 2012         useful when reusing a revision not stored in this revlog (e.g.
2022 2013         received over the wire, or read from an external bundle).
2023 2014 """
2024 2015 dfh = None
2025 2016 if not self._inline:
2026 2017 dfh = self._datafp("a+")
2027 2018 ifh = self._indexfp("a+")
2028 2019 try:
2029 2020 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2030 2021 flags, cachedelta, ifh, dfh,
2031 2022 deltacomputer=deltacomputer)
2032 2023 finally:
2033 2024 if dfh:
2034 2025 dfh.close()
2035 2026 ifh.close()
2036 2027
2037 2028 def compress(self, data):
2038 2029 """Generate a possibly-compressed representation of data."""
2039 2030 if not data:
2040 2031 return '', data
2041 2032
2042 2033 compressed = self._compressor.compress(data)
2043 2034
2044 2035 if compressed:
2045 2036 # The revlog compressor added the header in the returned data.
2046 2037 return '', compressed
2047 2038
2048 2039 if data[0:1] == '\0':
2049 2040 return '', data
2050 2041 return 'u', data
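    # Header convention illustrated: compress('') -> ('', ''); zlib output
    # already starts with 'x'; text whose first byte is '\0' is stored bare;
    # anything else left uncompressed gets an explicit 'u' prefix.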
2051 2042
2052 2043 def decompress(self, data):
2053 2044 """Decompress a revlog chunk.
2054 2045
2055 2046 The chunk is expected to begin with a header identifying the
2056 2047 format type so it can be routed to an appropriate decompressor.
2057 2048 """
2058 2049 if not data:
2059 2050 return data
2060 2051
2061 2052 # Revlogs are read much more frequently than they are written and many
2062 2053 # chunks only take microseconds to decompress, so performance is
2063 2054 # important here.
2064 2055 #
2065 2056 # We can make a few assumptions about revlogs:
2066 2057 #
2067 2058 # 1) the majority of chunks will be compressed (as opposed to inline
2068 2059 # raw data).
2069 2060         # 2) decompressing *any* data will likely be at least 10x slower than
2070 2061 # returning raw inline data.
2071 2062 # 3) we want to prioritize common and officially supported compression
2072 2063 # engines
2073 2064 #
2074 2065 # It follows that we want to optimize for "decompress compressed data
2075 2066 # when encoded with common and officially supported compression engines"
2076 2067 # case over "raw data" and "data encoded by less common or non-official
2077 2068 # compression engines." That is why we have the inline lookup first
2078 2069 # followed by the compengines lookup.
2079 2070 #
2080 2071 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2081 2072 # compressed chunks. And this matters for changelog and manifest reads.
2082 2073 t = data[0:1]
2083 2074
2084 2075 if t == 'x':
2085 2076 try:
2086 2077 return _zlibdecompress(data)
2087 2078 except zlib.error as e:
2088 2079 raise RevlogError(_('revlog decompress error: %s') %
2089 2080 stringutil.forcebytestr(e))
2090 2081 # '\0' is more common than 'u' so it goes first.
2091 2082 elif t == '\0':
2092 2083 return data
2093 2084 elif t == 'u':
2094 2085 return util.buffer(data, 1)
2095 2086
2096 2087 try:
2097 2088 compressor = self._decompressors[t]
2098 2089 except KeyError:
2099 2090 try:
2100 2091 engine = util.compengines.forrevlogheader(t)
2101 2092 compressor = engine.revlogcompressor()
2102 2093 self._decompressors[t] = compressor
2103 2094 except KeyError:
2104 2095 raise RevlogError(_('unknown compression type %r') % t)
2105 2096
2106 2097 return compressor.decompress(data)
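    # Mirror of compress() above: 'x' chunks are zlib-inflated, a leading
    # '\0' byte means bare data (the NUL belongs to the text itself), 'u'
    # is stripped, and any other header byte selects a registered engine.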
2107 2098
2108 2099 def _isgooddeltainfo(self, d, textlen):
2109 2100 """Returns True if the given delta is good. Good means that it is within
2110 2101 the disk span, disk size, and chain length bounds that we know to be
2111 2102 performant."""
2112 2103 if d is None:
2113 2104 return False
2114 2105
2115 2106 # - 'd.distance' is the distance from the base revision -- bounding it
2116 2107 # limits the amount of I/O we need to do.
2117 2108 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2118 2109 # need to apply -- bounding it limits the amount of CPU we consume.
2119 2110
2120 2111 defaultmax = textlen * 4
2121 2112 maxdist = self._maxdeltachainspan
2122 2113 if not maxdist:
2123 2114             maxdist = d.distance # ensure the conditional below passes
2124 2115 maxdist = max(maxdist, defaultmax)
2125 2116 if (d.distance > maxdist or d.deltalen > textlen or
2126 2117 d.compresseddeltalen > textlen * 2 or
2127 2118 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2128 2119 return False
2129 2120
2130 2121 return True
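    # Worked example with hypothetical numbers: textlen=1000 and
    # _maxdeltachainspan=2000 give maxdist = max(2000, 4000) = 4000, so a
    # delta with distance 6000 is rejected, as is one with deltalen 1200.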
2131 2122
2132 2123 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2133 2124 cachedelta, ifh, dfh, alwayscache=False,
2134 2125 deltacomputer=None):
2135 2126 """internal function to add revisions to the log
2136 2127
2137 2128 see addrevision for argument descriptions.
2138 2129
2139 2130 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2140 2131
2141 2132 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2142 2133 be used.
2143 2134
2144 2135 invariants:
2145 2136 - rawtext is optional (can be None); if not set, cachedelta must be set.
2146 2137 if both are set, they must correspond to each other.
2147 2138 """
2148 2139 if node == nullid:
2149 2140 raise RevlogError(_("%s: attempt to add null revision") %
2150 2141 (self.indexfile))
2151 2142 if node == wdirid or node in wdirfilenodeids:
2152 2143 raise RevlogError(_("%s: attempt to add wdir revision") %
2153 2144 (self.indexfile))
2154 2145
2155 2146 if self._inline:
2156 2147 fh = ifh
2157 2148 else:
2158 2149 fh = dfh
2159 2150
2160 2151 btext = [rawtext]
2161 2152
2162 2153 curr = len(self)
2163 2154 prev = curr - 1
2164 2155 offset = self.end(prev)
2165 2156 p1r, p2r = self.rev(p1), self.rev(p2)
2166 2157
2167 2158 # full versions are inserted when the needed deltas
2168 2159 # become comparable to the uncompressed text
2169 2160 if rawtext is None:
2170 2161             # we need the rawtext size before flag processors change it,
2171 2162             # i.e. the non-raw size. use revlog explicitly to bypass
2172 2163             # filelog's extra logic that might remove the metadata size.
2173 2164 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2174 2165 cachedelta[1])
2175 2166 else:
2176 2167 textlen = len(rawtext)
2177 2168
2178 2169 if deltacomputer is None:
2179 2170 deltacomputer = _deltacomputer(self)
2180 2171
2181 2172 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2182 2173
2183 2174         # no delta for flag-processor revisions (see "candelta" for why);
2184 2175         # candelta itself is not called since only one revision needs
2185 2176         # testing, and this avoids the overhead of fetching flags again.
2186 2177 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2187 2178 deltainfo = None
2188 2179 else:
2189 2180 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2190 2181
2191 2182 if deltainfo is not None:
2192 2183 base = deltainfo.base
2193 2184 chainbase = deltainfo.chainbase
2194 2185 data = deltainfo.data
2195 2186 l = deltainfo.deltalen
2196 2187 else:
2197 2188 rawtext = deltacomputer.buildtext(revinfo, fh)
2198 2189 data = self.compress(rawtext)
2199 2190 l = len(data[1]) + len(data[0])
2200 2191 base = chainbase = curr
2201 2192
2202 2193 e = (offset_type(offset, flags), l, textlen,
2203 2194 base, link, p1r, p2r, node)
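        # entry layout, as packed above: (offset << 16 | flags, compressed
        # length, rawtext length, delta base rev, linkrev, p1 rev, p2 rev,
        # node)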
2204 2195 self.index.insert(-1, e)
2205 2196 self.nodemap[node] = curr
2206 2197
2207 2198 entry = self._io.packentry(e, self.node, self.version, curr)
2208 2199 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2209 2200
2210 2201 if alwayscache and rawtext is None:
2211 2202             rawtext = deltacomputer.buildtext(revinfo, fh)
2212 2203
2213 2204 if type(rawtext) == bytes: # only accept immutable objects
2214 2205 self._cache = (node, curr, rawtext)
2215 2206 self._chainbasecache[curr] = chainbase
2216 2207 return node
2217 2208
2218 2209 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2219 2210 # Files opened in a+ mode have inconsistent behavior on various
2220 2211 # platforms. Windows requires that a file positioning call be made
2221 2212 # when the file handle transitions between reads and writes. See
2222 2213 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2223 2214 # platforms, Python or the platform itself can be buggy. Some versions
2224 2215 # of Solaris have been observed to not append at the end of the file
2225 2216 # if the file was seeked to before the end. See issue4943 for more.
2226 2217 #
2227 2218 # We work around this issue by inserting a seek() before writing.
2228 2219 # Note: This is likely not necessary on Python 3.
2229 2220 ifh.seek(0, os.SEEK_END)
2230 2221 if dfh:
2231 2222 dfh.seek(0, os.SEEK_END)
2232 2223
2233 2224 curr = len(self) - 1
2234 2225 if not self._inline:
2235 2226 transaction.add(self.datafile, offset)
2236 2227 transaction.add(self.indexfile, curr * len(entry))
2237 2228 if data[0]:
2238 2229 dfh.write(data[0])
2239 2230 dfh.write(data[1])
2240 2231 ifh.write(entry)
2241 2232 else:
2242 2233 offset += curr * self._io.size
2243 2234 transaction.add(self.indexfile, offset, curr)
2244 2235 ifh.write(entry)
2245 2236 ifh.write(data[0])
2246 2237 ifh.write(data[1])
2247 2238 self._enforceinlinesize(transaction, ifh)
2248 2239
2249 2240 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2250 2241 """
2251 2242 add a delta group
2252 2243
2253 2244 given a set of deltas, add them to the revision log. the
2254 2245 first delta is against its parent, which should be in our
2255 2246 log, the rest are against the previous delta.
2256 2247
2257 2248 If ``addrevisioncb`` is defined, it will be called with arguments of
2258 2249 this revlog and the node that was added.
2259 2250 """
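        # Each element of ``deltas`` is expected to be the 7-tuple unpacked
        # below: (node, p1, p2, linknode, deltabase, delta, flags).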
2260 2251
2261 2252 nodes = []
2262 2253
2263 2254 r = len(self)
2264 2255 end = 0
2265 2256 if r:
2266 2257 end = self.end(r - 1)
2267 2258 ifh = self._indexfp("a+")
2268 2259 isize = r * self._io.size
2269 2260 if self._inline:
2270 2261 transaction.add(self.indexfile, end + isize, r)
2271 2262 dfh = None
2272 2263 else:
2273 2264 transaction.add(self.indexfile, isize, r)
2274 2265 transaction.add(self.datafile, end)
2275 2266 dfh = self._datafp("a+")
2276 2267 def flush():
2277 2268 if dfh:
2278 2269 dfh.flush()
2279 2270 ifh.flush()
2280 2271 try:
2281 2272 deltacomputer = _deltacomputer(self)
2282 2273 # loop through our set of deltas
2283 2274 for data in deltas:
2284 2275 node, p1, p2, linknode, deltabase, delta, flags = data
2285 2276 link = linkmapper(linknode)
2286 2277 flags = flags or REVIDX_DEFAULT_FLAGS
2287 2278
2288 2279 nodes.append(node)
2289 2280
2290 2281 if node in self.nodemap:
2291 2282 # this can happen if two branches make the same change
2292 2283 continue
2293 2284
2294 2285 for p in (p1, p2):
2295 2286 if p not in self.nodemap:
2296 2287 raise LookupError(p, self.indexfile,
2297 2288 _('unknown parent'))
2298 2289
2299 2290 if deltabase not in self.nodemap:
2300 2291 raise LookupError(deltabase, self.indexfile,
2301 2292 _('unknown delta base'))
2302 2293
2303 2294 baserev = self.rev(deltabase)
2304 2295
2305 2296 if baserev != nullrev and self.iscensored(baserev):
2306 2297 # if base is censored, delta must be full replacement in a
2307 2298 # single patch operation
2308 2299 hlen = struct.calcsize(">lll")
2309 2300 oldlen = self.rawsize(baserev)
2310 2301 newlen = len(delta) - hlen
2311 2302 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2312 2303 raise error.CensoredBaseError(self.indexfile,
2313 2304 self.node(baserev))
2314 2305
2315 2306 if not flags and self._peek_iscensored(baserev, delta, flush):
2316 2307 flags |= REVIDX_ISCENSORED
2317 2308
2318 2309 # We assume consumers of addrevisioncb will want to retrieve
2319 2310 # the added revision, which will require a call to
2320 2311 # revision(). revision() will fast path if there is a cache
2321 2312 # hit. So, we tell _addrevision() to always cache in this case.
2322 2313 # We're only using addgroup() in the context of changegroup
2323 2314 # generation so the revision data can always be handled as raw
2324 2315 # by the flagprocessor.
2325 2316 self._addrevision(node, None, transaction, link,
2326 2317 p1, p2, flags, (baserev, delta),
2327 2318 ifh, dfh,
2328 2319 alwayscache=bool(addrevisioncb),
2329 2320 deltacomputer=deltacomputer)
2330 2321
2331 2322 if addrevisioncb:
2332 2323 addrevisioncb(self, node)
2333 2324
2334 2325 if not dfh and not self._inline:
2335 2326 # addrevision switched from inline to conventional
2336 2327 # reopen the index
2337 2328 ifh.close()
2338 2329 dfh = self._datafp("a+")
2339 2330 ifh = self._indexfp("a+")
2340 2331 finally:
2341 2332 if dfh:
2342 2333 dfh.close()
2343 2334 ifh.close()
2344 2335
2345 2336 return nodes
2346 2337
2347 2338 def iscensored(self, rev):
2348 2339 """Check if a file revision is censored."""
2349 2340 if not self._censorable:
2350 2341 return False
2351 2342
2352 2343 return self.flags(rev) & REVIDX_ISCENSORED
2353 2344
2354 2345 def _peek_iscensored(self, baserev, delta, flush):
2355 2346 """Quickly check if a delta produces a censored revision."""
2356 2347 if not self._censorable:
2357 2348 return False
2358 2349
2359 2350 # Fragile heuristic: unless new file meta keys are added alphabetically
2360 2351 # preceding "censored", all censored revisions are prefixed by
2361 2352 # "\1\ncensored:". A delta producing such a censored revision must be a
2362 2353 # full-replacement delta, so we inspect the first and only patch in the
2363 2354 # delta for this prefix.
2364 2355 hlen = struct.calcsize(">lll")
2365 2356 if len(delta) <= hlen:
2366 2357 return False
2367 2358
2368 2359 oldlen = self.rawsize(baserev)
2369 2360 newlen = len(delta) - hlen
2370 2361 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2371 2362 return False
2372 2363
2373 2364 add = "\1\ncensored:"
2374 2365 addlen = len(add)
2375 2366 return newlen >= addlen and delta[hlen:hlen + addlen] == add
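    # Sketch: a censoring delta is a single full-replacement patch, i.e. a
    # ">lll" header followed by a payload beginning "\1\ncensored:", which
    # is exactly what the prefix comparison above detects.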
2376 2367
2377 2368 def getstrippoint(self, minlink):
2378 2369 """find the minimum rev that must be stripped to strip the linkrev
2379 2370
2380 2371 Returns a tuple containing the minimum rev and a set of all revs that
2381 2372 have linkrevs that will be broken by this strip.
2382 2373 """
2383 2374 brokenrevs = set()
2384 2375 strippoint = len(self)
2385 2376
2386 2377 heads = {}
2387 2378 futurelargelinkrevs = set()
2388 2379 for head in self.headrevs():
2389 2380 headlinkrev = self.linkrev(head)
2390 2381 heads[head] = headlinkrev
2391 2382 if headlinkrev >= minlink:
2392 2383 futurelargelinkrevs.add(headlinkrev)
2393 2384
2394 2385 # This algorithm involves walking down the rev graph, starting at the
2395 2386 # heads. Since the revs are topologically sorted according to linkrev,
2396 2387 # once all head linkrevs are below the minlink, we know there are
2397 2388 # no more revs that could have a linkrev greater than minlink.
2398 2389 # So we can stop walking.
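        # Example: stripping minlink=5 from a linear revlog whose linkrevs
        # equal the revnums pops heads tip..5, ends with strippoint == 5,
        # and reports no broken linkrevs.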
2399 2390 while futurelargelinkrevs:
2400 2391 strippoint -= 1
2401 2392 linkrev = heads.pop(strippoint)
2402 2393
2403 2394 if linkrev < minlink:
2404 2395 brokenrevs.add(strippoint)
2405 2396 else:
2406 2397 futurelargelinkrevs.remove(linkrev)
2407 2398
2408 2399 for p in self.parentrevs(strippoint):
2409 2400 if p != nullrev:
2410 2401 plinkrev = self.linkrev(p)
2411 2402 heads[p] = plinkrev
2412 2403 if plinkrev >= minlink:
2413 2404 futurelargelinkrevs.add(plinkrev)
2414 2405
2415 2406 return strippoint, brokenrevs
2416 2407
2417 2408 def strip(self, minlink, transaction):
2418 2409 """truncate the revlog on the first revision with a linkrev >= minlink
2419 2410
2420 2411 This function is called when we're stripping revision minlink and
2421 2412 its descendants from the repository.
2422 2413
2423 2414 We have to remove all revisions with linkrev >= minlink, because
2424 2415 the equivalent changelog revisions will be renumbered after the
2425 2416 strip.
2426 2417
2427 2418 So we truncate the revlog on the first of these revisions, and
2428 2419 trust that the caller has saved the revisions that shouldn't be
2429 2420 removed and that it'll re-add them after this truncation.
2430 2421 """
2431 2422 if len(self) == 0:
2432 2423 return
2433 2424
2434 2425 rev, _ = self.getstrippoint(minlink)
2435 2426 if rev == len(self):
2436 2427 return
2437 2428
2438 2429 # first truncate the files on disk
2439 2430 end = self.start(rev)
2440 2431 if not self._inline:
2441 2432 transaction.add(self.datafile, end)
2442 2433 end = rev * self._io.size
2443 2434 else:
2444 2435 end += rev * self._io.size
2445 2436
2446 2437 transaction.add(self.indexfile, end)
2447 2438
2448 2439 # then reset internal state in memory to forget those revisions
2449 2440 self._cache = None
2450 2441 self._chaininfocache = {}
2451 2442 self._chunkclear()
2452 2443 for x in xrange(rev, len(self)):
2453 2444 del self.nodemap[self.node(x)]
2454 2445
2455 2446 del self.index[rev:-1]
2456 2447 self._nodepos = None
2457 2448
2458 2449 def checksize(self):
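        # return contract, inferred from the code below: a (dd, di) pair of
        # byte-size discrepancies for the data file and index file; (0, 0)
        # means both files match the sizes the index implies.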
2459 2450 expected = 0
2460 2451 if len(self):
2461 2452 expected = max(0, self.end(len(self) - 1))
2462 2453
2463 2454 try:
2464 2455 with self._datafp() as f:
2465 2456 f.seek(0, 2)
2466 2457 actual = f.tell()
2467 2458 dd = actual - expected
2468 2459 except IOError as inst:
2469 2460 if inst.errno != errno.ENOENT:
2470 2461 raise
2471 2462 dd = 0
2472 2463
2473 2464 try:
2474 2465 f = self.opener(self.indexfile)
2475 2466 f.seek(0, 2)
2476 2467 actual = f.tell()
2477 2468 f.close()
2478 2469 s = self._io.size
2479 2470 i = max(0, actual // s)
2480 2471 di = actual - (i * s)
2481 2472 if self._inline:
2482 2473 databytes = 0
2483 2474 for r in self:
2484 2475 databytes += max(0, self.length(r))
2485 2476 dd = 0
2486 2477 di = actual - len(self) * s - databytes
2487 2478 except IOError as inst:
2488 2479 if inst.errno != errno.ENOENT:
2489 2480 raise
2490 2481 di = 0
2491 2482
2492 2483 return (dd, di)
2493 2484
2494 2485 def files(self):
2495 2486 res = [self.indexfile]
2496 2487 if not self._inline:
2497 2488 res.append(self.datafile)
2498 2489 return res
2499 2490
2500 2491 DELTAREUSEALWAYS = 'always'
2501 2492 DELTAREUSESAMEREVS = 'samerevs'
2502 2493 DELTAREUSENEVER = 'never'
2503 2494
2504 2495 DELTAREUSEFULLADD = 'fulladd'
2505 2496
2506 2497 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2507 2498
2508 2499 def clone(self, tr, destrevlog, addrevisioncb=None,
2509 2500 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2510 2501 """Copy this revlog to another, possibly with format changes.
2511 2502
2512 2503 The destination revlog will contain the same revisions and nodes.
2513 2504 However, it may not be bit-for-bit identical due to e.g. delta encoding
2514 2505 differences.
2515 2506
2516 2507         The ``deltareuse`` argument controls how deltas from the existing revlog
2517 2508 are preserved in the destination revlog. The argument can have the
2518 2509 following values:
2519 2510
2520 2511 DELTAREUSEALWAYS
2521 2512 Deltas will always be reused (if possible), even if the destination
2522 2513 revlog would not select the same revisions for the delta. This is the
2523 2514 fastest mode of operation.
2524 2515 DELTAREUSESAMEREVS
2525 2516 Deltas will be reused if the destination revlog would pick the same
2526 2517 revisions for the delta. This mode strikes a balance between speed
2527 2518 and optimization.
2528 2519 DELTAREUSENEVER
2529 2520 Deltas will never be reused. This is the slowest mode of execution.
2530 2521 This mode can be used to recompute deltas (e.g. if the diff/delta
2531 2522 algorithm changes).
2532 2523
2533 2524 Delta computation can be slow, so the choice of delta reuse policy can
2534 2525 significantly affect run time.
2535 2526
2536 2527 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2537 2528 two extremes. Deltas will be reused if they are appropriate. But if the
2538 2529 delta could choose a better revision, it will do so. This means if you
2539 2530 are converting a non-generaldelta revlog to a generaldelta revlog,
2540 2531 deltas will be recomputed if the delta's parent isn't a parent of the
2541 2532 revision.
2542 2533
2543 2534 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2544 2535 controls whether to compute deltas against both parents for merges.
2545 2536         If not specified, the destination revlog's existing setting is kept.
2546 2537 """
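        # Illustrative call (``tr``, ``src`` and ``dest`` assumed to exist):
        #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
        # recomputes every delta while copying, trading speed for optimality.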
2547 2538 if deltareuse not in self.DELTAREUSEALL:
2548 2539 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2549 2540
2550 2541 if len(destrevlog):
2551 2542 raise ValueError(_('destination revlog is not empty'))
2552 2543
2553 2544 if getattr(self, 'filteredrevs', None):
2554 2545 raise ValueError(_('source revlog has filtered revisions'))
2555 2546 if getattr(destrevlog, 'filteredrevs', None):
2556 2547 raise ValueError(_('destination revlog has filtered revisions'))
2557 2548
2558 2549 # lazydeltabase controls whether to reuse a cached delta, if possible.
2559 2550 oldlazydeltabase = destrevlog._lazydeltabase
2560 2551 oldamd = destrevlog._aggressivemergedeltas
2561 2552
2562 2553 try:
2563 2554 if deltareuse == self.DELTAREUSEALWAYS:
2564 2555 destrevlog._lazydeltabase = True
2565 2556 elif deltareuse == self.DELTAREUSESAMEREVS:
2566 2557 destrevlog._lazydeltabase = False
2567 2558
2568 2559 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2569 2560
2570 2561 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2571 2562 self.DELTAREUSESAMEREVS)
2572 2563
2573 2564 deltacomputer = _deltacomputer(destrevlog)
2574 2565 index = self.index
2575 2566 for rev in self:
2576 2567 entry = index[rev]
2577 2568
2578 2569 # Some classes override linkrev to take filtered revs into
2579 2570 # account. Use raw entry from index.
2580 2571 flags = entry[0] & 0xffff
2581 2572 linkrev = entry[4]
2582 2573 p1 = index[entry[5]][7]
2583 2574 p2 = index[entry[6]][7]
2584 2575 node = entry[7]
2585 2576
2586 2577 # (Possibly) reuse the delta from the revlog if allowed and
2587 2578 # the revlog chunk is a delta.
2588 2579 cachedelta = None
2589 2580 rawtext = None
2590 2581 if populatecachedelta:
2591 2582 dp = self.deltaparent(rev)
2592 2583 if dp != nullrev:
2593 2584 cachedelta = (dp, bytes(self._chunk(rev)))
2594 2585
2595 2586 if not cachedelta:
2596 2587 rawtext = self.revision(rev, raw=True)
2597 2588
2598 2589
2599 2590 if deltareuse == self.DELTAREUSEFULLADD:
2600 2591 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2601 2592 cachedelta=cachedelta,
2602 2593 node=node, flags=flags,
2603 2594 deltacomputer=deltacomputer)
2604 2595 else:
2605 2596 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2606 2597 checkambig=False)
2607 2598 dfh = None
2608 2599 if not destrevlog._inline:
2609 2600 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2610 2601 try:
2611 2602 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2612 2603 p2, flags, cachedelta, ifh, dfh,
2613 2604 deltacomputer=deltacomputer)
2614 2605 finally:
2615 2606 if dfh:
2616 2607 dfh.close()
2617 2608 ifh.close()
2618 2609
2619 2610 if addrevisioncb:
2620 2611 addrevisioncb(self, rev, node)
2621 2612 finally:
2622 2613 destrevlog._lazydeltabase = oldlazydeltabase
2623 2614 destrevlog._aggressivemergedeltas = oldamd