revlog: add a doctest to _trimchunk
Boris Feld
r38657:740f7d44 default
@@ -1,2656 +1,2691 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask of flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
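REVIDX_KNOWN_FLAGS is just the OR of the three flags above, and the rawtext-changing mask omits ELLIPSIS; a doctest-style check makes the values concrete:

>>> hex(REVIDX_KNOWN_FLAGS)
'0xe000'
>>> hex(REVIDX_RAWTEXT_CHANGING_FLAGS)
'0xa000'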
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
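packmeta() and parsemeta() are inverses; the size returned by parsemeta() covers the two \1\n markers plus the metadata text, as a doctest-style sketch shows:

>>> meta, size = parsemeta(packmeta({'copy': 'a'}, 'file data'))
>>> meta['copy'], size
('a', 12)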
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate the hash. If
142 142 "write" returns True, the "rawtext" returned by "write" should be used
143 143 to generate the hash. Usually, "write" and "read" return different
144 144 booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
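For illustration only, a processor tuple wired to an existing flag could look like the sketch below (hypothetical helpers; the real REVIDX_EXTSTORED processor is registered by an extension such as LFS):

def _readidentity(self, rawtext):
    # nothing to decode; the result is usable for hash checking
    return rawtext, True

def _writeidentity(self, text):
    # nothing to encode; the result is usable for hash checking
    return text, True

def _rawidentity(self, rawtext):
    # rawtext may be used for hash integrity checking
    return True

addflagprocessor(REVIDX_EXTSTORED,
                 (_readidentity, _writeidentity, _rawidentity))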
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
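The packed value keeps the byte offset in the high bits and the 16-bit flag field in the low bits, so the two accessors round-trip it:

>>> v = offset_type(1024, REVIDX_ISCENSORED)
>>> getoffset(v)
1024
>>> gettype(v) == REVIDX_ISCENSORED
True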
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
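Equivalently, when at most one real parent exists the node is sha1(nullid + p1 + text); a doctest-style sketch with a made-up 20-byte parent:

>>> p1 = b'\x11' * 20
>>> hash(b'data', p1, nullid) == hashlib.sha1(nullid + p1 + b'data').digest()
True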
199 199 class _testrevlog(object):
200 200 """minimalist fake revlog to use in doctests"""
201 201
202 202 def __init__(self, data, density=0.5, mingap=0):
203 203 """data is a list of revision payload boundaries"""
204 204 self._data = data
205 205 self._srdensitythreshold = density
206 206 self._srmingapsize = mingap
207 207
208 208 def start(self, rev):
209 209 if rev == 0:
210 210 return 0
211 211 return self._data[rev - 1]
212 212
213 213 def end(self, rev):
214 214 return self._data[rev]
215 215
216 216 def length(self, rev):
217 217 return self.end(rev) - self.start(rev)
218 218
219 219 def _trimchunk(revlog, revs, startidx, endidx=None):
220 220 """returns revs[startidx:endidx] without empty trailing revs
221
222 Doctest Setup
223 >>> revlog = _testrevlog([
224 ... 5, #0
225 ... 10, #1
226 ... 12, #2
227 ... 12, #3 (empty)
228 ... 17, #4
229 ... 21, #5
230 ... 21, #6 (empty)
231 ... ])
232
233 Contiguous cases:
234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 [0, 1, 2, 3, 4, 5]
236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 [0, 1, 2, 3, 4]
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 [0, 1, 2]
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 [2]
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 [3, 4, 5]
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 [3, 4]
246
247 Discontiguous cases:
248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 [1, 3, 5]
250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 [1]
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 [3, 5]
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 [3, 5]
221 256 """
222 257 length = revlog.length
223 258
224 259 if endidx is None:
225 260 endidx = len(revs)
226 261
227 262 # Trim empty revs at the end, but never the very first revision of a chain
228 263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
229 264 endidx -= 1
230 265
231 266 return revs[startidx:endidx]
232 267
233 268 def _segmentspan(revlog, revs):
234 269 """Get the byte span of a segment of revisions
235 270
236 271 revs is a sorted array of revision numbers
237 272 """
238 273 if not revs:
239 274 return 0
240 275 return revlog.end(revs[-1]) - revlog.start(revs[0])
241 276
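Since the span is simply end-of-last minus start-of-first, gaps and empty revisions in between still count; a doctest-style sketch against the fake revlog:

>>> revlog = _testrevlog([5, 10, 12, 12, 17, 21, 21])
>>> _segmentspan(revlog, [0, 1, 2, 3, 4])
17
>>> _segmentspan(revlog, [2, 4])
7
>>> _segmentspan(revlog, [])
0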
242 277 def _slicechunk(revlog, revs):
243 278 """slice revs to reduce the amount of unrelated data to be read from disk.
244 279
245 280 ``revs`` is sliced into groups that should each be read in one go.
246 281 Assume that revs are sorted.
247 282 """
248 283 start = revlog.start
249 284 length = revlog.length
250 285
251 286 if len(revs) <= 1:
252 287 yield revs
253 288 return
254 289
255 290 readdata = deltachainspan = _segmentspan(revlog, revs)
256 291
257 292 if deltachainspan < revlog._srmingapsize:
258 293 yield revs
259 294 return
260 295
261 296 chainpayload = sum(length(r) for r in revs)
262 297
263 298 if deltachainspan:
264 299 density = chainpayload / float(deltachainspan)
265 300 else:
266 301 density = 1.0
267 302
268 303 if density >= revlog._srdensitythreshold:
269 304 yield revs
270 305 return
271 306
272 307 # Store the gaps in a heap to have them sorted by decreasing size
273 308 gapsheap = []
274 309 heapq.heapify(gapsheap)
275 310 prevend = None
276 311 for i, rev in enumerate(revs):
277 312 revstart = start(rev)
278 313 revlen = length(rev)
279 314
280 315 # Skip empty revisions to form larger holes
281 316 if revlen == 0:
282 317 continue
283 318
284 319 if prevend is not None:
285 320 gapsize = revstart - prevend
286 321 # only consider holes that are large enough
287 322 if gapsize > revlog._srmingapsize:
288 323 heapq.heappush(gapsheap, (-gapsize, i))
289 324
290 325 prevend = revstart + revlen
291 326
292 327 # Collect the indices of the largest holes until the density is acceptable
293 328 indicesheap = []
294 329 heapq.heapify(indicesheap)
295 330 while gapsheap and density < revlog._srdensitythreshold:
296 331 oppgapsize, gapidx = heapq.heappop(gapsheap)
297 332
298 333 heapq.heappush(indicesheap, gapidx)
299 334
300 335 # the gap sizes are stored as negatives to be sorted decreasingly
301 336 # by the heap
302 337 readdata -= (-oppgapsize)
303 338 if readdata > 0:
304 339 density = chainpayload / float(readdata)
305 340 else:
306 341 density = 1.0
307 342
308 343 # Cut the revs at collected indices
309 344 previdx = 0
310 345 while indicesheap:
311 346 idx = heapq.heappop(indicesheap)
312 347
313 348 chunk = _trimchunk(revlog, revs, previdx, idx)
314 349 if chunk:
315 350 yield chunk
316 351
317 352 previdx = idx
318 353
319 354 chunk = _trimchunk(revlog, revs, previdx)
320 355 if chunk:
321 356 yield chunk
322 357
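A doctest-style sketch with the fake revlog again: a dense chain is yielded whole, while a chain whose payload density falls below the threshold is cut at its largest gap:

>>> revlog = _testrevlog([5, 10, 12, 12, 17, 21, 21], density=0.8)
>>> list(_slicechunk(revlog, [0, 1, 2]))
[[0, 1, 2]]
>>> list(_slicechunk(revlog, [0, 4, 5]))
[[0], [4, 5]]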
323 358 @attr.s(slots=True, frozen=True)
324 359 class _deltainfo(object):
325 360 distance = attr.ib()
326 361 deltalen = attr.ib()
327 362 data = attr.ib()
328 363 base = attr.ib()
329 364 chainbase = attr.ib()
330 365 chainlen = attr.ib()
331 366 compresseddeltalen = attr.ib()
332 367
333 368 class _deltacomputer(object):
334 369 def __init__(self, revlog):
335 370 self.revlog = revlog
336 371
337 372 def _getcandidaterevs(self, p1, p2, cachedelta):
338 373 """
339 374 Provides candidate revisions worth diffing against,
340 375 grouped from cheapest to most expensive to try.
341 376 """
342 377 revlog = self.revlog
343 378 gdelta = revlog._generaldelta
344 379 curr = len(revlog)
345 380 prev = curr - 1
346 381 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
347 382
348 383 # should we try to build a delta?
349 384 if prev != nullrev and revlog.storedeltachains:
350 385 tested = set()
351 386 # This condition is true most of the time when processing
352 387 # changegroup data into a generaldelta repo. The only time it
353 388 # isn't true is if this is the first revision in a delta chain
354 389 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
355 390 if cachedelta and gdelta and revlog._lazydeltabase:
356 391 # Assume what we received from the server is a good choice
357 392 # building the delta will then reuse the cache
358 393 yield (cachedelta[0],)
359 394 tested.add(cachedelta[0])
360 395
361 396 if gdelta:
362 397 # exclude the base already tested via the lazy path, if any
363 398 parents = [p for p in (p1r, p2r)
364 399 if p != nullrev and p not in tested]
365 400
366 401 if not revlog._aggressivemergedeltas and len(parents) == 2:
367 402 parents.sort()
368 403 # To minimize the chance of having to build a fulltext,
369 404 # pick first whichever parent is closest to us (max rev)
370 405 yield (parents[1],)
371 406 # then the other one (min rev) if the first did not fit
372 407 yield (parents[0],)
373 408 tested.update(parents)
374 409 elif len(parents) > 0:
375 410 # Test all parents (1 or 2), and keep the best candidate
376 411 yield parents
377 412 tested.update(parents)
378 413
379 414 if prev not in tested:
380 415 # other approaches failed; try against prev to hopefully save us a
381 416 # fulltext.
382 417 yield (prev,)
383 418 tested.add(prev)
384 419
385 420 def buildtext(self, revinfo, fh):
386 421 """Builds a fulltext version of a revision
387 422
388 423 revinfo: _revisioninfo instance that contains all needed info
389 424 fh: file handle to either the .i or the .d revlog file,
390 425 depending on whether it is inlined or not
391 426 """
392 427 btext = revinfo.btext
393 428 if btext[0] is not None:
394 429 return btext[0]
395 430
396 431 revlog = self.revlog
397 432 cachedelta = revinfo.cachedelta
398 433 flags = revinfo.flags
399 434 node = revinfo.node
400 435
401 436 baserev = cachedelta[0]
402 437 delta = cachedelta[1]
403 438 # special case deltas which replace entire base; no need to decode
404 439 # base revision. this neatly avoids censored bases, which throw when
405 440 # they're decoded.
406 441 hlen = struct.calcsize(">lll")
407 442 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
408 443 len(delta) - hlen):
409 444 btext[0] = delta[hlen:]
410 445 else:
411 446 # deltabase is rawtext before changed by flag processors, which is
412 447 # equivalent to non-raw text
413 448 basetext = revlog.revision(baserev, _df=fh, raw=False)
414 449 btext[0] = mdiff.patch(basetext, delta)
415 450
416 451 try:
417 452 res = revlog._processflags(btext[0], flags, 'read', raw=True)
418 453 btext[0], validatehash = res
419 454 if validatehash:
420 455 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
421 456 if flags & REVIDX_ISCENSORED:
422 457 raise RevlogError(_('node %s is not censored') % node)
423 458 except CensoredNodeError:
424 459 # must pass the censored index flag to add censored revisions
425 460 if not flags & REVIDX_ISCENSORED:
426 461 raise
427 462 return btext[0]
428 463
429 464 def _builddeltadiff(self, base, revinfo, fh):
430 465 revlog = self.revlog
431 466 t = self.buildtext(revinfo, fh)
432 467 if revlog.iscensored(base):
433 468 # deltas based on a censored revision must replace the
434 469 # full content in one patch, so delta works everywhere
435 470 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
436 471 delta = header + t
437 472 else:
438 473 ptext = revlog.revision(base, _df=fh, raw=True)
439 474 delta = mdiff.textdiff(ptext, t)
440 475
441 476 return delta
442 477
443 478 def _builddeltainfo(self, revinfo, base, fh):
444 479 # can we use the cached delta?
445 480 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
446 481 delta = revinfo.cachedelta[1]
447 482 else:
448 483 delta = self._builddeltadiff(base, revinfo, fh)
449 484 revlog = self.revlog
450 485 header, data = revlog.compress(delta)
451 486 deltalen = len(header) + len(data)
452 487 chainbase = revlog.chainbase(base)
453 488 offset = revlog.end(len(revlog) - 1)
454 489 dist = deltalen + offset - revlog.start(chainbase)
455 490 if revlog._generaldelta:
456 491 deltabase = base
457 492 else:
458 493 deltabase = chainbase
459 494 chainlen, compresseddeltalen = revlog._chaininfo(base)
460 495 chainlen += 1
461 496 compresseddeltalen += deltalen
462 497 return _deltainfo(dist, deltalen, (header, data), deltabase,
463 498 chainbase, chainlen, compresseddeltalen)
464 499
465 500 def finddeltainfo(self, revinfo, fh):
466 501 """Find an acceptable delta against a candidate revision
467 502
468 503 revinfo: information about the revision (instance of _revisioninfo)
469 504 fh: file handle to either the .i or the .d revlog file,
470 505 depending on whether it is inlined or not
471 506
472 507 Returns the first acceptable candidate revision, as ordered by
473 508 _getcandidaterevs
474 509 """
475 510 cachedelta = revinfo.cachedelta
476 511 p1 = revinfo.p1
477 512 p2 = revinfo.p2
478 513 revlog = self.revlog
479 514
480 515 deltainfo = None
481 516 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
482 517 nominateddeltas = []
483 518 for candidaterev in candidaterevs:
484 519 # no delta for rawtext-changing revs (see "candelta" for why)
485 520 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
486 521 continue
487 522 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
488 523 if revlog._isgooddeltainfo(candidatedelta, revinfo):
489 524 nominateddeltas.append(candidatedelta)
490 525 if nominateddeltas:
491 526 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
492 527 break
493 528
494 529 return deltainfo
495 530
496 531 @attr.s(slots=True, frozen=True)
497 532 class _revisioninfo(object):
498 533 """Information about a revision that allows building its fulltext
499 534 node: expected hash of the revision
500 535 p1, p2: parent revs of the revision
501 536 btext: built text cache consisting of a one-element list
502 537 cachedelta: (baserev, uncompressed_delta) or None
503 538 flags: flags associated to the revision storage
504 539
505 540 One of btext[0] or cachedelta must be set.
506 541 """
507 542 node = attr.ib()
508 543 p1 = attr.ib()
509 544 p2 = attr.ib()
510 545 btext = attr.ib()
511 546 textlen = attr.ib()
512 547 cachedelta = attr.ib()
513 548 flags = attr.ib()
514 549
515 550 # index v0:
516 551 # 4 bytes: offset
517 552 # 4 bytes: compressed length
518 553 # 4 bytes: base rev
519 554 # 4 bytes: link rev
520 555 # 20 bytes: parent 1 nodeid
521 556 # 20 bytes: parent 2 nodeid
522 557 # 20 bytes: nodeid
523 558 indexformatv0 = struct.Struct(">4l20s20s20s")
524 559 indexformatv0_pack = indexformatv0.pack
525 560 indexformatv0_unpack = indexformatv0.unpack
526 561
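Each v0 entry is thus a fixed 76 bytes: four 4-byte big-endian integers followed by three 20-byte nodeids.

>>> indexformatv0.size
76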
527 562 class revlogoldio(object):
528 563 def __init__(self):
529 564 self.size = indexformatv0.size
530 565
531 566 def parseindex(self, data, inline):
532 567 s = self.size
533 568 index = []
534 569 nodemap = {nullid: nullrev}
535 570 n = off = 0
536 571 l = len(data)
537 572 while off + s <= l:
538 573 cur = data[off:off + s]
539 574 off += s
540 575 e = indexformatv0_unpack(cur)
541 576 # transform to revlogv1 format
542 577 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
543 578 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
544 579 index.append(e2)
545 580 nodemap[e[6]] = n
546 581 n += 1
547 582
548 583 # add the magic null revision at -1
549 584 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
550 585
551 586 return index, nodemap, None
552 587
553 588 def packentry(self, entry, node, version, rev):
554 589 if gettype(entry[0]):
555 590 raise RevlogError(_('index entry flags need revlog version 1'))
556 591 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
557 592 node(entry[5]), node(entry[6]), entry[7])
558 593 return indexformatv0_pack(*e2)
559 594
560 595 # index ng:
561 596 # 6 bytes: offset
562 597 # 2 bytes: flags
563 598 # 4 bytes: compressed length
564 599 # 4 bytes: uncompressed length
565 600 # 4 bytes: base rev
566 601 # 4 bytes: link rev
567 602 # 4 bytes: parent 1 rev
568 603 # 4 bytes: parent 2 rev
569 604 # 32 bytes: nodeid
570 605 indexformatng = struct.Struct(">Qiiiiii20s12x")
571 606 indexformatng_pack = indexformatng.pack
572 607 versionformat = struct.Struct(">I")
573 608 versionformat_pack = versionformat.pack
574 609 versionformat_unpack = versionformat.unpack
575 610
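Each v1 ("ng") entry is a fixed 64 bytes; the 12 trailing pad bytes extend the 20-byte nodeid to the 32-byte field documented above.

>>> indexformatng.size
64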
576 611 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
577 612 # signed integer)
578 613 _maxentrysize = 0x7fffffff
579 614
580 615 class revlogio(object):
581 616 def __init__(self):
582 617 self.size = indexformatng.size
583 618
584 619 def parseindex(self, data, inline):
585 620 # call the C implementation to parse the index data
586 621 index, cache = parsers.parse_index2(data, inline)
587 622 return index, getattr(index, 'nodemap', None), cache
588 623
589 624 def packentry(self, entry, node, version, rev):
590 625 p = indexformatng_pack(*entry)
591 626 if rev == 0:
592 627 p = versionformat_pack(version) + p[4:]
593 628 return p
594 629
595 630 class revlog(object):
596 631 """
597 632 the underlying revision storage object
598 633
599 634 A revlog consists of two parts, an index and the revision data.
600 635
601 636 The index is a file with a fixed record size containing
602 637 information on each revision, including its nodeid (hash), the
603 638 nodeids of its parents, the position and offset of its data within
604 639 the data file, and the revision it's based on. Finally, each entry
605 640 contains a linkrev entry that can serve as a pointer to external
606 641 data.
607 642
608 643 The revision data itself is a linear collection of data chunks.
609 644 Each chunk represents a revision and is usually represented as a
610 645 delta against the previous chunk. To bound lookup time, runs of
611 646 deltas are limited to about 2 times the length of the original
612 647 version data. This makes retrieval of a version proportional to
613 648 its size, or O(1) relative to the number of revisions.
614 649
615 650 Both pieces of the revlog are written to in an append-only
616 651 fashion, which means we never need to rewrite a file to insert or
617 652 remove data, and can use some simple techniques to avoid the need
618 653 for locking while reading.
619 654
620 655 If checkambig, indexfile is opened with checkambig=True at
621 656 writing, to avoid file stat ambiguity.
622 657
623 658 If mmaplargeindex is True, and an mmapindexthreshold is set, the
624 659 index will be mmapped rather than read if it is larger than the
625 660 configured threshold.
626 661
627 662 If censorable is True, the revlog can have censored revisions.
628 663 """
629 664 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
630 665 mmaplargeindex=False, censorable=False):
631 666 """
632 667 create a revlog object
633 668
634 669 opener is a function that abstracts the file opening operation
635 670 and can be used to implement COW semantics or the like.
636 671 """
637 672 self.indexfile = indexfile
638 673 self.datafile = datafile or (indexfile[:-2] + ".d")
639 674 self.opener = opener
640 675 # When True, indexfile is opened with checkambig=True at writing, to
641 676 # avoid file stat ambiguity.
642 677 self._checkambig = checkambig
643 678 self._censorable = censorable
644 679 # 3-tuple of (node, rev, text) for a raw revision.
645 680 self._cache = None
646 681 # Maps rev to chain base rev.
647 682 self._chainbasecache = util.lrucachedict(100)
648 683 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
649 684 self._chunkcache = (0, '')
650 685 # How much data to read and cache into the raw revlog data cache.
651 686 self._chunkcachesize = 65536
652 687 self._maxchainlen = None
653 688 self._aggressivemergedeltas = True
654 689 self.index = []
655 690 # Mapping of partial identifiers to full nodes.
656 691 self._pcache = {}
657 692 # Mapping of revision integer to full node.
658 693 self._nodecache = {nullid: nullrev}
659 694 self._nodepos = None
660 695 self._compengine = 'zlib'
661 696 self._maxdeltachainspan = -1
662 697 self._withsparseread = False
663 698 self._srdensitythreshold = 0.50
664 699 self._srmingapsize = 262144
665 700
666 701 mmapindexthreshold = None
667 702 v = REVLOG_DEFAULT_VERSION
668 703 opts = getattr(opener, 'options', None)
669 704 if opts is not None:
670 705 if 'revlogv2' in opts:
671 706 # version 2 revlogs always use generaldelta.
672 707 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
673 708 elif 'revlogv1' in opts:
674 709 if 'generaldelta' in opts:
675 710 v |= FLAG_GENERALDELTA
676 711 else:
677 712 v = 0
678 713 if 'chunkcachesize' in opts:
679 714 self._chunkcachesize = opts['chunkcachesize']
680 715 if 'maxchainlen' in opts:
681 716 self._maxchainlen = opts['maxchainlen']
682 717 if 'aggressivemergedeltas' in opts:
683 718 self._aggressivemergedeltas = opts['aggressivemergedeltas']
684 719 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
685 720 if 'compengine' in opts:
686 721 self._compengine = opts['compengine']
687 722 if 'maxdeltachainspan' in opts:
688 723 self._maxdeltachainspan = opts['maxdeltachainspan']
689 724 if mmaplargeindex and 'mmapindexthreshold' in opts:
690 725 mmapindexthreshold = opts['mmapindexthreshold']
691 726 self._withsparseread = bool(opts.get('with-sparse-read', False))
692 727 if 'sparse-read-density-threshold' in opts:
693 728 self._srdensitythreshold = opts['sparse-read-density-threshold']
694 729 if 'sparse-read-min-gap-size' in opts:
695 730 self._srmingapsize = opts['sparse-read-min-gap-size']
696 731
697 732 if self._chunkcachesize <= 0:
698 733 raise RevlogError(_('revlog chunk cache size %r is not greater '
699 734 'than 0') % self._chunkcachesize)
700 735 elif self._chunkcachesize & (self._chunkcachesize - 1):
701 736 raise RevlogError(_('revlog chunk cache size %r is not a power '
702 737 'of 2') % self._chunkcachesize)
703 738
704 739 indexdata = ''
705 740 self._initempty = True
706 741 try:
707 742 with self._indexfp() as f:
708 743 if (mmapindexthreshold is not None and
709 744 self.opener.fstat(f).st_size >= mmapindexthreshold):
710 745 indexdata = util.buffer(util.mmapread(f))
711 746 else:
712 747 indexdata = f.read()
713 748 if len(indexdata) > 0:
714 749 v = versionformat_unpack(indexdata[:4])[0]
715 750 self._initempty = False
716 751 except IOError as inst:
717 752 if inst.errno != errno.ENOENT:
718 753 raise
719 754
720 755 self.version = v
721 756 self._inline = v & FLAG_INLINE_DATA
722 757 self._generaldelta = v & FLAG_GENERALDELTA
723 758 flags = v & ~0xFFFF
724 759 fmt = v & 0xFFFF
725 760 if fmt == REVLOGV0:
726 761 if flags:
727 762 raise RevlogError(_('unknown flags (%#04x) in version %d '
728 763 'revlog %s') %
729 764 (flags >> 16, fmt, self.indexfile))
730 765 elif fmt == REVLOGV1:
731 766 if flags & ~REVLOGV1_FLAGS:
732 767 raise RevlogError(_('unknown flags (%#04x) in version %d '
733 768 'revlog %s') %
734 769 (flags >> 16, fmt, self.indexfile))
735 770 elif fmt == REVLOGV2:
736 771 if flags & ~REVLOGV2_FLAGS:
737 772 raise RevlogError(_('unknown flags (%#04x) in version %d '
738 773 'revlog %s') %
739 774 (flags >> 16, fmt, self.indexfile))
740 775 else:
741 776 raise RevlogError(_('unknown version (%d) in revlog %s') %
742 777 (fmt, self.indexfile))
743 778
744 779 self.storedeltachains = True
745 780
746 781 self._io = revlogio()
747 782 if self.version == REVLOGV0:
748 783 self._io = revlogoldio()
749 784 try:
750 785 d = self._io.parseindex(indexdata, self._inline)
751 786 except (ValueError, IndexError):
752 787 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
753 788 self.index, nodemap, self._chunkcache = d
754 789 if nodemap is not None:
755 790 self.nodemap = self._nodecache = nodemap
756 791 if not self._chunkcache:
757 792 self._chunkclear()
758 793 # revnum -> (chain-length, sum-delta-length)
759 794 self._chaininfocache = {}
760 795 # revlog header -> revlog compressor
761 796 self._decompressors = {}
762 797
763 798 @util.propertycache
764 799 def _compressor(self):
765 800 return util.compengines[self._compengine].revlogcompressor()
766 801
767 802 def _indexfp(self, mode='r'):
768 803 """file object for the revlog's index file"""
769 804 args = {r'mode': mode}
770 805 if mode != 'r':
771 806 args[r'checkambig'] = self._checkambig
772 807 if mode == 'w':
773 808 args[r'atomictemp'] = True
774 809 return self.opener(self.indexfile, **args)
775 810
776 811 def _datafp(self, mode='r'):
777 812 """file object for the revlog's data file"""
778 813 return self.opener(self.datafile, mode=mode)
779 814
780 815 @contextlib.contextmanager
781 816 def _datareadfp(self, existingfp=None):
782 817 """file object suitable to read data"""
783 818 if existingfp is not None:
784 819 yield existingfp
785 820 else:
786 821 if self._inline:
787 822 func = self._indexfp
788 823 else:
789 824 func = self._datafp
790 825 with func() as fp:
791 826 yield fp
792 827
793 828 def tip(self):
794 829 return self.node(len(self.index) - 2)
795 830 def __contains__(self, rev):
796 831 return 0 <= rev < len(self)
797 832 def __len__(self):
798 833 return len(self.index) - 1
799 834 def __iter__(self):
800 835 return iter(xrange(len(self)))
801 836 def revs(self, start=0, stop=None):
802 837 """iterate over all revs in this revlog (from start to stop)"""
803 838 step = 1
804 839 if stop is not None:
805 840 if start > stop:
806 841 step = -1
807 842 stop += step
808 843 else:
809 844 stop = len(self)
810 845 return xrange(start, stop, step)
811 846
812 847 @util.propertycache
813 848 def nodemap(self):
814 849 self.rev(self.node(0))
815 850 return self._nodecache
816 851
817 852 def hasnode(self, node):
818 853 try:
819 854 self.rev(node)
820 855 return True
821 856 except KeyError:
822 857 return False
823 858
824 859 def candelta(self, baserev, rev):
825 860 """whether two revisions (baserev, rev) can be delta-ed or not"""
826 861 # Disable delta if either rev requires a content-changing flag
827 862 # processor (ex. LFS). This is because such flag processor can alter
828 863 # the rawtext content that the delta will be based on, and two clients
829 864 # could have the same revlog node with different flags (i.e. different
830 865 # rawtext contents) and the delta could be incompatible.
831 866 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
832 867 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
833 868 return False
834 869 return True
835 870
836 871 def clearcaches(self):
837 872 self._cache = None
838 873 self._chainbasecache.clear()
839 874 self._chunkcache = (0, '')
840 875 self._pcache = {}
841 876
842 877 try:
843 878 self._nodecache.clearcaches()
844 879 except AttributeError:
845 880 self._nodecache = {nullid: nullrev}
846 881 self._nodepos = None
847 882
848 883 def rev(self, node):
849 884 try:
850 885 return self._nodecache[node]
851 886 except TypeError:
852 887 raise
853 888 except RevlogError:
854 889 # parsers.c radix tree lookup failed
855 890 if node == wdirid or node in wdirfilenodeids:
856 891 raise error.WdirUnsupported
857 892 raise LookupError(node, self.indexfile, _('no node'))
858 893 except KeyError:
859 894 # pure python cache lookup failed
860 895 n = self._nodecache
861 896 i = self.index
862 897 p = self._nodepos
863 898 if p is None:
864 899 p = len(i) - 2
865 900 else:
866 901 assert p < len(i)
867 902 for r in xrange(p, -1, -1):
868 903 v = i[r][7]
869 904 n[v] = r
870 905 if v == node:
871 906 self._nodepos = r - 1
872 907 return r
873 908 if node == wdirid or node in wdirfilenodeids:
874 909 raise error.WdirUnsupported
875 910 raise LookupError(node, self.indexfile, _('no node'))
876 911
877 912 # Accessors for index entries.
878 913
879 914 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
880 915 # are flags.
881 916 def start(self, rev):
882 917 return int(self.index[rev][0] >> 16)
883 918
884 919 def flags(self, rev):
885 920 return self.index[rev][0] & 0xFFFF
886 921
887 922 def length(self, rev):
888 923 return self.index[rev][1]
889 924
890 925 def rawsize(self, rev):
891 926 """return the length of the uncompressed text for a given revision"""
892 927 l = self.index[rev][2]
893 928 if l >= 0:
894 929 return l
895 930
896 931 t = self.revision(rev, raw=True)
897 932 return len(t)
898 933
899 934 def size(self, rev):
900 935 """length of non-raw text (processed by a "read" flag processor)"""
901 936 # fast path: if no "read" flag processor could change the content,
902 937 # size is rawsize. note: ELLIPSIS is known to not change the content.
903 938 flags = self.flags(rev)
904 939 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
905 940 return self.rawsize(rev)
906 941
907 942 return len(self.revision(rev, raw=False))
908 943
909 944 def chainbase(self, rev):
910 945 base = self._chainbasecache.get(rev)
911 946 if base is not None:
912 947 return base
913 948
914 949 index = self.index
915 950 iterrev = rev
916 951 base = index[iterrev][3]
917 952 while base != iterrev:
918 953 iterrev = base
919 954 base = index[iterrev][3]
920 955
921 956 self._chainbasecache[rev] = base
922 957 return base
923 958
924 959 def linkrev(self, rev):
925 960 return self.index[rev][4]
926 961
927 962 def parentrevs(self, rev):
928 963 try:
929 964 entry = self.index[rev]
930 965 except IndexError:
931 966 if rev == wdirrev:
932 967 raise error.WdirUnsupported
933 968 raise
934 969
935 970 return entry[5], entry[6]
936 971
937 972 def node(self, rev):
938 973 try:
939 974 return self.index[rev][7]
940 975 except IndexError:
941 976 if rev == wdirrev:
942 977 raise error.WdirUnsupported
943 978 raise
944 979
945 980 # Derived from index values.
946 981
947 982 def end(self, rev):
948 983 return self.start(rev) + self.length(rev)
949 984
950 985 def parents(self, node):
951 986 i = self.index
952 987 d = i[self.rev(node)]
953 988 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
954 989
955 990 def chainlen(self, rev):
956 991 return self._chaininfo(rev)[0]
957 992
958 993 def _chaininfo(self, rev):
959 994 chaininfocache = self._chaininfocache
960 995 if rev in chaininfocache:
961 996 return chaininfocache[rev]
962 997 index = self.index
963 998 generaldelta = self._generaldelta
964 999 iterrev = rev
965 1000 e = index[iterrev]
966 1001 clen = 0
967 1002 compresseddeltalen = 0
968 1003 while iterrev != e[3]:
969 1004 clen += 1
970 1005 compresseddeltalen += e[1]
971 1006 if generaldelta:
972 1007 iterrev = e[3]
973 1008 else:
974 1009 iterrev -= 1
975 1010 if iterrev in chaininfocache:
976 1011 t = chaininfocache[iterrev]
977 1012 clen += t[0]
978 1013 compresseddeltalen += t[1]
979 1014 break
980 1015 e = index[iterrev]
981 1016 else:
982 1017 # Add text length of base since decompressing that also takes
983 1018 # work. For cache hits the length is already included.
984 1019 compresseddeltalen += e[1]
985 1020 r = (clen, compresseddeltalen)
986 1021 chaininfocache[rev] = r
987 1022 return r
988 1023
989 1024 def _deltachain(self, rev, stoprev=None):
990 1025 """Obtain the delta chain for a revision.
991 1026
992 1027 ``stoprev`` specifies a revision to stop at. If not specified, we
993 1028 stop at the base of the chain.
994 1029
995 1030 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
996 1031 revs in ascending order and ``stopped`` is a bool indicating whether
997 1032 ``stoprev`` was hit.
998 1033 """
999 1034 # Try C implementation.
1000 1035 try:
1001 1036 return self.index.deltachain(rev, stoprev, self._generaldelta)
1002 1037 except AttributeError:
1003 1038 pass
1004 1039
1005 1040 chain = []
1006 1041
1007 1042 # Alias to prevent attribute lookup in tight loop.
1008 1043 index = self.index
1009 1044 generaldelta = self._generaldelta
1010 1045
1011 1046 iterrev = rev
1012 1047 e = index[iterrev]
1013 1048 while iterrev != e[3] and iterrev != stoprev:
1014 1049 chain.append(iterrev)
1015 1050 if generaldelta:
1016 1051 iterrev = e[3]
1017 1052 else:
1018 1053 iterrev -= 1
1019 1054 e = index[iterrev]
1020 1055
1021 1056 if iterrev == stoprev:
1022 1057 stopped = True
1023 1058 else:
1024 1059 chain.append(iterrev)
1025 1060 stopped = False
1026 1061
1027 1062 chain.reverse()
1028 1063 return chain, stopped
1029 1064
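For example, in a generaldelta revlog where revision 5 deltas against 3 and revision 3 against the full snapshot at 0 (a hypothetical layout, with rl a revlog instance):

chain, stopped = rl._deltachain(5)
# chain == [0, 3, 5], stopped == False
chain, stopped = rl._deltachain(5, stoprev=3)
# chain == [5], stopped == True -- the walk halted before reaching rev 3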
1030 1065 def ancestors(self, revs, stoprev=0, inclusive=False):
1031 1066 """Generate the ancestors of 'revs' in reverse topological order.
1032 1067 Does not generate revs lower than stoprev.
1033 1068
1034 1069 See the documentation for ancestor.lazyancestors for more details."""
1035 1070
1036 1071 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1037 1072 inclusive=inclusive)
1038 1073
1039 1074 def descendants(self, revs):
1040 1075 """Generate the descendants of 'revs' in revision order.
1041 1076
1042 1077 Yield a sequence of revision numbers starting with a child of
1043 1078 some rev in revs, i.e., each revision is *not* considered a
1044 1079 descendant of itself. Results are ordered by revision number (a
1045 1080 topological sort)."""
1046 1081 first = min(revs)
1047 1082 if first == nullrev:
1048 1083 for i in self:
1049 1084 yield i
1050 1085 return
1051 1086
1052 1087 seen = set(revs)
1053 1088 for i in self.revs(start=first + 1):
1054 1089 for x in self.parentrevs(i):
1055 1090 if x != nullrev and x in seen:
1056 1091 seen.add(i)
1057 1092 yield i
1058 1093 break
1059 1094
1060 1095 def findcommonmissing(self, common=None, heads=None):
1061 1096 """Return a tuple of the ancestors of common and the ancestors of heads
1062 1097 that are not ancestors of common. In revset terminology, we return the
1063 1098 tuple:
1064 1099
1065 1100 ::common, (::heads) - (::common)
1066 1101
1067 1102 The list is sorted by revision number, meaning it is
1068 1103 topologically sorted.
1069 1104
1070 1105 'heads' and 'common' are both lists of node IDs. If heads is
1071 1106 not supplied, uses all of the revlog's heads. If common is not
1072 1107 supplied, uses nullid."""
1073 1108 if common is None:
1074 1109 common = [nullid]
1075 1110 if heads is None:
1076 1111 heads = self.heads()
1077 1112
1078 1113 common = [self.rev(n) for n in common]
1079 1114 heads = [self.rev(n) for n in heads]
1080 1115
1081 1116 # we want the ancestors, but inclusive
1082 1117 class lazyset(object):
1083 1118 def __init__(self, lazyvalues):
1084 1119 self.addedvalues = set()
1085 1120 self.lazyvalues = lazyvalues
1086 1121
1087 1122 def __contains__(self, value):
1088 1123 return value in self.addedvalues or value in self.lazyvalues
1089 1124
1090 1125 def __iter__(self):
1091 1126 added = self.addedvalues
1092 1127 for r in added:
1093 1128 yield r
1094 1129 for r in self.lazyvalues:
1095 1130 if r not in added:
1096 1131 yield r
1097 1132
1098 1133 def add(self, value):
1099 1134 self.addedvalues.add(value)
1100 1135
1101 1136 def update(self, values):
1102 1137 self.addedvalues.update(values)
1103 1138
1104 1139 has = lazyset(self.ancestors(common))
1105 1140 has.add(nullrev)
1106 1141 has.update(common)
1107 1142
1108 1143 # take all ancestors from heads that aren't in has
1109 1144 missing = set()
1110 1145 visit = collections.deque(r for r in heads if r not in has)
1111 1146 while visit:
1112 1147 r = visit.popleft()
1113 1148 if r in missing:
1114 1149 continue
1115 1150 else:
1116 1151 missing.add(r)
1117 1152 for p in self.parentrevs(r):
1118 1153 if p not in has:
1119 1154 visit.append(p)
1120 1155 missing = list(missing)
1121 1156 missing.sort()
1122 1157 return has, [self.node(miss) for miss in missing]
1123 1158
1124 1159 def incrementalmissingrevs(self, common=None):
1125 1160 """Return an object that can be used to incrementally compute the
1126 1161 revision numbers of the ancestors of arbitrary sets that are not
1127 1162 ancestors of common. This is an ancestor.incrementalmissingancestors
1128 1163 object.
1129 1164
1130 1165 'common' is a list of revision numbers. If common is not supplied, uses
1131 1166 nullrev.
1132 1167 """
1133 1168 if common is None:
1134 1169 common = [nullrev]
1135 1170
1136 1171 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1137 1172
1138 1173 def findmissingrevs(self, common=None, heads=None):
1139 1174 """Return the revision numbers of the ancestors of heads that
1140 1175 are not ancestors of common.
1141 1176
1142 1177 More specifically, return a list of revision numbers corresponding to
1143 1178 nodes N such that every N satisfies the following constraints:
1144 1179
1145 1180 1. N is an ancestor of some node in 'heads'
1146 1181 2. N is not an ancestor of any node in 'common'
1147 1182
1148 1183 The list is sorted by revision number, meaning it is
1149 1184 topologically sorted.
1150 1185
1151 1186 'heads' and 'common' are both lists of revision numbers. If heads is
1152 1187 not supplied, uses all of the revlog's heads. If common is not
1153 1188 supplied, uses nullid."""
1154 1189 if common is None:
1155 1190 common = [nullrev]
1156 1191 if heads is None:
1157 1192 heads = self.headrevs()
1158 1193
1159 1194 inc = self.incrementalmissingrevs(common=common)
1160 1195 return inc.missingancestors(heads)
1161 1196
1162 1197 def findmissing(self, common=None, heads=None):
1163 1198 """Return the ancestors of heads that are not ancestors of common.
1164 1199
1165 1200 More specifically, return a list of nodes N such that every N
1166 1201 satisfies the following constraints:
1167 1202
1168 1203 1. N is an ancestor of some node in 'heads'
1169 1204 2. N is not an ancestor of any node in 'common'
1170 1205
1171 1206 The list is sorted by revision number, meaning it is
1172 1207 topologically sorted.
1173 1208
1174 1209 'heads' and 'common' are both lists of node IDs. If heads is
1175 1210 not supplied, uses all of the revlog's heads. If common is not
1176 1211 supplied, uses nullid."""
1177 1212 if common is None:
1178 1213 common = [nullid]
1179 1214 if heads is None:
1180 1215 heads = self.heads()
1181 1216
1182 1217 common = [self.rev(n) for n in common]
1183 1218 heads = [self.rev(n) for n in heads]
1184 1219
1185 1220 inc = self.incrementalmissingrevs(common=common)
1186 1221 return [self.node(r) for r in inc.missingancestors(heads)]
1187 1222
1188 1223 def nodesbetween(self, roots=None, heads=None):
1189 1224 """Return a topological path from 'roots' to 'heads'.
1190 1225
1191 1226 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1192 1227 topologically sorted list of all nodes N that satisfy both of
1193 1228 these constraints:
1194 1229
1195 1230 1. N is a descendant of some node in 'roots'
1196 1231 2. N is an ancestor of some node in 'heads'
1197 1232
1198 1233 Every node is considered to be both a descendant and an ancestor
1199 1234 of itself, so every reachable node in 'roots' and 'heads' will be
1200 1235 included in 'nodes'.
1201 1236
1202 1237 'outroots' is the list of reachable nodes in 'roots', i.e., the
1203 1238 subset of 'roots' that is returned in 'nodes'. Likewise,
1204 1239 'outheads' is the subset of 'heads' that is also in 'nodes'.
1205 1240
1206 1241 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1207 1242 unspecified, uses nullid as the only root. If 'heads' is
1208 1243 unspecified, uses list of all of the revlog's heads."""
1209 1244 nonodes = ([], [], [])
1210 1245 if roots is not None:
1211 1246 roots = list(roots)
1212 1247 if not roots:
1213 1248 return nonodes
1214 1249 lowestrev = min([self.rev(n) for n in roots])
1215 1250 else:
1216 1251 roots = [nullid] # Everybody's a descendant of nullid
1217 1252 lowestrev = nullrev
1218 1253 if (lowestrev == nullrev) and (heads is None):
1219 1254 # We want _all_ the nodes!
1220 1255 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1221 1256 if heads is None:
1222 1257 # All nodes are ancestors, so the latest ancestor is the last
1223 1258 # node.
1224 1259 highestrev = len(self) - 1
1225 1260 # Set ancestors to None to signal that every node is an ancestor.
1226 1261 ancestors = None
1227 1262 # Set heads to an empty dictionary for later discovery of heads
1228 1263 heads = {}
1229 1264 else:
1230 1265 heads = list(heads)
1231 1266 if not heads:
1232 1267 return nonodes
1233 1268 ancestors = set()
1234 1269 # Turn heads into a dictionary so we can remove 'fake' heads.
1235 1270 # Also, later we will be using it to filter out the heads we can't
1236 1271 # find from roots.
1237 1272 heads = dict.fromkeys(heads, False)
1238 1273 # Start at the top and keep marking parents until we're done.
1239 1274 nodestotag = set(heads)
1240 1275 # Remember where the top was so we can use it as a limit later.
1241 1276 highestrev = max([self.rev(n) for n in nodestotag])
1242 1277 while nodestotag:
1243 1278 # grab a node to tag
1244 1279 n = nodestotag.pop()
1245 1280 # Never tag nullid
1246 1281 if n == nullid:
1247 1282 continue
1248 1283 # A node's revision number represents its place in a
1249 1284 # topologically sorted list of nodes.
1250 1285 r = self.rev(n)
1251 1286 if r >= lowestrev:
1252 1287 if n not in ancestors:
1253 1288 # If we are possibly a descendant of one of the roots
1254 1289 # and we haven't already been marked as an ancestor
1255 1290 ancestors.add(n) # Mark as ancestor
1256 1291 # Add non-nullid parents to list of nodes to tag.
1257 1292 nodestotag.update([p for p in self.parents(n) if
1258 1293 p != nullid])
1259 1294 elif n in heads: # We've seen it before, is it a fake head?
1260 1295 # So it is, real heads should not be the ancestors of
1261 1296 # any other heads.
1262 1297 heads.pop(n)
1263 1298 if not ancestors:
1264 1299 return nonodes
1265 1300 # Now that we have our set of ancestors, we want to remove any
1266 1301 # roots that are not ancestors.
1267 1302
1268 1303 # If one of the roots was nullid, everything is included anyway.
1269 1304 if lowestrev > nullrev:
1270 1305 # But, since we weren't, let's recompute the lowest rev to not
1271 1306 # include roots that aren't ancestors.
1272 1307
1273 1308 # Filter out roots that aren't ancestors of heads
1274 1309 roots = [root for root in roots if root in ancestors]
1275 1310 # Recompute the lowest revision
1276 1311 if roots:
1277 1312 lowestrev = min([self.rev(root) for root in roots])
1278 1313 else:
1279 1314 # No more roots? Return empty list
1280 1315 return nonodes
1281 1316 else:
1282 1317 # We are descending from nullid, and don't need to care about
1283 1318 # any other roots.
1284 1319 lowestrev = nullrev
1285 1320 roots = [nullid]
1286 1321 # Transform our roots list into a set.
1287 1322 descendants = set(roots)
1288 1323 # Also, keep the original roots so we can filter out roots that aren't
1289 1324 # 'real' roots (i.e. are descended from other roots).
1290 1325 roots = descendants.copy()
1291 1326 # Our topologically sorted list of output nodes.
1292 1327 orderedout = []
1293 1328 # Don't start at nullid since we don't want nullid in our output list,
1294 1329 # and if nullid shows up in descendants, empty parents will look like
1295 1330 # they're descendants.
1296 1331 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1297 1332 n = self.node(r)
1298 1333 isdescendant = False
1299 1334 if lowestrev == nullrev: # Everybody is a descendant of nullid
1300 1335 isdescendant = True
1301 1336 elif n in descendants:
1302 1337 # n is already a descendant
1303 1338 isdescendant = True
1304 1339 # This check only needs to be done here because all the roots
1305 1340 # will start being marked as descendants before the loop.
1306 1341 if n in roots:
1307 1342 # If n was a root, check if it's a 'real' root.
1308 1343 p = tuple(self.parents(n))
1309 1344 # If any of its parents are descendants, it's not a root.
1310 1345 if (p[0] in descendants) or (p[1] in descendants):
1311 1346 roots.remove(n)
1312 1347 else:
1313 1348 p = tuple(self.parents(n))
1314 1349 # A node is a descendant if either of its parents are
1315 1350 # descendants. (We seeded the descendants set with the roots
1316 1351 # up there, remember?)
1317 1352 if (p[0] in descendants) or (p[1] in descendants):
1318 1353 descendants.add(n)
1319 1354 isdescendant = True
1320 1355 if isdescendant and ((ancestors is None) or (n in ancestors)):
1321 1356 # Only include nodes that are both descendants and ancestors.
1322 1357 orderedout.append(n)
1323 1358 if (ancestors is not None) and (n in heads):
1324 1359 # We're trying to figure out which heads are reachable
1325 1360 # from roots.
1326 1361 # Mark this head as having been reached
1327 1362 heads[n] = True
1328 1363 elif ancestors is None:
1329 1364 # Otherwise, we're trying to discover the heads.
1330 1365 # Assume this is a head because if it isn't, the next step
1331 1366 # will eventually remove it.
1332 1367 heads[n] = True
1333 1368 # But, obviously its parents aren't.
1334 1369 for p in self.parents(n):
1335 1370 heads.pop(p, None)
1336 1371 heads = [head for head, flag in heads.iteritems() if flag]
1337 1372 roots = list(roots)
1338 1373 assert orderedout
1339 1374 assert roots
1340 1375 assert heads
1341 1376 return (orderedout, roots, heads)
1342 1377
1343 1378 def headrevs(self):
1344 1379 try:
1345 1380 return self.index.headrevs()
1346 1381 except AttributeError:
1347 1382 return self._headrevs()
1348 1383
1349 1384 def computephases(self, roots):
1350 1385 return self.index.computephasesmapsets(roots)
1351 1386
1352 1387 def _headrevs(self):
1353 1388 count = len(self)
1354 1389 if not count:
1355 1390 return [nullrev]
1356 1391 # we won't iterate over filtered revs, so nobody is a head at the start
1357 1392 ishead = [0] * (count + 1)
1358 1393 index = self.index
1359 1394 for r in self:
1360 1395 ishead[r] = 1 # I may be a head
1361 1396 e = index[r]
1362 1397 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1363 1398 return [r for r, val in enumerate(ishead) if val]
1364 1399
1365 1400 def heads(self, start=None, stop=None):
1366 1401 """return the list of all nodes that have no children
1367 1402
1368 1403 if start is specified, only heads that are descendants of
1369 1404 start will be returned
1370 1405 if stop is specified, it will consider all the revs from stop
1371 1406 as if they had no children
1372 1407 """
1373 1408 if start is None and stop is None:
1374 1409 if not len(self):
1375 1410 return [nullid]
1376 1411 return [self.node(r) for r in self.headrevs()]
1377 1412
1378 1413 if start is None:
1379 1414 start = nullid
1380 1415 if stop is None:
1381 1416 stop = []
1382 1417 stoprevs = set([self.rev(n) for n in stop])
1383 1418 startrev = self.rev(start)
1384 1419 reachable = {startrev}
1385 1420 heads = {startrev}
1386 1421
1387 1422 parentrevs = self.parentrevs
1388 1423 for r in self.revs(start=startrev + 1):
1389 1424 for p in parentrevs(r):
1390 1425 if p in reachable:
1391 1426 if r not in stoprevs:
1392 1427 reachable.add(r)
1393 1428 heads.add(r)
1394 1429 if p in heads and p not in stoprevs:
1395 1430 heads.remove(p)
1396 1431
1397 1432 return [self.node(r) for r in heads]
1398 1433
1399 1434 def children(self, node):
1400 1435 """find the children of a given node"""
1401 1436 c = []
1402 1437 p = self.rev(node)
1403 1438 for r in self.revs(start=p + 1):
1404 1439 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1405 1440 if prevs:
1406 1441 for pr in prevs:
1407 1442 if pr == p:
1408 1443 c.append(self.node(r))
1409 1444 elif p == nullrev:
1410 1445 c.append(self.node(r))
1411 1446 return c
1412 1447
1413 1448 def descendant(self, start, end):
1414 1449 """True if revision 'end' is a descendant of revision 'start'
1415 1450
1416 1451 A revision is considered as a descendant of itself."""
1417 1452 if start == nullrev:
1418 1453 return True
1419 1454 elif start == end:
1420 1455 return True
1421 1456 return start in self._commonancestorsheads(start, end)
1422 1457
1423 1458 def commonancestorsheads(self, a, b):
1424 1459 """calculate all the heads of the common ancestors of nodes a and b"""
1425 1460 a, b = self.rev(a), self.rev(b)
1426 1461 ancs = self._commonancestorsheads(a, b)
1427 1462 return pycompat.maplist(self.node, ancs)
1428 1463
1429 1464 def _commonancestorsheads(self, *revs):
1430 1465 """calculate all the heads of the common ancestors of revs"""
1431 1466 try:
1432 1467 ancs = self.index.commonancestorsheads(*revs)
1433 1468 except (AttributeError, OverflowError): # C implementation failed
1434 1469 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1435 1470 return ancs
1436 1471
1437 1472 def isancestor(self, a, b):
1438 1473 """return True if node a is an ancestor of node b
1439 1474
1440 1475 The implementation of this is trivial but the use of
1441 1476 commonancestorsheads is not."""
1442 1477 a, b = self.rev(a), self.rev(b)
1443 1478 return self.descendant(a, b)
1444 1479
1445 1480 def ancestor(self, a, b):
1446 1481 """calculate the "best" common ancestor of nodes a and b"""
1447 1482
1448 1483 a, b = self.rev(a), self.rev(b)
1449 1484 try:
1450 1485 ancs = self.index.ancestors(a, b)
1451 1486 except (AttributeError, OverflowError):
1452 1487 ancs = ancestor.ancestors(self.parentrevs, a, b)
1453 1488 if ancs:
1454 1489 # choose a consistent winner when there's a tie
1455 1490 return min(map(self.node, ancs))
1456 1491 return nullid
1457 1492
1458 1493 def _match(self, id):
1459 1494 if isinstance(id, int):
1460 1495 # rev
1461 1496 return self.node(id)
1462 1497 if len(id) == 20:
1463 1498 # possibly a binary node
1464 1499 # odds of a binary node being all hex in ASCII are 1 in 10**25
1465 1500 try:
1466 1501 node = id
1467 1502 self.rev(node) # quick search the index
1468 1503 return node
1469 1504 except LookupError:
1470 1505 pass # may be partial hex id
1471 1506 try:
1472 1507 # str(rev)
1473 1508 rev = int(id)
1474 1509 if "%d" % rev != id:
1475 1510 raise ValueError
1476 1511 if rev < 0:
1477 1512 rev = len(self) + rev
1478 1513 if rev < 0 or rev >= len(self):
1479 1514 raise ValueError
1480 1515 return self.node(rev)
1481 1516 except (ValueError, OverflowError):
1482 1517 pass
1483 1518 if len(id) == 40:
1484 1519 try:
1485 1520 # a full hex nodeid?
1486 1521 node = bin(id)
1487 1522 self.rev(node)
1488 1523 return node
1489 1524 except (TypeError, LookupError):
1490 1525 pass
1491 1526
1492 1527 def _partialmatch(self, id):
1493 1528 # we don't care about wdirfilenodeids as they should always be full hashes
1494 1529 maybewdir = wdirhex.startswith(id)
1495 1530 try:
1496 1531 partial = self.index.partialmatch(id)
1497 1532 if partial and self.hasnode(partial):
1498 1533 if maybewdir:
1499 1534 # single 'ff...' match in radix tree, ambiguous with wdir
1500 1535 raise RevlogError
1501 1536 return partial
1502 1537 if maybewdir:
1503 1538 # no 'ff...' match in radix tree, wdir identified
1504 1539 raise error.WdirUnsupported
1505 1540 return None
1506 1541 except RevlogError:
1507 1542 # parsers.c radix tree lookup gave multiple matches
1508 1543 # fast path: for unfiltered changelog, radix tree is accurate
1509 1544 if not getattr(self, 'filteredrevs', None):
1510 1545 raise LookupError(id, self.indexfile,
1511 1546 _('ambiguous identifier'))
1512 1547 # fall through to slow path that filters hidden revisions
1513 1548 except (AttributeError, ValueError):
1514 1549 # we are pure python, or key was too short to search radix tree
1515 1550 pass
1516 1551
1517 1552 if id in self._pcache:
1518 1553 return self._pcache[id]
1519 1554
1520 1555 if len(id) <= 40:
1521 1556 try:
1522 1557 # hex(node)[:...]
1523 1558 l = len(id) // 2 # grab an even number of digits
1524 1559 prefix = bin(id[:l * 2])
1525 1560 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1526 1561 nl = [n for n in nl if hex(n).startswith(id) and
1527 1562 self.hasnode(n)]
1528 1563 if len(nl) > 0:
1529 1564 if len(nl) == 1 and not maybewdir:
1530 1565 self._pcache[id] = nl[0]
1531 1566 return nl[0]
1532 1567 raise LookupError(id, self.indexfile,
1533 1568 _('ambiguous identifier'))
1534 1569 if maybewdir:
1535 1570 raise error.WdirUnsupported
1536 1571 return None
1537 1572 except TypeError:
1538 1573 pass
1539 1574
1540 1575 def lookup(self, id):
1541 1576 """locate a node based on:
1542 1577 - revision number or str(revision number)
1543 1578 - nodeid or subset of hex nodeid
1544 1579 """
1545 1580 n = self._match(id)
1546 1581 if n is not None:
1547 1582 return n
1548 1583 n = self._partialmatch(id)
1549 1584 if n:
1550 1585 return n
1551 1586
1552 1587 raise LookupError(id, self.indexfile, _('no match found'))
1553 1588
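As a hedged illustration of the lookup forms listed in the docstring above (``rl`` is a placeholder for any open revlog instance and the values are hypothetical, so this is not directly runnable as-is):

    node = rl.lookup(5)               # revision number
    node = rl.lookup('5')             # str(revision number)
    node = rl.lookup(node)            # 20-byte binary nodeid
    node = rl.lookup(hex(node)[:8])   # unambiguous hex prefix
    # an ambiguous or unknown id raises LookupError
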
1554 1589 def shortest(self, node, minlength=1):
1555 1590 """Find the shortest unambiguous prefix that matches node."""
1556 1591 def isvalid(prefix):
1557 1592 try:
1558 1593 node = self._partialmatch(prefix)
1559 1594 except error.RevlogError:
1560 1595 return False
1561 1596 except error.WdirUnsupported:
1562 1597 # single 'ff...' match
1563 1598 return True
1564 1599 if node is None:
1565 1600 raise LookupError(node, self.indexfile, _('no node'))
1566 1601 return True
1567 1602
1568 1603 def maybewdir(prefix):
1569 1604 return all(c == 'f' for c in prefix)
1570 1605
1571 1606 hexnode = hex(node)
1572 1607
1573 1608 def disambiguate(hexnode, minlength):
1574 1609 """Disambiguate against wdirid."""
1575 1610 for length in range(minlength, 41):
1576 1611 prefix = hexnode[:length]
1577 1612 if not maybewdir(prefix):
1578 1613 return prefix
1579 1614
1580 1615 if not getattr(self, 'filteredrevs', None):
1581 1616 try:
1582 1617 length = max(self.index.shortest(node), minlength)
1583 1618 return disambiguate(hexnode, length)
1584 1619 except RevlogError:
1585 1620 if node != wdirid:
1586 1621 raise LookupError(node, self.indexfile, _('no node'))
1587 1622 except AttributeError:
1588 1623 # Fall through to pure code
1589 1624 pass
1590 1625
1591 1626 if node == wdirid:
1592 1627 for length in range(minlength, 41):
1593 1628 prefix = hexnode[:length]
1594 1629 if isvalid(prefix):
1595 1630 return prefix
1596 1631
1597 1632 for length in range(minlength, 41):
1598 1633 prefix = hexnode[:length]
1599 1634 if isvalid(prefix):
1600 1635 return disambiguate(hexnode, length)
1601 1636
1602 1637 def cmp(self, node, text):
1603 1638 """compare text with a given file revision
1604 1639
1605 1640 returns True if text is different from what is stored.
1606 1641 """
1607 1642 p1, p2 = self.parents(node)
1608 1643 return hash(text, p1, p2) != node
1609 1644
1610 1645 def _cachesegment(self, offset, data):
1611 1646 """Add a segment to the revlog cache.
1612 1647
1613 1648 Accepts an absolute offset and the data that is at that location.
1614 1649 """
1615 1650 o, d = self._chunkcache
1616 1651 # try to add to existing cache
1617 1652 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1618 1653 self._chunkcache = o, d + data
1619 1654 else:
1620 1655 self._chunkcache = offset, data
1621 1656
1622 1657 def _readsegment(self, offset, length, df=None):
1623 1658 """Load a segment of raw data from the revlog.
1624 1659
1625 1660 Accepts an absolute offset, length to read, and an optional existing
1626 1661 file handle to read from.
1627 1662
1628 1663 If an existing file handle is passed, it will be seeked and the
1629 1664 original seek position will NOT be restored.
1630 1665
1631 1666 Returns a str or buffer of raw byte data.
1632 1667 """
1633 1668 # Cache data both forward and backward around the requested
1634 1669 # data, in a fixed size window. This helps speed up operations
1635 1670 # involving reading the revlog backwards.
1636 1671 cachesize = self._chunkcachesize
1637 1672 realoffset = offset & ~(cachesize - 1)
1638 1673 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1639 1674 - realoffset)
1640 1675 with self._datareadfp(df) as df:
1641 1676 df.seek(realoffset)
1642 1677 d = df.read(reallength)
1643 1678 self._cachesegment(realoffset, d)
1644 1679 if offset != realoffset or reallength != length:
1645 1680 return util.buffer(d, offset - realoffset, length)
1646 1681 return d
1647 1682
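To make the windowing arithmetic above concrete, here is a small worked example; the 64 KiB window size is an assumption for illustration, the real value comes from ``self._chunkcachesize``:

    cachesize = 65536
    offset, length = 70000, 1000
    realoffset = offset & ~(cachesize - 1)              # 65536: round down
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)    # 65536: one full window
    # the read covers [65536, 131072), a superset of the requested
    # [70000, 71000), so neighbouring chunks land in the cache for free
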
1648 1683 def _getsegment(self, offset, length, df=None):
1649 1684 """Obtain a segment of raw data from the revlog.
1650 1685
1651 1686 Accepts an absolute offset, length of bytes to obtain, and an
1652 1687 optional file handle to the already-opened revlog. If the file
1653 1688 handle is used, its original seek position will not be preserved.
1654 1689
1655 1690 Requests for data may be returned from a cache.
1656 1691
1657 1692 Returns a str or a buffer instance of raw byte data.
1658 1693 """
1659 1694 o, d = self._chunkcache
1660 1695 l = len(d)
1661 1696
1662 1697 # is it in the cache?
1663 1698 cachestart = offset - o
1664 1699 cacheend = cachestart + length
1665 1700 if cachestart >= 0 and cacheend <= l:
1666 1701 if cachestart == 0 and cacheend == l:
1667 1702 return d # avoid a copy
1668 1703 return util.buffer(d, cachestart, cacheend - cachestart)
1669 1704
1670 1705 return self._readsegment(offset, length, df=df)
1671 1706
1672 1707 def _getsegmentforrevs(self, startrev, endrev, df=None):
1673 1708 """Obtain a segment of raw data corresponding to a range of revisions.
1674 1709
1675 1710 Accepts the start and end revisions and an optional already-open
1676 1711 file handle to be used for reading. If the file handle is read, its
1677 1712 seek position will not be preserved.
1678 1713
1679 1714 Requests for data may be satisfied by a cache.
1680 1715
1681 1716 Returns a 2-tuple of (offset, data) for the requested range of
1682 1717 revisions. Offset is the integer offset from the beginning of the
1683 1718 revlog and data is a str or buffer of the raw byte data.
1684 1719
1685 1720 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1686 1721 to determine where each revision's data begins and ends.
1687 1722 """
1688 1723 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1689 1724 # (functions are expensive).
1690 1725 index = self.index
1691 1726 istart = index[startrev]
1692 1727 start = int(istart[0] >> 16)
1693 1728 if startrev == endrev:
1694 1729 end = start + istart[1]
1695 1730 else:
1696 1731 iend = index[endrev]
1697 1732 end = int(iend[0] >> 16) + iend[1]
1698 1733
1699 1734 if self._inline:
1700 1735 start += (startrev + 1) * self._io.size
1701 1736 end += (endrev + 1) * self._io.size
1702 1737 length = end - start
1703 1738
1704 1739 return start, self._getsegment(start, length, df=df)
1705 1740
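The ``>> 16`` above undoes how each index entry packs the byte offset and the per-revision flags into its first field (cf. ``offset_type`` used when writing entries); a minimal runnable sketch with hypothetical values:

    offset, flags = 4096, 0               # hypothetical values
    packed = (offset << 16) | flags       # what offset_type(offset, flags) stores
    assert int(packed >> 16) == offset    # byte offset into the data file
    assert packed & 0xffff == flags       # revision index flags
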
1706 1741 def _chunk(self, rev, df=None):
1707 1742 """Obtain a single decompressed chunk for a revision.
1708 1743
1709 1744 Accepts an integer revision and an optional already-open file handle
1710 1745 to be used for reading. If used, the seek position of the file will not
1711 1746 be preserved.
1712 1747
1713 1748 Returns a str holding uncompressed data for the requested revision.
1714 1749 """
1715 1750 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1716 1751
1717 1752 def _chunks(self, revs, df=None):
1718 1753 """Obtain decompressed chunks for the specified revisions.
1719 1754
1720 1755 Accepts an iterable of numeric revisions that are assumed to be in
1721 1756 ascending order. Also accepts an optional already-open file handle
1722 1757 to be used for reading. If used, the seek position of the file will
1723 1758 not be preserved.
1724 1759
1725 1760 This function is similar to calling ``self._chunk()`` multiple times,
1726 1761 but is faster.
1727 1762
1728 1763 Returns a list with decompressed data for each requested revision.
1729 1764 """
1730 1765 if not revs:
1731 1766 return []
1732 1767 start = self.start
1733 1768 length = self.length
1734 1769 inline = self._inline
1735 1770 iosize = self._io.size
1736 1771 buffer = util.buffer
1737 1772
1738 1773 l = []
1739 1774 ladd = l.append
1740 1775
1741 1776 if not self._withsparseread:
1742 1777 slicedchunks = (revs,)
1743 1778 else:
1744 1779 slicedchunks = _slicechunk(self, revs)
1745 1780
1746 1781 for revschunk in slicedchunks:
1747 1782 firstrev = revschunk[0]
1748 1783 # Skip trailing revisions with empty diff
1749 1784 for lastrev in revschunk[::-1]:
1750 1785 if length(lastrev) != 0:
1751 1786 break
1752 1787
1753 1788 try:
1754 1789 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1755 1790 except OverflowError:
1756 1791 # issue4215 - we can't cache a run of chunks greater than
1757 1792 # 2G on Windows
1758 1793 return [self._chunk(rev, df=df) for rev in revschunk]
1759 1794
1760 1795 decomp = self.decompress
1761 1796 for rev in revschunk:
1762 1797 chunkstart = start(rev)
1763 1798 if inline:
1764 1799 chunkstart += (rev + 1) * iosize
1765 1800 chunklength = length(rev)
1766 1801 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1767 1802
1768 1803 return l
1769 1804
1770 1805 def _chunkclear(self):
1771 1806 """Clear the raw chunk cache."""
1772 1807 self._chunkcache = (0, '')
1773 1808
1774 1809 def deltaparent(self, rev):
1775 1810 """return deltaparent of the given revision"""
1776 1811 base = self.index[rev][3]
1777 1812 if base == rev:
1778 1813 return nullrev
1779 1814 elif self._generaldelta:
1780 1815 return base
1781 1816 else:
1782 1817 return rev - 1
1783 1818
1784 1819 def revdiff(self, rev1, rev2):
1785 1820 """return or calculate a delta between two revisions
1786 1821
1787 1822 The delta calculated is in binary form and is intended to be written to
1788 1823 revlog data directly. So this function needs raw revision data.
1789 1824 """
1790 1825 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1791 1826 return bytes(self._chunk(rev2))
1792 1827
1793 1828 return mdiff.textdiff(self.revision(rev1, raw=True),
1794 1829 self.revision(rev2, raw=True))
1795 1830
1796 1831 def revision(self, nodeorrev, _df=None, raw=False):
1797 1832 """return an uncompressed revision of a given node or revision
1798 1833 number.
1799 1834
1800 1835 _df - an existing file handle to read from. (internal-only)
1801 1836 raw - an optional argument specifying if the revision data is to be
1802 1837 treated as raw data when applying flag transforms. 'raw' should be set
1803 1838 to True when generating changegroups or in debug commands.
1804 1839 """
1805 1840 if isinstance(nodeorrev, int):
1806 1841 rev = nodeorrev
1807 1842 node = self.node(rev)
1808 1843 else:
1809 1844 node = nodeorrev
1810 1845 rev = None
1811 1846
1812 1847 cachedrev = None
1813 1848 flags = None
1814 1849 rawtext = None
1815 1850 if node == nullid:
1816 1851 return ""
1817 1852 if self._cache:
1818 1853 if self._cache[0] == node:
1819 1854 # _cache only stores rawtext
1820 1855 if raw:
1821 1856 return self._cache[2]
1822 1857 # duplicated, but good for perf
1823 1858 if rev is None:
1824 1859 rev = self.rev(node)
1825 1860 if flags is None:
1826 1861 flags = self.flags(rev)
1827 1862 # no extra flags set, no flag processor runs, text = rawtext
1828 1863 if flags == REVIDX_DEFAULT_FLAGS:
1829 1864 return self._cache[2]
1830 1865 # rawtext is reusable. need to run flag processor
1831 1866 rawtext = self._cache[2]
1832 1867
1833 1868 cachedrev = self._cache[1]
1834 1869
1835 1870 # look up what we need to read
1836 1871 if rawtext is None:
1837 1872 if rev is None:
1838 1873 rev = self.rev(node)
1839 1874
1840 1875 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1841 1876 if stopped:
1842 1877 rawtext = self._cache[2]
1843 1878
1844 1879 # drop cache to save memory
1845 1880 self._cache = None
1846 1881
1847 1882 bins = self._chunks(chain, df=_df)
1848 1883 if rawtext is None:
1849 1884 rawtext = bytes(bins[0])
1850 1885 bins = bins[1:]
1851 1886
1852 1887 rawtext = mdiff.patches(rawtext, bins)
1853 1888 self._cache = (node, rev, rawtext)
1854 1889
1855 1890 if flags is None:
1856 1891 if rev is None:
1857 1892 rev = self.rev(node)
1858 1893 flags = self.flags(rev)
1859 1894
1860 1895 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1861 1896 if validatehash:
1862 1897 self.checkhash(text, node, rev=rev)
1863 1898
1864 1899 return text
1865 1900
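A brief usage sketch of the ``raw`` distinction (``rl`` and ``node`` are placeholders, so this is illustrative rather than runnable):

    rawtext = rl.revision(node, raw=True)   # bytes as stored, pre-transform
    text = rl.revision(node)                # after read-time flag processors
    # when no flags are set on the revision, the two results are identical
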
1866 1901 def hash(self, text, p1, p2):
1867 1902 """Compute a node hash.
1868 1903
1869 1904 Available as a function so that subclasses can replace the hash
1870 1905 as needed.
1871 1906 """
1872 1907 return hash(text, p1, p2)
1873 1908
1874 1909 def _processflags(self, text, flags, operation, raw=False):
1875 1910 """Inspect revision data flags and applies transforms defined by
1876 1911 registered flag processors.
1877 1912
1878 1913 ``text`` - the revision data to process
1879 1914 ``flags`` - the revision flags
1880 1915 ``operation`` - the operation being performed (read or write)
1881 1916 ``raw`` - an optional argument describing if the raw transform should be
1882 1917 applied.
1883 1918
1884 1919 This method processes the flags in the order (or reverse order if
1885 1920 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1886 1921 flag processors registered for present flags. The order of flags defined
1887 1922 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1888 1923
1889 1924 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1890 1925 processed text and ``validatehash`` is a bool indicating whether the
1891 1926 returned text should be checked for hash integrity.
1892 1927
1893 1928 Note: If the ``raw`` argument is set, it has precedence over the
1894 1929 operation and will only update the value of ``validatehash``.
1895 1930 """
1896 1931 # fast path: no flag processors will run
1897 1932 if flags == 0:
1898 1933 return text, True
1899 1934 if operation not in ('read', 'write'):
1900 1935 raise ProgrammingError(_("invalid '%s' operation") % operation)
1901 1936 # Check all flags are known.
1902 1937 if flags & ~REVIDX_KNOWN_FLAGS:
1903 1938 raise RevlogError(_("incompatible revision flag '%#x'") %
1904 1939 (flags & ~REVIDX_KNOWN_FLAGS))
1905 1940 validatehash = True
1906 1941 # Depending on the operation (read or write), the order might be
1907 1942 # reversed due to non-commutative transforms.
1908 1943 orderedflags = REVIDX_FLAGS_ORDER
1909 1944 if operation == 'write':
1910 1945 orderedflags = reversed(orderedflags)
1911 1946
1912 1947 for flag in orderedflags:
1913 1948 # If a flagprocessor has been registered for a known flag, apply the
1914 1949 # related operation transform and update result tuple.
1915 1950 if flag & flags:
1916 1951 vhash = True
1917 1952
1918 1953 if flag not in _flagprocessors:
1919 1954 message = _("missing processor for flag '%#x'") % (flag)
1920 1955 raise RevlogError(message)
1921 1956
1922 1957 processor = _flagprocessors[flag]
1923 1958 if processor is not None:
1924 1959 readtransform, writetransform, rawtransform = processor
1925 1960
1926 1961 if raw:
1927 1962 vhash = rawtransform(self, text)
1928 1963 elif operation == 'read':
1929 1964 text, vhash = readtransform(self, text)
1930 1965 else: # write operation
1931 1966 text, vhash = writetransform(self, text)
1932 1967 validatehash = validatehash and vhash
1933 1968
1934 1969 return text, validatehash
1935 1970
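A minimal sketch of the processor shape the loop above unpacks, i.e. a ``(readtransform, writetransform, rawtransform)`` tuple. The flag bit and the transforms below are hypothetical; registration would go through ``addflagprocessor()``:

    REVIDX_EXAMPLE = (1 << 12)   # hypothetical flag bit

    def _readexample(rl, text):
        # 'read': stored form -> logical form, plus a validatehash bool
        return text[len(b'hdr:'):], True

    def _writeexample(rl, text):
        # 'write': logical form -> stored form
        return b'hdr:' + text, True

    def _rawexample(rl, text):
        # 'raw': no rewrite allowed, only report whether the hash is valid
        return False

    # addflagprocessor(REVIDX_EXAMPLE,
    #                  (_readexample, _writeexample, _rawexample))
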
1936 1971 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1937 1972 """Check node hash integrity.
1938 1973
1939 1974 Available as a function so that subclasses can extend hash mismatch
1940 1975 behaviors as needed.
1941 1976 """
1942 1977 try:
1943 1978 if p1 is None and p2 is None:
1944 1979 p1, p2 = self.parents(node)
1945 1980 if node != self.hash(text, p1, p2):
1946 1981 revornode = rev
1947 1982 if revornode is None:
1948 1983 revornode = templatefilters.short(hex(node))
1949 1984 raise RevlogError(_("integrity check failed on %s:%s")
1950 1985 % (self.indexfile, pycompat.bytestr(revornode)))
1951 1986 except RevlogError:
1952 1987 if self._censorable and _censoredtext(text):
1953 1988 raise error.CensoredNodeError(self.indexfile, node, text)
1954 1989 raise
1955 1990
1956 1991 def _enforceinlinesize(self, tr, fp=None):
1957 1992 """Check if the revlog is too big for inline and convert if so.
1958 1993
1959 1994 This should be called after revisions are added to the revlog. If the
1960 1995 revlog has grown too large to be an inline revlog, it will convert it
1961 1996 to use multiple index and data files.
1962 1997 """
1963 1998 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1964 1999 return
1965 2000
1966 2001 trinfo = tr.find(self.indexfile)
1967 2002 if trinfo is None:
1968 2003 raise RevlogError(_("%s not found in the transaction")
1969 2004 % self.indexfile)
1970 2005
1971 2006 trindex = trinfo[2]
1972 2007 if trindex is not None:
1973 2008 dataoff = self.start(trindex)
1974 2009 else:
1975 2010 # revlog was stripped at start of transaction, use all leftover data
1976 2011 trindex = len(self) - 1
1977 2012 dataoff = self.end(-2)
1978 2013
1979 2014 tr.add(self.datafile, dataoff)
1980 2015
1981 2016 if fp:
1982 2017 fp.flush()
1983 2018 fp.close()
1984 2019
1985 2020 with self._datafp('w') as df:
1986 2021 for r in self:
1987 2022 df.write(self._getsegmentforrevs(r, r)[1])
1988 2023
1989 2024 with self._indexfp('w') as fp:
1990 2025 self.version &= ~FLAG_INLINE_DATA
1991 2026 self._inline = False
1992 2027 io = self._io
1993 2028 for i in self:
1994 2029 e = io.packentry(self.index[i], self.node, self.version, i)
1995 2030 fp.write(e)
1996 2031
1997 2032 # the temp file replaces the real index when we exit the context
1998 2033 # manager
1999 2034
2000 2035 tr.replace(self.indexfile, trindex * self._io.size)
2001 2036 self._chunkclear()
2002 2037
2003 2038 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2004 2039 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2005 2040 """add a revision to the log
2006 2041
2007 2042 text - the revision data to add
2008 2043 transaction - the transaction object used for rollback
2009 2044 link - the linkrev data to add
2010 2045 p1, p2 - the parent nodeids of the revision
2011 2046 cachedelta - an optional precomputed delta
2012 2047 node - nodeid of revision; typically node is not specified, and it is
2013 2048 computed by default as hash(text, p1, p2); however, subclasses might
2014 2049 use a different hashing method (and override checkhash() in that case)
2015 2050 flags - the known flags to set on the revision
2016 2051 deltacomputer - an optional _deltacomputer instance shared between
2017 2052 multiple calls
2018 2053 """
2019 2054 if link == nullrev:
2020 2055 raise RevlogError(_("attempted to add linkrev -1 to %s")
2021 2056 % self.indexfile)
2022 2057
2023 2058 if flags:
2024 2059 node = node or self.hash(text, p1, p2)
2025 2060
2026 2061 rawtext, validatehash = self._processflags(text, flags, 'write')
2027 2062
2028 2063 # If the flag processor modifies the revision data, ignore any provided
2029 2064 # cachedelta.
2030 2065 if rawtext != text:
2031 2066 cachedelta = None
2032 2067
2033 2068 if len(rawtext) > _maxentrysize:
2034 2069 raise RevlogError(
2035 2070 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2036 2071 % (self.indexfile, len(rawtext)))
2037 2072
2038 2073 node = node or self.hash(rawtext, p1, p2)
2039 2074 if node in self.nodemap:
2040 2075 return node
2041 2076
2042 2077 if validatehash:
2043 2078 self.checkhash(rawtext, node, p1=p1, p2=p2)
2044 2079
2045 2080 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2046 2081 flags, cachedelta=cachedelta,
2047 2082 deltacomputer=deltacomputer)
2048 2083
2049 2084 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2050 2085 cachedelta=None, deltacomputer=None):
2051 2086 """add a raw revision with known flags, node and parents
2052 2087 useful when reusing a revision not stored in this revlog (e.g. received
2053 2088 over the wire, or read from an external bundle).
2054 2089 """
2055 2090 dfh = None
2056 2091 if not self._inline:
2057 2092 dfh = self._datafp("a+")
2058 2093 ifh = self._indexfp("a+")
2059 2094 try:
2060 2095 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2061 2096 flags, cachedelta, ifh, dfh,
2062 2097 deltacomputer=deltacomputer)
2063 2098 finally:
2064 2099 if dfh:
2065 2100 dfh.close()
2066 2101 ifh.close()
2067 2102
2068 2103 def compress(self, data):
2069 2104 """Generate a possibly-compressed representation of data."""
2070 2105 if not data:
2071 2106 return '', data
2072 2107
2073 2108 compressed = self._compressor.compress(data)
2074 2109
2075 2110 if compressed:
2076 2111 # The revlog compressor added the header in the returned data.
2077 2112 return '', compressed
2078 2113
2079 2114 if data[0:1] == '\0':
2080 2115 return '', data
2081 2116 return 'u', data
2082 2117
2083 2118 def decompress(self, data):
2084 2119 """Decompress a revlog chunk.
2085 2120
2086 2121 The chunk is expected to begin with a header identifying the
2087 2122 format type so it can be routed to an appropriate decompressor.
2088 2123 """
2089 2124 if not data:
2090 2125 return data
2091 2126
2092 2127 # Revlogs are read much more frequently than they are written and many
2093 2128 # chunks only take microseconds to decompress, so performance is
2094 2129 # important here.
2095 2130 #
2096 2131 # We can make a few assumptions about revlogs:
2097 2132 #
2098 2133 # 1) the majority of chunks will be compressed (as opposed to inline
2099 2134 # raw data).
2100 2135 # 2) decompressing *any* data will likely be at least 10x slower than
2101 2136 # returning raw inline data.
2102 2137 # 3) we want to prioritize common and officially supported compression
2103 2138 # engines
2104 2139 #
2105 2140 # It follows that we want to optimize for "decompress compressed data
2106 2141 # when encoded with common and officially supported compression engines"
2107 2142 # case over "raw data" and "data encoded by less common or non-official
2108 2143 # compression engines." That is why we have the inline lookup first
2109 2144 # followed by the compengines lookup.
2110 2145 #
2111 2146 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2112 2147 # compressed chunks. And this matters for changelog and manifest reads.
2113 2148 t = data[0:1]
2114 2149
2115 2150 if t == 'x':
2116 2151 try:
2117 2152 return _zlibdecompress(data)
2118 2153 except zlib.error as e:
2119 2154 raise RevlogError(_('revlog decompress error: %s') %
2120 2155 stringutil.forcebytestr(e))
2121 2156 # '\0' is more common than 'u' so it goes first.
2122 2157 elif t == '\0':
2123 2158 return data
2124 2159 elif t == 'u':
2125 2160 return util.buffer(data, 1)
2126 2161
2127 2162 try:
2128 2163 compressor = self._decompressors[t]
2129 2164 except KeyError:
2130 2165 try:
2131 2166 engine = util.compengines.forrevlogheader(t)
2132 2167 compressor = engine.revlogcompressor()
2133 2168 self._decompressors[t] = compressor
2134 2169 except KeyError:
2135 2170 raise RevlogError(_('unknown compression type %r') % t)
2136 2171
2137 2172 return compressor.decompress(data)
2138 2173
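A round-trip sketch of the header convention shared by ``compress`` and ``decompress`` (the payload is arbitrary; ``'x'`` is simply the first byte of a zlib stream, which is why the fast path above tests for it):

    import zlib
    data = b'some revision text, long enough to be worth compressing' * 4
    comp = zlib.compress(data)
    assert comp[0:1] == b'x'              # routed to zlib in decompress()
    assert zlib.decompress(comp) == data
    # 'u' + data  -> stored uncompressed, decompress() strips the marker
    # '\0...'     -> raw data beginning with NUL, returned as-is
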
2139 2174 def _isgooddeltainfo(self, deltainfo, revinfo):
2140 2175 """Returns True if the given delta is good. Good means that it is within
2141 2176 the disk span, disk size, and chain length bounds that we know to be
2142 2177 performant."""
2143 2178 if deltainfo is None:
2144 2179 return False
2145 2180
2146 2181 # - 'deltainfo.distance' is the distance from the base revision --
2147 2182 # bounding it limits the amount of I/O we need to do.
2148 2183 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2149 2184 # deltas we need to apply -- bounding it limits the amount of CPU
2150 2185 # we consume.
2151 2186
2152 2187 textlen = revinfo.textlen
2153 2188 defaultmax = textlen * 4
2154 2189 maxdist = self._maxdeltachainspan
2155 2190 if not maxdist:
2156 2191 maxdist = deltainfo.distance # ensure the conditional pass
2157 2192 maxdist = max(maxdist, defaultmax)
2158 2193 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2159 2194 deltainfo.compresseddeltalen > textlen * 2 or
2160 2195 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2161 2196 return False
2162 2197
2163 2198 return True
2164 2199
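A worked instance of the bounds above, assuming ``_maxdeltachainspan`` is unset (0) and ``_maxchainlen`` is not configured:

    textlen = 1000                   # revinfo.textlen
    defaultmax = textlen * 4         # 4000
    # maxdist becomes max(deltainfo.distance, 4000), so the distance test
    # can never fail on its own; the delta is rejected only if
    #   deltainfo.deltalen > 1000            (larger than the text itself)
    #   deltainfo.compresseddeltalen > 2000  (chain costs more than 2x text)
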
2165 2200 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2166 2201 cachedelta, ifh, dfh, alwayscache=False,
2167 2202 deltacomputer=None):
2168 2203 """internal function to add revisions to the log
2169 2204
2170 2205 see addrevision for argument descriptions.
2171 2206
2172 2207 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2173 2208
2174 2209 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2175 2210 be used.
2176 2211
2177 2212 invariants:
2178 2213 - rawtext is optional (can be None); if not set, cachedelta must be set.
2179 2214 if both are set, they must correspond to each other.
2180 2215 """
2181 2216 if node == nullid:
2182 2217 raise RevlogError(_("%s: attempt to add null revision") %
2183 2218 (self.indexfile))
2184 2219 if node == wdirid or node in wdirfilenodeids:
2185 2220 raise RevlogError(_("%s: attempt to add wdir revision") %
2186 2221 (self.indexfile))
2187 2222
2188 2223 if self._inline:
2189 2224 fh = ifh
2190 2225 else:
2191 2226 fh = dfh
2192 2227
2193 2228 btext = [rawtext]
2194 2229
2195 2230 curr = len(self)
2196 2231 prev = curr - 1
2197 2232 offset = self.end(prev)
2198 2233 p1r, p2r = self.rev(p1), self.rev(p2)
2199 2234
2200 2235 # full versions are inserted when the needed deltas
2201 2236 # become comparable to the uncompressed text
2202 2237 if rawtext is None:
2203 2238 # need rawtext size, before changed by flag processors, which is
2204 2239 # the non-raw size. use revlog explicitly to avoid filelog's extra
2205 2240 # logic that might remove metadata size.
2206 2241 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2207 2242 cachedelta[1])
2208 2243 else:
2209 2244 textlen = len(rawtext)
2210 2245
2211 2246 if deltacomputer is None:
2212 2247 deltacomputer = _deltacomputer(self)
2213 2248
2214 2249 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2215 2250
2216 2251 # no delta for flag processor revision (see "candelta" for why)
2217 2252 # not calling candelta since only one revision needs test, also to
2218 2253 # avoid overhead fetching flags again.
2219 2254 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2220 2255 deltainfo = None
2221 2256 else:
2222 2257 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2223 2258
2224 2259 if deltainfo is not None:
2225 2260 base = deltainfo.base
2226 2261 chainbase = deltainfo.chainbase
2227 2262 data = deltainfo.data
2228 2263 l = deltainfo.deltalen
2229 2264 else:
2230 2265 rawtext = deltacomputer.buildtext(revinfo, fh)
2231 2266 data = self.compress(rawtext)
2232 2267 l = len(data[1]) + len(data[0])
2233 2268 base = chainbase = curr
2234 2269
2235 2270 e = (offset_type(offset, flags), l, textlen,
2236 2271 base, link, p1r, p2r, node)
2237 2272 self.index.insert(-1, e)
2238 2273 self.nodemap[node] = curr
2239 2274
2240 2275 entry = self._io.packentry(e, self.node, self.version, curr)
2241 2276 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2242 2277
2243 2278 if alwayscache and rawtext is None:
2244 2279 rawtext = deltacomputer.buildtext(revinfo, fh)
2245 2280
2246 2281 if type(rawtext) == bytes: # only accept immutable objects
2247 2282 self._cache = (node, curr, rawtext)
2248 2283 self._chainbasecache[curr] = chainbase
2249 2284 return node
2250 2285
2251 2286 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2252 2287 # Files opened in a+ mode have inconsistent behavior on various
2253 2288 # platforms. Windows requires that a file positioning call be made
2254 2289 # when the file handle transitions between reads and writes. See
2255 2290 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2256 2291 # platforms, Python or the platform itself can be buggy. Some versions
2257 2292 # of Solaris have been observed to not append at the end of the file
2258 2293 # if the file was seeked to before the end. See issue4943 for more.
2259 2294 #
2260 2295 # We work around this issue by inserting a seek() before writing.
2261 2296 # Note: This is likely not necessary on Python 3.
2262 2297 ifh.seek(0, os.SEEK_END)
2263 2298 if dfh:
2264 2299 dfh.seek(0, os.SEEK_END)
2265 2300
2266 2301 curr = len(self) - 1
2267 2302 if not self._inline:
2268 2303 transaction.add(self.datafile, offset)
2269 2304 transaction.add(self.indexfile, curr * len(entry))
2270 2305 if data[0]:
2271 2306 dfh.write(data[0])
2272 2307 dfh.write(data[1])
2273 2308 ifh.write(entry)
2274 2309 else:
2275 2310 offset += curr * self._io.size
2276 2311 transaction.add(self.indexfile, offset, curr)
2277 2312 ifh.write(entry)
2278 2313 ifh.write(data[0])
2279 2314 ifh.write(data[1])
2280 2315 self._enforceinlinesize(transaction, ifh)
2281 2316
2282 2317 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2283 2318 """
2284 2319 add a delta group
2285 2320
2286 2321 given a set of deltas, add them to the revision log. the
2287 2322 first delta is against its parent, which should be in our
2288 2323 log, the rest are against the previous delta.
2289 2324
2290 2325 If ``addrevisioncb`` is defined, it will be called with arguments of
2291 2326 this revlog and the node that was added.
2292 2327 """
2293 2328
2294 2329 nodes = []
2295 2330
2296 2331 r = len(self)
2297 2332 end = 0
2298 2333 if r:
2299 2334 end = self.end(r - 1)
2300 2335 ifh = self._indexfp("a+")
2301 2336 isize = r * self._io.size
2302 2337 if self._inline:
2303 2338 transaction.add(self.indexfile, end + isize, r)
2304 2339 dfh = None
2305 2340 else:
2306 2341 transaction.add(self.indexfile, isize, r)
2307 2342 transaction.add(self.datafile, end)
2308 2343 dfh = self._datafp("a+")
2309 2344 def flush():
2310 2345 if dfh:
2311 2346 dfh.flush()
2312 2347 ifh.flush()
2313 2348 try:
2314 2349 deltacomputer = _deltacomputer(self)
2315 2350 # loop through our set of deltas
2316 2351 for data in deltas:
2317 2352 node, p1, p2, linknode, deltabase, delta, flags = data
2318 2353 link = linkmapper(linknode)
2319 2354 flags = flags or REVIDX_DEFAULT_FLAGS
2320 2355
2321 2356 nodes.append(node)
2322 2357
2323 2358 if node in self.nodemap:
2324 2359 # this can happen if two branches make the same change
2325 2360 continue
2326 2361
2327 2362 for p in (p1, p2):
2328 2363 if p not in self.nodemap:
2329 2364 raise LookupError(p, self.indexfile,
2330 2365 _('unknown parent'))
2331 2366
2332 2367 if deltabase not in self.nodemap:
2333 2368 raise LookupError(deltabase, self.indexfile,
2334 2369 _('unknown delta base'))
2335 2370
2336 2371 baserev = self.rev(deltabase)
2337 2372
2338 2373 if baserev != nullrev and self.iscensored(baserev):
2339 2374 # if base is censored, delta must be full replacement in a
2340 2375 # single patch operation
2341 2376 hlen = struct.calcsize(">lll")
2342 2377 oldlen = self.rawsize(baserev)
2343 2378 newlen = len(delta) - hlen
2344 2379 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2345 2380 raise error.CensoredBaseError(self.indexfile,
2346 2381 self.node(baserev))
2347 2382
2348 2383 if not flags and self._peek_iscensored(baserev, delta, flush):
2349 2384 flags |= REVIDX_ISCENSORED
2350 2385
2351 2386 # We assume consumers of addrevisioncb will want to retrieve
2352 2387 # the added revision, which will require a call to
2353 2388 # revision(). revision() will fast path if there is a cache
2354 2389 # hit. So, we tell _addrevision() to always cache in this case.
2355 2390 # We're only using addgroup() in the context of changegroup
2356 2391 # generation so the revision data can always be handled as raw
2357 2392 # by the flagprocessor.
2358 2393 self._addrevision(node, None, transaction, link,
2359 2394 p1, p2, flags, (baserev, delta),
2360 2395 ifh, dfh,
2361 2396 alwayscache=bool(addrevisioncb),
2362 2397 deltacomputer=deltacomputer)
2363 2398
2364 2399 if addrevisioncb:
2365 2400 addrevisioncb(self, node)
2366 2401
2367 2402 if not dfh and not self._inline:
2368 2403 # addrevision switched from inline to conventional
2369 2404 # reopen the index
2370 2405 ifh.close()
2371 2406 dfh = self._datafp("a+")
2372 2407 ifh = self._indexfp("a+")
2373 2408 finally:
2374 2409 if dfh:
2375 2410 dfh.close()
2376 2411 ifh.close()
2377 2412
2378 2413 return nodes
2379 2414
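A hedged sketch of what each element of ``deltas`` must look like, matching the unpacking at the top of the loop above; every name below is a hypothetical placeholder, and real callers are the changegroup machinery:

    deltas = [
        # (node, p1, p2, linknode, deltabase, delta, flags)
        (node, p1, nullid, linknode, p1, delta, 0),
    ]
    nodes = rl.addgroup(deltas, linkmapper, transaction,
                        addrevisioncb=None)
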
2380 2415 def iscensored(self, rev):
2381 2416 """Check if a file revision is censored."""
2382 2417 if not self._censorable:
2383 2418 return False
2384 2419
2385 2420 return self.flags(rev) & REVIDX_ISCENSORED
2386 2421
2387 2422 def _peek_iscensored(self, baserev, delta, flush):
2388 2423 """Quickly check if a delta produces a censored revision."""
2389 2424 if not self._censorable:
2390 2425 return False
2391 2426
2392 2427 # Fragile heuristic: unless new file meta keys are added alphabetically
2393 2428 # preceding "censored", all censored revisions are prefixed by
2394 2429 # "\1\ncensored:". A delta producing such a censored revision must be a
2395 2430 # full-replacement delta, so we inspect the first and only patch in the
2396 2431 # delta for this prefix.
2397 2432 hlen = struct.calcsize(">lll")
2398 2433 if len(delta) <= hlen:
2399 2434 return False
2400 2435
2401 2436 oldlen = self.rawsize(baserev)
2402 2437 newlen = len(delta) - hlen
2403 2438 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2404 2439 return False
2405 2440
2406 2441 add = "\1\ncensored:"
2407 2442 addlen = len(add)
2408 2443 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2409 2444
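To illustrate the prefix check, a sketch of such a full-replacement delta, assuming ``mdiff.replacediffheader(oldlen, newlen)`` packs the usual ``(start, end, newlength)`` triple for one hunk spanning the whole base text:

    import struct
    hlen = struct.calcsize(">lll")        # 12-byte hunk header
    oldlen = 42                           # rawsize of the base (hypothetical)
    newtext = b"\1\ncensored: see commit message\1\n"
    delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    add = b"\1\ncensored:"
    assert delta[hlen:hlen + len(add)] == add
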
2410 2445 def getstrippoint(self, minlink):
2411 2446 """find the minimum rev that must be stripped to strip the linkrev
2412 2447
2413 2448 Returns a tuple containing the minimum rev and a set of all revs that
2414 2449 have linkrevs that will be broken by this strip.
2415 2450 """
2416 2451 brokenrevs = set()
2417 2452 strippoint = len(self)
2418 2453
2419 2454 heads = {}
2420 2455 futurelargelinkrevs = set()
2421 2456 for head in self.headrevs():
2422 2457 headlinkrev = self.linkrev(head)
2423 2458 heads[head] = headlinkrev
2424 2459 if headlinkrev >= minlink:
2425 2460 futurelargelinkrevs.add(headlinkrev)
2426 2461
2427 2462 # This algorithm involves walking down the rev graph, starting at the
2428 2463 # heads. Since the revs are topologically sorted according to linkrev,
2429 2464 # once all head linkrevs are below the minlink, we know there are
2430 2465 # no more revs that could have a linkrev greater than minlink.
2431 2466 # So we can stop walking.
2432 2467 while futurelargelinkrevs:
2433 2468 strippoint -= 1
2434 2469 linkrev = heads.pop(strippoint)
2435 2470
2436 2471 if linkrev < minlink:
2437 2472 brokenrevs.add(strippoint)
2438 2473 else:
2439 2474 futurelargelinkrevs.remove(linkrev)
2440 2475
2441 2476 for p in self.parentrevs(strippoint):
2442 2477 if p != nullrev:
2443 2478 plinkrev = self.linkrev(p)
2444 2479 heads[p] = plinkrev
2445 2480 if plinkrev >= minlink:
2446 2481 futurelargelinkrevs.add(plinkrev)
2447 2482
2448 2483 return strippoint, brokenrevs
2449 2484
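A worked example of the walk on a tiny linear history (revision and linkrev numbers are hypothetical):

    # revs:     0 -> 1 -> 2 -> 3        (single head at rev 3)
    # linkrevs: 0    1    2    3
    # getstrippoint(minlink=2):
    #   pop rev 3 (linkrev 3 >= 2), then rev 2 (linkrev 2 >= 2);
    #   rev 1 has linkrev 1 < 2, so the walk stops
    # => returns (2, set()): truncate at rev 2, no broken linkrevs
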
2450 2485 def strip(self, minlink, transaction):
2451 2486 """truncate the revlog on the first revision with a linkrev >= minlink
2452 2487
2453 2488 This function is called when we're stripping revision minlink and
2454 2489 its descendants from the repository.
2455 2490
2456 2491 We have to remove all revisions with linkrev >= minlink, because
2457 2492 the equivalent changelog revisions will be renumbered after the
2458 2493 strip.
2459 2494
2460 2495 So we truncate the revlog on the first of these revisions, and
2461 2496 trust that the caller has saved the revisions that shouldn't be
2462 2497 removed and that it'll re-add them after this truncation.
2463 2498 """
2464 2499 if len(self) == 0:
2465 2500 return
2466 2501
2467 2502 rev, _ = self.getstrippoint(minlink)
2468 2503 if rev == len(self):
2469 2504 return
2470 2505
2471 2506 # first truncate the files on disk
2472 2507 end = self.start(rev)
2473 2508 if not self._inline:
2474 2509 transaction.add(self.datafile, end)
2475 2510 end = rev * self._io.size
2476 2511 else:
2477 2512 end += rev * self._io.size
2478 2513
2479 2514 transaction.add(self.indexfile, end)
2480 2515
2481 2516 # then reset internal state in memory to forget those revisions
2482 2517 self._cache = None
2483 2518 self._chaininfocache = {}
2484 2519 self._chunkclear()
2485 2520 for x in xrange(rev, len(self)):
2486 2521 del self.nodemap[self.node(x)]
2487 2522
2488 2523 del self.index[rev:-1]
2489 2524 self._nodepos = None
2490 2525
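A short sketch of the truncation offsets computed above, assuming a version 1 index whose entries are 64 bytes (``self._io.size``):

    iosize = 64                 # bytes per index entry (revlogv1)
    rev = 10                    # first revision to strip (hypothetical)
    # separate files: truncate the data file at start(rev) and the
    #                 index at rev * 64
    # inline revlog:  truncate the index at start(rev) + rev * 64,
    #                 because entries and data chunks are interleaved
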
2491 2526 def checksize(self):
2492 2527 expected = 0
2493 2528 if len(self):
2494 2529 expected = max(0, self.end(len(self) - 1))
2495 2530
2496 2531 try:
2497 2532 with self._datafp() as f:
2498 2533 f.seek(0, 2)
2499 2534 actual = f.tell()
2500 2535 dd = actual - expected
2501 2536 except IOError as inst:
2502 2537 if inst.errno != errno.ENOENT:
2503 2538 raise
2504 2539 dd = 0
2505 2540
2506 2541 try:
2507 2542 f = self.opener(self.indexfile)
2508 2543 f.seek(0, 2)
2509 2544 actual = f.tell()
2510 2545 f.close()
2511 2546 s = self._io.size
2512 2547 i = max(0, actual // s)
2513 2548 di = actual - (i * s)
2514 2549 if self._inline:
2515 2550 databytes = 0
2516 2551 for r in self:
2517 2552 databytes += max(0, self.length(r))
2518 2553 dd = 0
2519 2554 di = actual - len(self) * s - databytes
2520 2555 except IOError as inst:
2521 2556 if inst.errno != errno.ENOENT:
2522 2557 raise
2523 2558 di = 0
2524 2559
2525 2560 return (dd, di)
2526 2561
2527 2562 def files(self):
2528 2563 res = [self.indexfile]
2529 2564 if not self._inline:
2530 2565 res.append(self.datafile)
2531 2566 return res
2532 2567
2533 2568 DELTAREUSEALWAYS = 'always'
2534 2569 DELTAREUSESAMEREVS = 'samerevs'
2535 2570 DELTAREUSENEVER = 'never'
2536 2571
2537 2572 DELTAREUSEFULLADD = 'fulladd'
2538 2573
2539 2574 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2540 2575
2541 2576 def clone(self, tr, destrevlog, addrevisioncb=None,
2542 2577 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2543 2578 """Copy this revlog to another, possibly with format changes.
2544 2579
2545 2580 The destination revlog will contain the same revisions and nodes.
2546 2581 However, it may not be bit-for-bit identical due to e.g. delta encoding
2547 2582 differences.
2548 2583
2549 2584 The ``deltareuse`` argument controls how deltas from the existing revlog
2550 2585 are preserved in the destination revlog. The argument can have the
2551 2586 following values:
2552 2587
2553 2588 DELTAREUSEALWAYS
2554 2589 Deltas will always be reused (if possible), even if the destination
2555 2590 revlog would not select the same revisions for the delta. This is the
2556 2591 fastest mode of operation.
2557 2592 DELTAREUSESAMEREVS
2558 2593 Deltas will be reused if the destination revlog would pick the same
2559 2594 revisions for the delta. This mode strikes a balance between speed
2560 2595 and optimization.
2561 2596 DELTAREUSENEVER
2562 2597 Deltas will never be reused. This is the slowest mode of execution.
2563 2598 This mode can be used to recompute deltas (e.g. if the diff/delta
2564 2599 algorithm changes).
2565 2600
2566 2601 Delta computation can be slow, so the choice of delta reuse policy can
2567 2602 significantly affect run time.
2568 2603
2569 2604 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2570 2605 two extremes. Deltas will be reused if they are appropriate. But if the
2571 2606 delta could choose a better revision, it will do so. This means if you
2572 2607 are converting a non-generaldelta revlog to a generaldelta revlog,
2573 2608 deltas will be recomputed if the delta's parent isn't a parent of the
2574 2609 revision.
2575 2610
2576 2611 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2577 2612 controls whether to compute deltas against both parents for merges.
2578 2613 By default, the destination revlog's current setting is used.
2579 2614 """
2580 2615 if deltareuse not in self.DELTAREUSEALL:
2581 2616 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2582 2617
2583 2618 if len(destrevlog):
2584 2619 raise ValueError(_('destination revlog is not empty'))
2585 2620
2586 2621 if getattr(self, 'filteredrevs', None):
2587 2622 raise ValueError(_('source revlog has filtered revisions'))
2588 2623 if getattr(destrevlog, 'filteredrevs', None):
2589 2624 raise ValueError(_('destination revlog has filtered revisions'))
2590 2625
2591 2626 # lazydeltabase controls whether to reuse a cached delta, if possible.
2592 2627 oldlazydeltabase = destrevlog._lazydeltabase
2593 2628 oldamd = destrevlog._aggressivemergedeltas
2594 2629
2595 2630 try:
2596 2631 if deltareuse == self.DELTAREUSEALWAYS:
2597 2632 destrevlog._lazydeltabase = True
2598 2633 elif deltareuse == self.DELTAREUSESAMEREVS:
2599 2634 destrevlog._lazydeltabase = False
2600 2635
2601 2636 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2602 2637
2603 2638 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2604 2639 self.DELTAREUSESAMEREVS)
2605 2640
2606 2641 deltacomputer = _deltacomputer(destrevlog)
2607 2642 index = self.index
2608 2643 for rev in self:
2609 2644 entry = index[rev]
2610 2645
2611 2646 # Some classes override linkrev to take filtered revs into
2612 2647 # account. Use raw entry from index.
2613 2648 flags = entry[0] & 0xffff
2614 2649 linkrev = entry[4]
2615 2650 p1 = index[entry[5]][7]
2616 2651 p2 = index[entry[6]][7]
2617 2652 node = entry[7]
2618 2653
2619 2654 # (Possibly) reuse the delta from the revlog if allowed and
2620 2655 # the revlog chunk is a delta.
2621 2656 cachedelta = None
2622 2657 rawtext = None
2623 2658 if populatecachedelta:
2624 2659 dp = self.deltaparent(rev)
2625 2660 if dp != nullrev:
2626 2661 cachedelta = (dp, bytes(self._chunk(rev)))
2627 2662
2628 2663 if not cachedelta:
2629 2664 rawtext = self.revision(rev, raw=True)
2630 2665
2631 2666
2632 2667 if deltareuse == self.DELTAREUSEFULLADD:
2633 2668 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2634 2669 cachedelta=cachedelta,
2635 2670 node=node, flags=flags,
2636 2671 deltacomputer=deltacomputer)
2637 2672 else:
2638 2673 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2639 2674 checkambig=False)
2640 2675 dfh = None
2641 2676 if not destrevlog._inline:
2642 2677 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2643 2678 try:
2644 2679 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2645 2680 p2, flags, cachedelta, ifh, dfh,
2646 2681 deltacomputer=deltacomputer)
2647 2682 finally:
2648 2683 if dfh:
2649 2684 dfh.close()
2650 2685 ifh.close()
2651 2686
2652 2687 if addrevisioncb:
2653 2688 addrevisioncb(self, rev, node)
2654 2689 finally:
2655 2690 destrevlog._lazydeltabase = oldlazydeltabase
2656 2691 destrevlog._aggressivemergedeltas = oldamd
@@ -1,82 +1,83 b''
1 1 # this is a hack to make sure no escape characters are inserted into the output
2 2
3 3 from __future__ import absolute_import
4 4
5 5 import doctest
6 6 import os
7 7 import re
8 8 import sys
9 9
10 10 ispy3 = (sys.version_info[0] >= 3)
11 11
12 12 if 'TERM' in os.environ:
13 13 del os.environ['TERM']
14 14
15 15 class py3docchecker(doctest.OutputChecker):
16 16 def check_output(self, want, got, optionflags):
17 17 want2 = re.sub(r'''\bu(['"])(.*?)\1''', r'\1\2\1', want) # py2: u''
18 18 got2 = re.sub(r'''\bb(['"])(.*?)\1''', r'\1\2\1', got) # py3: b''
19 19 # py3: <exc.name>: b'<msg>' -> <name>: <msg>
20 20 # <exc.name>: <others> -> <name>: <others>
21 21 got2 = re.sub(r'''^mercurial\.\w+\.(\w+): (['"])(.*?)\2''', r'\1: \3',
22 22 got2, flags=re.MULTILINE)
23 23 got2 = re.sub(r'^mercurial\.\w+\.(\w+): ', r'\1: ', got2, flags=re.MULTILINE)
24 24 return any(doctest.OutputChecker.check_output(self, w, g, optionflags)
25 25 for w, g in [(want, got), (want2, got2)])
26 26
27 27 def testmod(name, optionflags=0, testtarget=None):
28 28 __import__(name)
29 29 mod = sys.modules[name]
30 30 if testtarget is not None:
31 31 mod = getattr(mod, testtarget)
32 32
33 33 # minimal copy of doctest.testmod()
34 34 finder = doctest.DocTestFinder()
35 35 checker = None
36 36 if ispy3:
37 37 checker = py3docchecker()
38 38 runner = doctest.DocTestRunner(checker=checker, optionflags=optionflags)
39 39 for test in finder.find(mod, name):
40 40 runner.run(test)
41 41 runner.summarize()
42 42
43 43 testmod('mercurial.changegroup')
44 44 testmod('mercurial.changelog')
45 45 testmod('mercurial.cmdutil')
46 46 testmod('mercurial.color')
47 47 testmod('mercurial.config')
48 48 testmod('mercurial.context')
49 49 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
50 50 testmod('mercurial.dispatch')
51 51 testmod('mercurial.encoding')
52 52 testmod('mercurial.fancyopts')
53 53 testmod('mercurial.formatter')
54 54 testmod('mercurial.hg')
55 55 testmod('mercurial.hgweb.hgwebdir_mod')
56 56 testmod('mercurial.match')
57 57 testmod('mercurial.mdiff')
58 58 testmod('mercurial.minirst')
59 59 testmod('mercurial.patch')
60 60 testmod('mercurial.pathutil')
61 61 testmod('mercurial.parser')
62 62 testmod('mercurial.pycompat')
63 testmod('mercurial.revlog')
63 64 testmod('mercurial.revsetlang')
64 65 testmod('mercurial.smartset')
65 66 testmod('mercurial.store')
66 67 testmod('mercurial.subrepo')
67 68 testmod('mercurial.templatefilters')
68 69 testmod('mercurial.templater')
69 70 testmod('mercurial.ui')
70 71 testmod('mercurial.url')
71 72 testmod('mercurial.util')
72 73 testmod('mercurial.util', testtarget='platform')
73 74 testmod('mercurial.utils.stringutil')
74 75 testmod('hgext.convert.convcmd')
75 76 testmod('hgext.convert.cvsps')
76 77 testmod('hgext.convert.filemap')
77 78 testmod('hgext.convert.p4')
78 79 testmod('hgext.convert.subversion')
79 80 testmod('hgext.fix')
80 81 testmod('hgext.mq')
81 82 # Helper scripts in tests/ that have doctests:
82 83 testmod('drawdag')