revlog: add function to slice chunk down to a given size...
Boris Feld
r38664:e59e27e5 default
@@ -1,2798 +1,2875 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
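As an illustration of the metadata framing above, packmeta and parsemeta round-trip each other; a minimal sketch with made-up values:

    meta = {'copy': 'a.txt', 'copyrev': '0' * 40}  # example values only
    blob = packmeta(meta, 'payload')
    parsed, size = parsemeta(blob)
    assert parsed == meta and blob[size:] == 'payload'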
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate the hash. If
142 142 "write" returns True, it means the "rawtext" returned by "write"
143 143 should be used to generate the hash. Usually, "write" and "read" return
144 144 different booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
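A minimal registration sketch. The identity transforms and their names are hypothetical, and REVIDX_EXTSTORED is reused purely as an example flag (the call is left commented out since registration mutates module state):

    def _readident(self, rawtext):
        return rawtext, True   # (text, usable for hash verification)
    def _writeident(self, text):
        return text, True      # (rawtext, use rawtext to generate the hash)
    def _rawident(self, rawtext):
        return True            # rawtext usable for hash verification
    # addflagprocessor(REVIDX_EXTSTORED, (_readident, _writeident, _rawident))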
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
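The first field of an index entry packs a 48-bit offset above a 16-bit flag field; a quick round-trip check with illustrative values:

    v = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(v) == 1024
    assert gettype(v) == REVIDX_ISCENSORED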
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
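For two non-null parents this is equivalent to SHA-1 over the sorted parent nodes followed by the text; a small sanity check against the hash() defined above, with made-up values:

    p1, p2 = b'\x11' * 20, b'\x22' * 20
    expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + b'text').digest()
    assert hash(b'text', p1, p2) == expected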
199 199 class _testrevlog(object):
200 200 """minimalist fake revlog to use in doctests"""
201 201
202 202 def __init__(self, data, density=0.5, mingap=0):
203 203 """data is an list of revision payload boundaries"""
204 204 self._data = data
205 205 self._srdensitythreshold = density
206 206 self._srmingapsize = mingap
207 207
208 208 def start(self, rev):
209 209 if rev == 0:
210 210 return 0
211 211 return self._data[rev - 1]
212 212
213 213 def end(self, rev):
214 214 return self._data[rev]
215 215
216 216 def length(self, rev):
217 217 return self.end(rev) - self.start(rev)
218 218
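In this fake revlog the payload boundaries double as offsets: data[rev] is the end offset of rev and start(rev) is the previous boundary (or 0). For example:

    rl = _testrevlog([5, 10, 12])
    assert (rl.start(1), rl.end(1), rl.length(1)) == (5, 10, 5)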
219 219 def _trimchunk(revlog, revs, startidx, endidx=None):
220 220 """returns revs[startidx:endidx] without empty trailing revs
221 221
222 222 Doctest Setup
223 223 >>> revlog = _testrevlog([
224 224 ... 5, #0
225 225 ... 10, #1
226 226 ... 12, #2
227 227 ... 12, #3 (empty)
228 228 ... 17, #4
229 229 ... 21, #5
230 230 ... 21, #6 (empty)
231 231 ... ])
232 232
233 233 Contiguous cases:
234 234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 235 [0, 1, 2, 3, 4, 5]
236 236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 237 [0, 1, 2, 3, 4]
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 239 [0, 1, 2]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 241 [2]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 243 [3, 4, 5]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 245 [3, 4]
246 246
247 247 Discontiguous cases:
248 248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 249 [1, 3, 5]
250 250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 251 [1]
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 253 [3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 255 [3, 5]
256 256 """
257 257 length = revlog.length
258 258
259 259 if endidx is None:
260 260 endidx = len(revs)
261 261
262 262 # Trim empty revs at the end, but never the very first revision of a chain
263 263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
264 264 endidx -= 1
265 265
266 266 return revs[startidx:endidx]
267 267
268 268 def _segmentspan(revlog, revs):
269 269 """Get the byte span of a segment of revisions
270 270
271 271 revs is a sorted array of revision numbers
272 272
273 273 >>> revlog = _testrevlog([
274 274 ... 5, #0
275 275 ... 10, #1
276 276 ... 12, #2
277 277 ... 12, #3 (empty)
278 278 ... 17, #4
279 279 ... ])
280 280
281 281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
282 282 17
283 283 >>> _segmentspan(revlog, [0, 4])
284 284 17
285 285 >>> _segmentspan(revlog, [3, 4])
286 286 5
287 287 >>> _segmentspan(revlog, [1, 2, 3,])
288 288 7
289 289 >>> _segmentspan(revlog, [1, 3])
290 290 7
291 291 """
292 292 if not revs:
293 293 return 0
294 294 return revlog.end(revs[-1]) - revlog.start(revs[0])
295 295
296 296 def _slicechunk(revlog, revs):
297 297 """slice revs to reduce the amount of unrelated data to be read from disk.
298 298
299 299 ``revs`` is sliced into groups that should be read in a single pass.
300 300 Assume that revs are sorted.
301 301
302 302 The initial chunk is sliced until the overall density (payload/chunks-span
303 303 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
304 304 `revlog._srmingapsize` is skipped.
305 305
306 306 >>> revlog = _testrevlog([
307 307 ... 5, #00 (5)
308 308 ... 10, #01 (5)
309 309 ... 12, #02 (2)
310 310 ... 12, #03 (empty)
311 311 ... 27, #04 (15)
312 312 ... 31, #05 (4)
313 313 ... 31, #06 (empty)
314 314 ... 42, #07 (11)
315 315 ... 47, #08 (5)
316 316 ... 47, #09 (empty)
317 317 ... 48, #10 (1)
318 318 ... 51, #11 (3)
319 319 ... 74, #12 (23)
320 320 ... 85, #13 (11)
321 321 ... 86, #14 (1)
322 322 ... 91, #15 (5)
323 323 ... ])
324 324
325 325 >>> list(_slicechunk(revlog, range(16)))
326 326 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
327 327 >>> list(_slicechunk(revlog, [0, 15]))
328 328 [[0], [15]]
329 329 >>> list(_slicechunk(revlog, [0, 11, 15]))
330 330 [[0], [11], [15]]
331 331 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
332 332 [[0], [11, 13, 15]]
333 333 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
334 334 [[1, 2], [5, 8, 10, 11], [14]]
335 335 """
336 336 for chunk in _slicechunktodensity(revlog, revs,
337 337 revlog._srdensitythreshold,
338 338 revlog._srmingapsize):
339 339 yield chunk
340 340
341 def _slicechunktosize(revlog, revs, targetsize):
342 """slice revs to match the target size
343
344 This is intended to be used on chunks that density slicing selected but
345 that are still too large compared to the read guarantee of revlogs. This
346 might happen when the "minimal gap size" interrupted the slicing or when
347 chains are built in a way that creates large blocks next to each other.
348
349 >>> revlog = _testrevlog([
350 ... 3, #0 (3)
351 ... 5, #1 (2)
352 ... 6, #2 (1)
353 ... 8, #3 (2)
354 ... 8, #4 (empty)
355 ... 11, #5 (3)
356 ... 12, #6 (1)
357 ... 13, #7 (1)
358 ... 14, #8 (1)
359 ... ])
360
361 Cases where the chunk is already small enough
362 >>> list(_slicechunktosize(revlog, [0], 3))
363 [[0]]
364 >>> list(_slicechunktosize(revlog, [6, 7], 3))
365 [[6, 7]]
366 >>> list(_slicechunktosize(revlog, [0], None))
367 [[0]]
368 >>> list(_slicechunktosize(revlog, [6, 7], None))
369 [[6, 7]]
370
371 Cases where we need actual slicing
372 >>> list(_slicechunktosize(revlog, [0, 1], 3))
373 [[0], [1]]
374 >>> list(_slicechunktosize(revlog, [1, 3], 3))
375 [[1], [3]]
376 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
377 [[1, 2], [3]]
378 >>> list(_slicechunktosize(revlog, [3, 5], 3))
379 [[3], [5]]
380 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
381 [[3], [5]]
382 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
383 [[5], [6, 7, 8]]
384 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
385 [[0], [1, 2], [3], [5], [6, 7, 8]]
386
387 Cases with a too-large individual chunk (must still return valid chunks)
388 >>> list(_slicechunktosize(revlog, [0, 1], 2))
389 [[0], [1]]
390 >>> list(_slicechunktosize(revlog, [1, 3], 1))
391 [[1], [3]]
392 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
393 [[3], [5]]
394 """
395 assert targetsize is None or 0 <= targetsize
396 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
397 yield revs
398 return
399
400 startrevidx = 0
401 startdata = revlog.start(revs[0])
402 endrevidx = 0
403 iterrevs = enumerate(revs)
404 next(iterrevs) # skip first rev.
405 for idx, r in iterrevs:
406 span = revlog.end(r) - startdata
407 if span <= targetsize:
408 endrevidx = idx
409 else:
410 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
411 if chunk:
412 yield chunk
413 startrevidx = idx
414 startdata = revlog.start(r)
415 endrevidx = idx
416 yield _trimchunk(revlog, revs, startrevidx)
417
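As the docstring notes, this function is meant to run on the output of density slicing; a hypothetical composition helper (not part of this changeset) could chain the two like so:

    def _slicechunktarget(revlog, revs, targetsize=None):
        # hypothetical helper: density-slice first, then cap each chunk's span
        for chunk in _slicechunktodensity(revlog, revs,
                                          revlog._srdensitythreshold,
                                          revlog._srmingapsize):
            for subchunk in _slicechunktosize(revlog, chunk, targetsize):
                yield subchunk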
341 418 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
342 419 """slice revs to reduce the amount of unrelated data to be read from disk.
343 420
344 421 ``revs`` is sliced into groups that should be read in a single pass.
345 422 Assume that revs are sorted.
346 423
347 424 The initial chunk is sliced until the overall density (payload/chunks-span
348 425 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
349 426 skipped.
350 427
351 428 >>> revlog = _testrevlog([
352 429 ... 5, #00 (5)
353 430 ... 10, #01 (5)
354 431 ... 12, #02 (2)
355 432 ... 12, #03 (empty)
356 433 ... 27, #04 (15)
357 434 ... 31, #05 (4)
358 435 ... 31, #06 (empty)
359 436 ... 42, #07 (11)
360 437 ... 47, #08 (5)
361 438 ... 47, #09 (empty)
362 439 ... 48, #10 (1)
363 440 ... 51, #11 (3)
364 441 ... 74, #12 (23)
365 442 ... 85, #13 (11)
366 443 ... 86, #14 (1)
367 444 ... 91, #15 (5)
368 445 ... ])
369 446
370 447 >>> list(_slicechunktodensity(revlog, range(16)))
371 448 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
372 449 >>> list(_slicechunktodensity(revlog, [0, 15]))
373 450 [[0], [15]]
374 451 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
375 452 [[0], [11], [15]]
376 453 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
377 454 [[0], [11, 13, 15]]
378 455 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
379 456 [[1, 2], [5, 8, 10, 11], [14]]
380 457 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
381 458 ... mingapsize=20))
382 459 [[1, 2, 3, 5, 8, 10, 11], [14]]
383 460 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
384 461 ... targetdensity=0.95))
385 462 [[1, 2], [5], [8, 10, 11], [14]]
386 463 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
387 464 ... targetdensity=0.95, mingapsize=12))
388 465 [[1, 2], [5, 8, 10, 11], [14]]
389 466 """
390 467 start = revlog.start
391 468 length = revlog.length
392 469
393 470 if len(revs) <= 1:
394 471 yield revs
395 472 return
396 473
397 474 readdata = deltachainspan = _segmentspan(revlog, revs)
398 475
399 476 if deltachainspan < mingapsize:
400 477 yield revs
401 478 return
402 479
403 480 chainpayload = sum(length(r) for r in revs)
404 481
405 482 if deltachainspan:
406 483 density = chainpayload / float(deltachainspan)
407 484 else:
408 485 density = 1.0
409 486
410 487 if density >= targetdensity:
411 488 yield revs
412 489 return
413 490
414 491 # Store the gaps in a heap to have them sorted by decreasing size
415 492 gapsheap = []
416 493 heapq.heapify(gapsheap)
417 494 prevend = None
418 495 for i, rev in enumerate(revs):
419 496 revstart = start(rev)
420 497 revlen = length(rev)
421 498
422 499 # Skip empty revisions to form larger holes
423 500 if revlen == 0:
424 501 continue
425 502
426 503 if prevend is not None:
427 504 gapsize = revstart - prevend
428 505 # only consider holes that are large enough
429 506 if gapsize > mingapsize:
430 507 heapq.heappush(gapsheap, (-gapsize, i))
431 508
432 509 prevend = revstart + revlen
433 510
434 511 # Collect the indices of the largest holes until the density is acceptable
435 512 indicesheap = []
436 513 heapq.heapify(indicesheap)
437 514 while gapsheap and density < targetdensity:
438 515 oppgapsize, gapidx = heapq.heappop(gapsheap)
439 516
440 517 heapq.heappush(indicesheap, gapidx)
441 518
442 519 # the gap sizes are stored as negatives to be sorted decreasingly
443 520 # by the heap
444 521 readdata -= (-oppgapsize)
445 522 if readdata > 0:
446 523 density = chainpayload / float(readdata)
447 524 else:
448 525 density = 1.0
449 526
450 527 # Cut the revs at collected indices
451 528 previdx = 0
452 529 while indicesheap:
453 530 idx = heapq.heappop(indicesheap)
454 531
455 532 chunk = _trimchunk(revlog, revs, previdx, idx)
456 533 if chunk:
457 534 yield chunk
458 535
459 536 previdx = idx
460 537
461 538 chunk = _trimchunk(revlog, revs, previdx)
462 539 if chunk:
463 540 yield chunk
464 541
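Tracing the [0, 15] doctest above makes the density loop concrete:

    # revs [0, 15]: span = end(15) - start(0) = 91, payload = 5 + 5 = 10,
    # so density = 10/91 ~= 0.11 < 0.5. The 81-byte gap between the two revs
    # is the only entry popped from gapsheap; readdata drops to 10, density
    # becomes 1.0, and cutting at the collected index yields [[0], [15]].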
465 542 @attr.s(slots=True, frozen=True)
466 543 class _deltainfo(object):
467 544 distance = attr.ib()
468 545 deltalen = attr.ib()
469 546 data = attr.ib()
470 547 base = attr.ib()
471 548 chainbase = attr.ib()
472 549 chainlen = attr.ib()
473 550 compresseddeltalen = attr.ib()
474 551
475 552 class _deltacomputer(object):
476 553 def __init__(self, revlog):
477 554 self.revlog = revlog
478 555
479 556 def _getcandidaterevs(self, p1, p2, cachedelta):
480 557 """
481 558 Provides revisions that present an interest to be diffed against,
482 559 grouped by level of easiness.
483 560 """
484 561 revlog = self.revlog
485 562 gdelta = revlog._generaldelta
486 563 curr = len(revlog)
487 564 prev = curr - 1
488 565 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
489 566
490 567 # should we try to build a delta?
491 568 if prev != nullrev and revlog.storedeltachains:
492 569 tested = set()
493 570 # This condition is true most of the time when processing
494 571 # changegroup data into a generaldelta repo. The only time it
495 572 # isn't true is if this is the first revision in a delta chain
496 573 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
497 574 if cachedelta and gdelta and revlog._lazydeltabase:
498 575 # Assume what we received from the server is a good choice
499 576 # build delta will reuse the cache
500 577 yield (cachedelta[0],)
501 578 tested.add(cachedelta[0])
502 579
503 580 if gdelta:
504 581 # exclude already lazy tested base if any
505 582 parents = [p for p in (p1r, p2r)
506 583 if p != nullrev and p not in tested]
507 584
508 585 if not revlog._aggressivemergedeltas and len(parents) == 2:
509 586 parents.sort()
510 587 # To minimize the chance of having to build a fulltext,
511 588 # pick first whichever parent is closest to us (max rev)
512 589 yield (parents[1],)
513 590 # then the other one (min rev) if the first did not fit
514 591 yield (parents[0],)
515 592 tested.update(parents)
516 593 elif len(parents) > 0:
517 594 # Test all parents (1 or 2), and keep the best candidate
518 595 yield parents
519 596 tested.update(parents)
520 597
521 598 if prev not in tested:
522 599 # other approaches failed; try against prev to hopefully save us a
523 600 # fulltext.
524 601 yield (prev,)
525 602 tested.add(prev)
526 603
527 604 def buildtext(self, revinfo, fh):
528 605 """Builds a fulltext version of a revision
529 606
530 607 revinfo: _revisioninfo instance that contains all needed info
531 608 fh: file handle to either the .i or the .d revlog file,
532 609 depending on whether it is inlined or not
533 610 """
534 611 btext = revinfo.btext
535 612 if btext[0] is not None:
536 613 return btext[0]
537 614
538 615 revlog = self.revlog
539 616 cachedelta = revinfo.cachedelta
540 617 flags = revinfo.flags
541 618 node = revinfo.node
542 619
543 620 baserev = cachedelta[0]
544 621 delta = cachedelta[1]
545 622 # special case deltas which replace entire base; no need to decode
546 623 # base revision. this neatly avoids censored bases, which throw when
547 624 # they're decoded.
548 625 hlen = struct.calcsize(">lll")
549 626 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
550 627 len(delta) - hlen):
551 628 btext[0] = delta[hlen:]
552 629 else:
553 630 # deltabase is the rawtext before it was changed by flag processors,
554 631 # which is equivalent to non-raw text
555 632 basetext = revlog.revision(baserev, _df=fh, raw=False)
556 633 btext[0] = mdiff.patch(basetext, delta)
557 634
558 635 try:
559 636 res = revlog._processflags(btext[0], flags, 'read', raw=True)
560 637 btext[0], validatehash = res
561 638 if validatehash:
562 639 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
563 640 if flags & REVIDX_ISCENSORED:
564 641 raise RevlogError(_('node %s is not censored') % node)
565 642 except CensoredNodeError:
566 643 # must pass the censored index flag to add censored revisions
567 644 if not flags & REVIDX_ISCENSORED:
568 645 raise
569 646 return btext[0]
570 647
571 648 def _builddeltadiff(self, base, revinfo, fh):
572 649 revlog = self.revlog
573 650 t = self.buildtext(revinfo, fh)
574 651 if revlog.iscensored(base):
575 652 # deltas based on a censored revision must replace the
576 653 # full content in one patch, so delta works everywhere
577 654 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
578 655 delta = header + t
579 656 else:
580 657 ptext = revlog.revision(base, _df=fh, raw=True)
581 658 delta = mdiff.textdiff(ptext, t)
582 659
583 660 return delta
584 661
585 662 def _builddeltainfo(self, revinfo, base, fh):
586 663 # can we use the cached delta?
587 664 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
588 665 delta = revinfo.cachedelta[1]
589 666 else:
590 667 delta = self._builddeltadiff(base, revinfo, fh)
591 668 revlog = self.revlog
592 669 header, data = revlog.compress(delta)
593 670 deltalen = len(header) + len(data)
594 671 chainbase = revlog.chainbase(base)
595 672 offset = revlog.end(len(revlog) - 1)
596 673 dist = deltalen + offset - revlog.start(chainbase)
597 674 if revlog._generaldelta:
598 675 deltabase = base
599 676 else:
600 677 deltabase = chainbase
601 678 chainlen, compresseddeltalen = revlog._chaininfo(base)
602 679 chainlen += 1
603 680 compresseddeltalen += deltalen
604 681 return _deltainfo(dist, deltalen, (header, data), deltabase,
605 682 chainbase, chainlen, compresseddeltalen)
606 683
607 684 def finddeltainfo(self, revinfo, fh):
608 685 """Find an acceptable delta against a candidate revision
609 686
610 687 revinfo: information about the revision (instance of _revisioninfo)
611 688 fh: file handle to either the .i or the .d revlog file,
612 689 depending on whether it is inlined or not
613 690
614 691 Returns the first acceptable candidate revision, as ordered by
615 692 _getcandidaterevs
616 693 """
617 694 cachedelta = revinfo.cachedelta
618 695 p1 = revinfo.p1
619 696 p2 = revinfo.p2
620 697 revlog = self.revlog
621 698
622 699 deltainfo = None
623 700 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
624 701 nominateddeltas = []
625 702 for candidaterev in candidaterevs:
626 703 # no delta for rawtext-changing revs (see "candelta" for why)
627 704 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
628 705 continue
629 706 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
630 707 if revlog._isgooddeltainfo(candidatedelta, revinfo):
631 708 nominateddeltas.append(candidatedelta)
632 709 if nominateddeltas:
633 710 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
634 711 break
635 712
636 713 return deltainfo
637 714
638 715 @attr.s(slots=True, frozen=True)
639 716 class _revisioninfo(object):
640 717 """Information about a revision that allows building its fulltext
641 718 node: expected hash of the revision
642 719 p1, p2: parent revs of the revision
643 720 btext: built text cache consisting of a one-element list
644 721 cachedelta: (baserev, uncompressed_delta) or None
645 722 flags: flags associated to the revision storage
646 723
647 724 One of btext[0] or cachedelta must be set.
648 725 """
649 726 node = attr.ib()
650 727 p1 = attr.ib()
651 728 p2 = attr.ib()
652 729 btext = attr.ib()
653 730 textlen = attr.ib()
654 731 cachedelta = attr.ib()
655 732 flags = attr.ib()
656 733
657 734 # index v0:
658 735 # 4 bytes: offset
659 736 # 4 bytes: compressed length
660 737 # 4 bytes: base rev
661 738 # 4 bytes: link rev
662 739 # 20 bytes: parent 1 nodeid
663 740 # 20 bytes: parent 2 nodeid
664 741 # 20 bytes: nodeid
665 742 indexformatv0 = struct.Struct(">4l20s20s20s")
666 743 indexformatv0_pack = indexformatv0.pack
667 744 indexformatv0_unpack = indexformatv0.unpack
668 745
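Packed with ">4l20s20s20s", a v0 record is a fixed 76 bytes (four 4-byte integers plus three 20-byte nodeids):

    assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per entry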
669 746 class revlogoldio(object):
670 747 def __init__(self):
671 748 self.size = indexformatv0.size
672 749
673 750 def parseindex(self, data, inline):
674 751 s = self.size
675 752 index = []
676 753 nodemap = {nullid: nullrev}
677 754 n = off = 0
678 755 l = len(data)
679 756 while off + s <= l:
680 757 cur = data[off:off + s]
681 758 off += s
682 759 e = indexformatv0_unpack(cur)
683 760 # transform to revlogv1 format
684 761 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
685 762 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
686 763 index.append(e2)
687 764 nodemap[e[6]] = n
688 765 n += 1
689 766
690 767 # add the magic null revision at -1
691 768 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
692 769
693 770 return index, nodemap, None
694 771
695 772 def packentry(self, entry, node, version, rev):
696 773 if gettype(entry[0]):
697 774 raise RevlogError(_('index entry flags need revlog version 1'))
698 775 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
699 776 node(entry[5]), node(entry[6]), entry[7])
700 777 return indexformatv0_pack(*e2)
701 778
702 779 # index ng:
703 780 # 6 bytes: offset
704 781 # 2 bytes: flags
705 782 # 4 bytes: compressed length
706 783 # 4 bytes: uncompressed length
707 784 # 4 bytes: base rev
708 785 # 4 bytes: link rev
709 786 # 4 bytes: parent 1 rev
710 787 # 4 bytes: parent 2 rev
711 788 # 32 bytes: nodeid
712 789 indexformatng = struct.Struct(">Qiiiiii20s12x")
713 790 indexformatng_pack = indexformatng.pack
714 791 versionformat = struct.Struct(">I")
715 792 versionformat_pack = versionformat.pack
716 793 versionformat_unpack = versionformat.unpack
717 794
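Each "ng" record is a fixed 64 bytes: the 8-byte offset/flags field, six 4-byte integers, a 20-byte nodeid, and 12 bytes of padding:

    assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes per entry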
718 795 # maximum uncompressed length that indexformatng can record (2 GiB,
719 796 # 4-byte signed integer)
720 797 _maxentrysize = 0x7fffffff
721 798
722 799 class revlogio(object):
723 800 def __init__(self):
724 801 self.size = indexformatng.size
725 802
726 803 def parseindex(self, data, inline):
727 804 # call the C implementation to parse the index data
728 805 index, cache = parsers.parse_index2(data, inline)
729 806 return index, getattr(index, 'nodemap', None), cache
730 807
731 808 def packentry(self, entry, node, version, rev):
732 809 p = indexformatng_pack(*entry)
733 810 if rev == 0:
734 811 p = versionformat_pack(version) + p[4:]
735 812 return p
736 813
737 814 class revlog(object):
738 815 """
739 816 the underlying revision storage object
740 817
741 818 A revlog consists of two parts, an index and the revision data.
742 819
743 820 The index is a file with a fixed record size containing
744 821 information on each revision, including its nodeid (hash), the
745 822 nodeids of its parents, the position and offset of its data within
746 823 the data file, and the revision it's based on. Finally, each entry
747 824 contains a linkrev entry that can serve as a pointer to external
748 825 data.
749 826
750 827 The revision data itself is a linear collection of data chunks.
751 828 Each chunk represents a revision and is usually represented as a
752 829 delta against the previous chunk. To bound lookup time, runs of
753 830 deltas are limited to about 2 times the length of the original
754 831 version data. This makes retrieval of a version proportional to
755 832 its size, or O(1) relative to the number of revisions.
756 833
757 834 Both pieces of the revlog are written to in an append-only
758 835 fashion, which means we never need to rewrite a file to insert or
759 836 remove data, and can use some simple techniques to avoid the need
760 837 for locking while reading.
761 838
762 839 If checkambig, indexfile is opened with checkambig=True at
763 840 writing, to avoid file stat ambiguity.
764 841
765 842 If mmaplargeindex is True, and an mmapindexthreshold is set, the
766 843 index will be mmapped rather than read if it is larger than the
767 844 configured threshold.
768 845
769 846 If censorable is True, the revlog can have censored revisions.
770 847 """
771 848 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
772 849 mmaplargeindex=False, censorable=False):
773 850 """
774 851 create a revlog object
775 852
776 853 opener is a function that abstracts the file opening operation
777 854 and can be used to implement COW semantics or the like.
778 855 """
779 856 self.indexfile = indexfile
780 857 self.datafile = datafile or (indexfile[:-2] + ".d")
781 858 self.opener = opener
782 859 # When True, indexfile is opened with checkambig=True at writing, to
783 860 # avoid file stat ambiguity.
784 861 self._checkambig = checkambig
785 862 self._censorable = censorable
786 863 # 3-tuple of (node, rev, text) for a raw revision.
787 864 self._cache = None
788 865 # Maps rev to chain base rev.
789 866 self._chainbasecache = util.lrucachedict(100)
790 867 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
791 868 self._chunkcache = (0, '')
792 869 # How much data to read and cache into the raw revlog data cache.
793 870 self._chunkcachesize = 65536
794 871 self._maxchainlen = None
795 872 self._aggressivemergedeltas = True
796 873 self.index = []
797 874 # Mapping of partial identifiers to full nodes.
798 875 self._pcache = {}
799 876 # Mapping of revision integer to full node.
800 877 self._nodecache = {nullid: nullrev}
801 878 self._nodepos = None
802 879 self._compengine = 'zlib'
803 880 self._maxdeltachainspan = -1
804 881 self._withsparseread = False
805 882 self._srdensitythreshold = 0.50
806 883 self._srmingapsize = 262144
807 884
808 885 mmapindexthreshold = None
809 886 v = REVLOG_DEFAULT_VERSION
810 887 opts = getattr(opener, 'options', None)
811 888 if opts is not None:
812 889 if 'revlogv2' in opts:
813 890 # version 2 revlogs always use generaldelta.
814 891 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
815 892 elif 'revlogv1' in opts:
816 893 if 'generaldelta' in opts:
817 894 v |= FLAG_GENERALDELTA
818 895 else:
819 896 v = 0
820 897 if 'chunkcachesize' in opts:
821 898 self._chunkcachesize = opts['chunkcachesize']
822 899 if 'maxchainlen' in opts:
823 900 self._maxchainlen = opts['maxchainlen']
824 901 if 'aggressivemergedeltas' in opts:
825 902 self._aggressivemergedeltas = opts['aggressivemergedeltas']
826 903 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
827 904 if 'compengine' in opts:
828 905 self._compengine = opts['compengine']
829 906 if 'maxdeltachainspan' in opts:
830 907 self._maxdeltachainspan = opts['maxdeltachainspan']
831 908 if mmaplargeindex and 'mmapindexthreshold' in opts:
832 909 mmapindexthreshold = opts['mmapindexthreshold']
833 910 self._withsparseread = bool(opts.get('with-sparse-read', False))
834 911 if 'sparse-read-density-threshold' in opts:
835 912 self._srdensitythreshold = opts['sparse-read-density-threshold']
836 913 if 'sparse-read-min-gap-size' in opts:
837 914 self._srmingapsize = opts['sparse-read-min-gap-size']
838 915
839 916 if self._chunkcachesize <= 0:
840 917 raise RevlogError(_('revlog chunk cache size %r is not greater '
841 918 'than 0') % self._chunkcachesize)
842 919 elif self._chunkcachesize & (self._chunkcachesize - 1):
843 920 raise RevlogError(_('revlog chunk cache size %r is not a power '
844 921 'of 2') % self._chunkcachesize)
845 922
846 923 indexdata = ''
847 924 self._initempty = True
848 925 try:
849 926 with self._indexfp() as f:
850 927 if (mmapindexthreshold is not None and
851 928 self.opener.fstat(f).st_size >= mmapindexthreshold):
852 929 indexdata = util.buffer(util.mmapread(f))
853 930 else:
854 931 indexdata = f.read()
855 932 if len(indexdata) > 0:
856 933 v = versionformat_unpack(indexdata[:4])[0]
857 934 self._initempty = False
858 935 except IOError as inst:
859 936 if inst.errno != errno.ENOENT:
860 937 raise
861 938
862 939 self.version = v
863 940 self._inline = v & FLAG_INLINE_DATA
864 941 self._generaldelta = v & FLAG_GENERALDELTA
865 942 flags = v & ~0xFFFF
866 943 fmt = v & 0xFFFF
867 944 if fmt == REVLOGV0:
868 945 if flags:
869 946 raise RevlogError(_('unknown flags (%#04x) in version %d '
870 947 'revlog %s') %
871 948 (flags >> 16, fmt, self.indexfile))
872 949 elif fmt == REVLOGV1:
873 950 if flags & ~REVLOGV1_FLAGS:
874 951 raise RevlogError(_('unknown flags (%#04x) in version %d '
875 952 'revlog %s') %
876 953 (flags >> 16, fmt, self.indexfile))
877 954 elif fmt == REVLOGV2:
878 955 if flags & ~REVLOGV2_FLAGS:
879 956 raise RevlogError(_('unknown flags (%#04x) in version %d '
880 957 'revlog %s') %
881 958 (flags >> 16, fmt, self.indexfile))
882 959 else:
883 960 raise RevlogError(_('unknown version (%d) in revlog %s') %
884 961 (fmt, self.indexfile))
885 962
886 963 self.storedeltachains = True
887 964
888 965 self._io = revlogio()
889 966 if self.version == REVLOGV0:
890 967 self._io = revlogoldio()
891 968 try:
892 969 d = self._io.parseindex(indexdata, self._inline)
893 970 except (ValueError, IndexError):
894 971 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
895 972 self.index, nodemap, self._chunkcache = d
896 973 if nodemap is not None:
897 974 self.nodemap = self._nodecache = nodemap
898 975 if not self._chunkcache:
899 976 self._chunkclear()
900 977 # revnum -> (chain-length, sum-delta-length)
901 978 self._chaininfocache = {}
902 979 # revlog header -> revlog compressor
903 980 self._decompressors = {}
904 981
905 982 @util.propertycache
906 983 def _compressor(self):
907 984 return util.compengines[self._compengine].revlogcompressor()
908 985
909 986 def _indexfp(self, mode='r'):
910 987 """file object for the revlog's index file"""
911 988 args = {r'mode': mode}
912 989 if mode != 'r':
913 990 args[r'checkambig'] = self._checkambig
914 991 if mode == 'w':
915 992 args[r'atomictemp'] = True
916 993 return self.opener(self.indexfile, **args)
917 994
918 995 def _datafp(self, mode='r'):
919 996 """file object for the revlog's data file"""
920 997 return self.opener(self.datafile, mode=mode)
921 998
922 999 @contextlib.contextmanager
923 1000 def _datareadfp(self, existingfp=None):
924 1001 """file object suitable to read data"""
925 1002 if existingfp is not None:
926 1003 yield existingfp
927 1004 else:
928 1005 if self._inline:
929 1006 func = self._indexfp
930 1007 else:
931 1008 func = self._datafp
932 1009 with func() as fp:
933 1010 yield fp
934 1011
935 1012 def tip(self):
936 1013 return self.node(len(self.index) - 2)
937 1014 def __contains__(self, rev):
938 1015 return 0 <= rev < len(self)
939 1016 def __len__(self):
940 1017 return len(self.index) - 1
941 1018 def __iter__(self):
942 1019 return iter(xrange(len(self)))
943 1020 def revs(self, start=0, stop=None):
944 1021 """iterate over all rev in this revlog (from start to stop)"""
945 1022 step = 1
946 1023 if stop is not None:
947 1024 if start > stop:
948 1025 step = -1
949 1026 stop += step
950 1027 else:
951 1028 stop = len(self)
952 1029 return xrange(start, stop, step)
953 1030
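Note that revs() iterates backwards when start > stop, and both endpoints are included; for instance, on a revlog rl with at least three revisions:

    # list(rl.revs(0, 2)) -> [0, 1, 2]
    # list(rl.revs(2, 0)) -> [2, 1, 0]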
954 1031 @util.propertycache
955 1032 def nodemap(self):
956 1033 self.rev(self.node(0))
957 1034 return self._nodecache
958 1035
959 1036 def hasnode(self, node):
960 1037 try:
961 1038 self.rev(node)
962 1039 return True
963 1040 except KeyError:
964 1041 return False
965 1042
966 1043 def candelta(self, baserev, rev):
967 1044 """whether two revisions (baserev, rev) can be delta-ed or not"""
968 1045 # Disable delta if either rev requires a content-changing flag
969 1046 # processor (ex. LFS). This is because such a flag processor can alter
970 1047 # the rawtext content that the delta will be based on, and two clients
971 1048 # could have the same revlog node with different flags (i.e. different
972 1049 # rawtext contents) and the delta could be incompatible.
973 1050 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
974 1051 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
975 1052 return False
976 1053 return True
977 1054
978 1055 def clearcaches(self):
979 1056 self._cache = None
980 1057 self._chainbasecache.clear()
981 1058 self._chunkcache = (0, '')
982 1059 self._pcache = {}
983 1060
984 1061 try:
985 1062 self._nodecache.clearcaches()
986 1063 except AttributeError:
987 1064 self._nodecache = {nullid: nullrev}
988 1065 self._nodepos = None
989 1066
990 1067 def rev(self, node):
991 1068 try:
992 1069 return self._nodecache[node]
993 1070 except TypeError:
994 1071 raise
995 1072 except RevlogError:
996 1073 # parsers.c radix tree lookup failed
997 1074 if node == wdirid or node in wdirfilenodeids:
998 1075 raise error.WdirUnsupported
999 1076 raise LookupError(node, self.indexfile, _('no node'))
1000 1077 except KeyError:
1001 1078 # pure python cache lookup failed
1002 1079 n = self._nodecache
1003 1080 i = self.index
1004 1081 p = self._nodepos
1005 1082 if p is None:
1006 1083 p = len(i) - 2
1007 1084 else:
1008 1085 assert p < len(i)
1009 1086 for r in xrange(p, -1, -1):
1010 1087 v = i[r][7]
1011 1088 n[v] = r
1012 1089 if v == node:
1013 1090 self._nodepos = r - 1
1014 1091 return r
1015 1092 if node == wdirid or node in wdirfilenodeids:
1016 1093 raise error.WdirUnsupported
1017 1094 raise LookupError(node, self.indexfile, _('no node'))
1018 1095
1019 1096 # Accessors for index entries.
1020 1097
1021 1098 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1022 1099 # are flags.
1023 1100 def start(self, rev):
1024 1101 return int(self.index[rev][0] >> 16)
1025 1102
1026 1103 def flags(self, rev):
1027 1104 return self.index[rev][0] & 0xFFFF
1028 1105
1029 1106 def length(self, rev):
1030 1107 return self.index[rev][1]
1031 1108
1032 1109 def rawsize(self, rev):
1033 1110 """return the length of the uncompressed text for a given revision"""
1034 1111 l = self.index[rev][2]
1035 1112 if l >= 0:
1036 1113 return l
1037 1114
1038 1115 t = self.revision(rev, raw=True)
1039 1116 return len(t)
1040 1117
1041 1118 def size(self, rev):
1042 1119 """length of non-raw text (processed by a "read" flag processor)"""
1043 1120 # fast path: if no "read" flag processor could change the content,
1044 1121 # size is rawsize. note: ELLIPSIS is known to not change the content.
1045 1122 flags = self.flags(rev)
1046 1123 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1047 1124 return self.rawsize(rev)
1048 1125
1049 1126 return len(self.revision(rev, raw=False))
1050 1127
1051 1128 def chainbase(self, rev):
1052 1129 base = self._chainbasecache.get(rev)
1053 1130 if base is not None:
1054 1131 return base
1055 1132
1056 1133 index = self.index
1057 1134 iterrev = rev
1058 1135 base = index[iterrev][3]
1059 1136 while base != iterrev:
1060 1137 iterrev = base
1061 1138 base = index[iterrev][3]
1062 1139
1063 1140 self._chainbasecache[rev] = base
1064 1141 return base
1065 1142
1066 1143 def linkrev(self, rev):
1067 1144 return self.index[rev][4]
1068 1145
1069 1146 def parentrevs(self, rev):
1070 1147 try:
1071 1148 entry = self.index[rev]
1072 1149 except IndexError:
1073 1150 if rev == wdirrev:
1074 1151 raise error.WdirUnsupported
1075 1152 raise
1076 1153
1077 1154 return entry[5], entry[6]
1078 1155
1079 1156 def node(self, rev):
1080 1157 try:
1081 1158 return self.index[rev][7]
1082 1159 except IndexError:
1083 1160 if rev == wdirrev:
1084 1161 raise error.WdirUnsupported
1085 1162 raise
1086 1163
1087 1164 # Derived from index values.
1088 1165
1089 1166 def end(self, rev):
1090 1167 return self.start(rev) + self.length(rev)
1091 1168
1092 1169 def parents(self, node):
1093 1170 i = self.index
1094 1171 d = i[self.rev(node)]
1095 1172 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1096 1173
1097 1174 def chainlen(self, rev):
1098 1175 return self._chaininfo(rev)[0]
1099 1176
1100 1177 def _chaininfo(self, rev):
1101 1178 chaininfocache = self._chaininfocache
1102 1179 if rev in chaininfocache:
1103 1180 return chaininfocache[rev]
1104 1181 index = self.index
1105 1182 generaldelta = self._generaldelta
1106 1183 iterrev = rev
1107 1184 e = index[iterrev]
1108 1185 clen = 0
1109 1186 compresseddeltalen = 0
1110 1187 while iterrev != e[3]:
1111 1188 clen += 1
1112 1189 compresseddeltalen += e[1]
1113 1190 if generaldelta:
1114 1191 iterrev = e[3]
1115 1192 else:
1116 1193 iterrev -= 1
1117 1194 if iterrev in chaininfocache:
1118 1195 t = chaininfocache[iterrev]
1119 1196 clen += t[0]
1120 1197 compresseddeltalen += t[1]
1121 1198 break
1122 1199 e = index[iterrev]
1123 1200 else:
1124 1201 # Add text length of base since decompressing that also takes
1125 1202 # work. For cache hits the length is already included.
1126 1203 compresseddeltalen += e[1]
1127 1204 r = (clen, compresseddeltalen)
1128 1205 chaininfocache[rev] = r
1129 1206 return r
1130 1207
1131 1208 def _deltachain(self, rev, stoprev=None):
1132 1209 """Obtain the delta chain for a revision.
1133 1210
1134 1211 ``stoprev`` specifies a revision to stop at. If not specified, we
1135 1212 stop at the base of the chain.
1136 1213
1137 1214 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1138 1215 revs in ascending order and ``stopped`` is a bool indicating whether
1139 1216 ``stoprev`` was hit.
1140 1217 """
1141 1218 # Try C implementation.
1142 1219 try:
1143 1220 return self.index.deltachain(rev, stoprev, self._generaldelta)
1144 1221 except AttributeError:
1145 1222 pass
1146 1223
1147 1224 chain = []
1148 1225
1149 1226 # Alias to prevent attribute lookup in tight loop.
1150 1227 index = self.index
1151 1228 generaldelta = self._generaldelta
1152 1229
1153 1230 iterrev = rev
1154 1231 e = index[iterrev]
1155 1232 while iterrev != e[3] and iterrev != stoprev:
1156 1233 chain.append(iterrev)
1157 1234 if generaldelta:
1158 1235 iterrev = e[3]
1159 1236 else:
1160 1237 iterrev -= 1
1161 1238 e = index[iterrev]
1162 1239
1163 1240 if iterrev == stoprev:
1164 1241 stopped = True
1165 1242 else:
1166 1243 chain.append(iterrev)
1167 1244 stopped = False
1168 1245
1169 1246 chain.reverse()
1170 1247 return chain, stopped
1171 1248
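A usage sketch (rl is a placeholder revlog instance):

    # chain, stopped = rl._deltachain(rev)
    # 'chain' lists revs base-first up to 'rev'; 'stopped' is False here.
    # chain, stopped = rl._deltachain(rev, stoprev=base)
    # the walk ends at 'base' (excluded) and 'stopped' is True if it was hit.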
1172 1249 def ancestors(self, revs, stoprev=0, inclusive=False):
1173 1250 """Generate the ancestors of 'revs' in reverse topological order.
1174 1251 Does not generate revs lower than stoprev.
1175 1252
1176 1253 See the documentation for ancestor.lazyancestors for more details."""
1177 1254
1178 1255 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1179 1256 inclusive=inclusive)
1180 1257
1181 1258 def descendants(self, revs):
1182 1259 """Generate the descendants of 'revs' in revision order.
1183 1260
1184 1261 Yield a sequence of revision numbers starting with a child of
1185 1262 some rev in revs, i.e., each revision is *not* considered a
1186 1263 descendant of itself. Results are ordered by revision number (a
1187 1264 topological sort)."""
1188 1265 first = min(revs)
1189 1266 if first == nullrev:
1190 1267 for i in self:
1191 1268 yield i
1192 1269 return
1193 1270
1194 1271 seen = set(revs)
1195 1272 for i in self.revs(start=first + 1):
1196 1273 for x in self.parentrevs(i):
1197 1274 if x != nullrev and x in seen:
1198 1275 seen.add(i)
1199 1276 yield i
1200 1277 break
1201 1278
1202 1279 def findcommonmissing(self, common=None, heads=None):
1203 1280 """Return a tuple of the ancestors of common and the ancestors of heads
1204 1281 that are not ancestors of common. In revset terminology, we return the
1205 1282 tuple:
1206 1283
1207 1284 ::common, (::heads) - (::common)
1208 1285
1209 1286 The list is sorted by revision number, meaning it is
1210 1287 topologically sorted.
1211 1288
1212 1289 'heads' and 'common' are both lists of node IDs. If heads is
1213 1290 not supplied, uses all of the revlog's heads. If common is not
1214 1291 supplied, uses nullid."""
1215 1292 if common is None:
1216 1293 common = [nullid]
1217 1294 if heads is None:
1218 1295 heads = self.heads()
1219 1296
1220 1297 common = [self.rev(n) for n in common]
1221 1298 heads = [self.rev(n) for n in heads]
1222 1299
1223 1300 # we want the ancestors, but inclusive
1224 1301 class lazyset(object):
1225 1302 def __init__(self, lazyvalues):
1226 1303 self.addedvalues = set()
1227 1304 self.lazyvalues = lazyvalues
1228 1305
1229 1306 def __contains__(self, value):
1230 1307 return value in self.addedvalues or value in self.lazyvalues
1231 1308
1232 1309 def __iter__(self):
1233 1310 added = self.addedvalues
1234 1311 for r in added:
1235 1312 yield r
1236 1313 for r in self.lazyvalues:
1237 1314 if not r in added:
1238 1315 yield r
1239 1316
1240 1317 def add(self, value):
1241 1318 self.addedvalues.add(value)
1242 1319
1243 1320 def update(self, values):
1244 1321 self.addedvalues.update(values)
1245 1322
1246 1323 has = lazyset(self.ancestors(common))
1247 1324 has.add(nullrev)
1248 1325 has.update(common)
1249 1326
1250 1327 # take all ancestors from heads that aren't in has
1251 1328 missing = set()
1252 1329 visit = collections.deque(r for r in heads if r not in has)
1253 1330 while visit:
1254 1331 r = visit.popleft()
1255 1332 if r in missing:
1256 1333 continue
1257 1334 else:
1258 1335 missing.add(r)
1259 1336 for p in self.parentrevs(r):
1260 1337 if p not in has:
1261 1338 visit.append(p)
1262 1339 missing = list(missing)
1263 1340 missing.sort()
1264 1341 return has, [self.node(miss) for miss in missing]
1265 1342
1266 1343 def incrementalmissingrevs(self, common=None):
1267 1344 """Return an object that can be used to incrementally compute the
1268 1345 revision numbers of the ancestors of arbitrary sets that are not
1269 1346 ancestors of common. This is an ancestor.incrementalmissingancestors
1270 1347 object.
1271 1348
1272 1349 'common' is a list of revision numbers. If common is not supplied, uses
1273 1350 nullrev.
1274 1351 """
1275 1352 if common is None:
1276 1353 common = [nullrev]
1277 1354
1278 1355 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1279 1356
1280 1357 def findmissingrevs(self, common=None, heads=None):
1281 1358 """Return the revision numbers of the ancestors of heads that
1282 1359 are not ancestors of common.
1283 1360
1284 1361 More specifically, return a list of revision numbers corresponding to
1285 1362 nodes N such that every N satisfies the following constraints:
1286 1363
1287 1364 1. N is an ancestor of some node in 'heads'
1288 1365 2. N is not an ancestor of any node in 'common'
1289 1366
1290 1367 The list is sorted by revision number, meaning it is
1291 1368 topologically sorted.
1292 1369
1293 1370 'heads' and 'common' are both lists of revision numbers. If heads is
1294 1371 not supplied, uses all of the revlog's heads. If common is not
1295 1372 supplied, uses nullid."""
1296 1373 if common is None:
1297 1374 common = [nullrev]
1298 1375 if heads is None:
1299 1376 heads = self.headrevs()
1300 1377
1301 1378 inc = self.incrementalmissingrevs(common=common)
1302 1379 return inc.missingancestors(heads)
1303 1380
1304 1381 def findmissing(self, common=None, heads=None):
1305 1382 """Return the ancestors of heads that are not ancestors of common.
1306 1383
1307 1384 More specifically, return a list of nodes N such that every N
1308 1385 satisfies the following constraints:
1309 1386
1310 1387 1. N is an ancestor of some node in 'heads'
1311 1388 2. N is not an ancestor of any node in 'common'
1312 1389
1313 1390 The list is sorted by revision number, meaning it is
1314 1391 topologically sorted.
1315 1392
1316 1393 'heads' and 'common' are both lists of node IDs. If heads is
1317 1394 not supplied, uses all of the revlog's heads. If common is not
1318 1395 supplied, uses nullid."""
1319 1396 if common is None:
1320 1397 common = [nullid]
1321 1398 if heads is None:
1322 1399 heads = self.heads()
1323 1400
1324 1401 common = [self.rev(n) for n in common]
1325 1402 heads = [self.rev(n) for n in heads]
1326 1403
1327 1404 inc = self.incrementalmissingrevs(common=common)
1328 1405 return [self.node(r) for r in inc.missingancestors(heads)]
1329 1406
1330 1407 def nodesbetween(self, roots=None, heads=None):
1331 1408 """Return a topological path from 'roots' to 'heads'.
1332 1409
1333 1410 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1334 1411 topologically sorted list of all nodes N that satisfy both of
1335 1412 these constraints:
1336 1413
1337 1414 1. N is a descendant of some node in 'roots'
1338 1415 2. N is an ancestor of some node in 'heads'
1339 1416
1340 1417 Every node is considered to be both a descendant and an ancestor
1341 1418 of itself, so every reachable node in 'roots' and 'heads' will be
1342 1419 included in 'nodes'.
1343 1420
1344 1421 'outroots' is the list of reachable nodes in 'roots', i.e., the
1345 1422 subset of 'roots' that is returned in 'nodes'. Likewise,
1346 1423 'outheads' is the subset of 'heads' that is also in 'nodes'.
1347 1424
1348 1425 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1349 1426 unspecified, uses nullid as the only root. If 'heads' is
1350 1427 unspecified, uses list of all of the revlog's heads."""
1351 1428 nonodes = ([], [], [])
1352 1429 if roots is not None:
1353 1430 roots = list(roots)
1354 1431 if not roots:
1355 1432 return nonodes
1356 1433 lowestrev = min([self.rev(n) for n in roots])
1357 1434 else:
1358 1435 roots = [nullid] # Everybody's a descendant of nullid
1359 1436 lowestrev = nullrev
1360 1437 if (lowestrev == nullrev) and (heads is None):
1361 1438 # We want _all_ the nodes!
1362 1439 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1363 1440 if heads is None:
1364 1441 # All nodes are ancestors, so the latest ancestor is the last
1365 1442 # node.
1366 1443 highestrev = len(self) - 1
1367 1444 # Set ancestors to None to signal that every node is an ancestor.
1368 1445 ancestors = None
1369 1446 # Set heads to an empty dictionary for later discovery of heads
1370 1447 heads = {}
1371 1448 else:
1372 1449 heads = list(heads)
1373 1450 if not heads:
1374 1451 return nonodes
1375 1452 ancestors = set()
1376 1453 # Turn heads into a dictionary so we can remove 'fake' heads.
1377 1454 # Also, later we will be using it to filter out the heads we can't
1378 1455 # find from roots.
1379 1456 heads = dict.fromkeys(heads, False)
1380 1457 # Start at the top and keep marking parents until we're done.
1381 1458 nodestotag = set(heads)
1382 1459 # Remember where the top was so we can use it as a limit later.
1383 1460 highestrev = max([self.rev(n) for n in nodestotag])
1384 1461 while nodestotag:
1385 1462 # grab a node to tag
1386 1463 n = nodestotag.pop()
1387 1464 # Never tag nullid
1388 1465 if n == nullid:
1389 1466 continue
1390 1467 # A node's revision number represents its place in a
1391 1468 # topologically sorted list of nodes.
1392 1469 r = self.rev(n)
1393 1470 if r >= lowestrev:
1394 1471 if n not in ancestors:
1395 1472 # If we are possibly a descendant of one of the roots
1396 1473 # and we haven't already been marked as an ancestor
1397 1474 ancestors.add(n) # Mark as ancestor
1398 1475 # Add non-nullid parents to list of nodes to tag.
1399 1476 nodestotag.update([p for p in self.parents(n) if
1400 1477 p != nullid])
1401 1478 elif n in heads: # We've seen it before, is it a fake head?
1402 1479 # So it is, real heads should not be the ancestors of
1403 1480 # any other heads.
1404 1481 heads.pop(n)
1405 1482 if not ancestors:
1406 1483 return nonodes
1407 1484 # Now that we have our set of ancestors, we want to remove any
1408 1485 # roots that are not ancestors.
1409 1486
1410 1487 # If one of the roots was nullid, everything is included anyway.
1411 1488 if lowestrev > nullrev:
1412 1489 # But, since we weren't, let's recompute the lowest rev to not
1413 1490 # include roots that aren't ancestors.
1414 1491
1415 1492 # Filter out roots that aren't ancestors of heads
1416 1493 roots = [root for root in roots if root in ancestors]
1417 1494 # Recompute the lowest revision
1418 1495 if roots:
1419 1496 lowestrev = min([self.rev(root) for root in roots])
1420 1497 else:
1421 1498 # No more roots? Return empty list
1422 1499 return nonodes
1423 1500 else:
1424 1501 # We are descending from nullid, and don't need to care about
1425 1502 # any other roots.
1426 1503 lowestrev = nullrev
1427 1504 roots = [nullid]
1428 1505 # Transform our roots list into a set.
1429 1506 descendants = set(roots)
1430 1507 # Also, keep the original roots so we can filter out roots that aren't
1431 1508 # 'real' roots (i.e. are descended from other roots).
1432 1509 roots = descendants.copy()
1433 1510 # Our topologically sorted list of output nodes.
1434 1511 orderedout = []
1435 1512 # Don't start at nullid since we don't want nullid in our output list,
1436 1513 # and if nullid shows up in descendants, empty parents will look like
1437 1514 # they're descendants.
1438 1515 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1439 1516 n = self.node(r)
1440 1517 isdescendant = False
1441 1518 if lowestrev == nullrev: # Everybody is a descendant of nullid
1442 1519 isdescendant = True
1443 1520 elif n in descendants:
1444 1521 # n is already a descendant
1445 1522 isdescendant = True
1446 1523 # This check only needs to be done here because all the roots
1447 1524 # will start being marked as descendants before the loop.
1448 1525 if n in roots:
1449 1526 # If n was a root, check if it's a 'real' root.
1450 1527 p = tuple(self.parents(n))
1451 1528 # If any of its parents are descendants, it's not a root.
1452 1529 if (p[0] in descendants) or (p[1] in descendants):
1453 1530 roots.remove(n)
1454 1531 else:
1455 1532 p = tuple(self.parents(n))
1456 1533 # A node is a descendant if either of its parents is a
1457 1534 # descendant. (We seeded the descendants set with the roots
1458 1535 # up there, remember?)
1459 1536 if (p[0] in descendants) or (p[1] in descendants):
1460 1537 descendants.add(n)
1461 1538 isdescendant = True
1462 1539 if isdescendant and ((ancestors is None) or (n in ancestors)):
1463 1540 # Only include nodes that are both descendants and ancestors.
1464 1541 orderedout.append(n)
1465 1542 if (ancestors is not None) and (n in heads):
1466 1543 # We're trying to figure out which heads are reachable
1467 1544 # from roots.
1468 1545 # Mark this head as having been reached
1469 1546 heads[n] = True
1470 1547 elif ancestors is None:
1471 1548 # Otherwise, we're trying to discover the heads.
1472 1549 # Assume this is a head because if it isn't, the next step
1473 1550 # will eventually remove it.
1474 1551 heads[n] = True
1475 1552 # But, obviously its parents aren't.
1476 1553 for p in self.parents(n):
1477 1554 heads.pop(p, None)
1478 1555 heads = [head for head, flag in heads.iteritems() if flag]
1479 1556 roots = list(roots)
1480 1557 assert orderedout
1481 1558 assert roots
1482 1559 assert heads
1483 1560 return (orderedout, roots, heads)
1484 1561
1485 1562 def headrevs(self):
1486 1563 try:
1487 1564 return self.index.headrevs()
1488 1565 except AttributeError:
1489 1566 return self._headrevs()
1490 1567
1491 1568 def computephases(self, roots):
1492 1569 return self.index.computephasesmapsets(roots)
1493 1570
1494 1571 def _headrevs(self):
1495 1572 count = len(self)
1496 1573 if not count:
1497 1574 return [nullrev]
1498 1575 # we won't iterate over filtered revs, so nobody is a head at start
1499 1576 ishead = [0] * (count + 1)
1500 1577 index = self.index
1501 1578 for r in self:
1502 1579 ishead[r] = 1 # I may be a head
1503 1580 e = index[r]
1504 1581 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1505 1582 return [r for r, val in enumerate(ishead) if val]
1506 1583
1507 1584 def heads(self, start=None, stop=None):
1508 1585 """return the list of all nodes that have no children
1509 1586
1510 1587 if start is specified, only heads that are descendants of
1511 1588 start will be returned
1512 1589 if stop is specified, it will consider all the revs from stop
1513 1590 as if they had no children
1514 1591 """
1515 1592 if start is None and stop is None:
1516 1593 if not len(self):
1517 1594 return [nullid]
1518 1595 return [self.node(r) for r in self.headrevs()]
1519 1596
1520 1597 if start is None:
1521 1598 start = nullid
1522 1599 if stop is None:
1523 1600 stop = []
1524 1601 stoprevs = set([self.rev(n) for n in stop])
1525 1602 startrev = self.rev(start)
1526 1603 reachable = {startrev}
1527 1604 heads = {startrev}
1528 1605
1529 1606 parentrevs = self.parentrevs
1530 1607 for r in self.revs(start=startrev + 1):
1531 1608 for p in parentrevs(r):
1532 1609 if p in reachable:
1533 1610 if r not in stoprevs:
1534 1611 reachable.add(r)
1535 1612 heads.add(r)
1536 1613 if p in heads and p not in stoprevs:
1537 1614 heads.remove(p)
1538 1615
1539 1616 return [self.node(r) for r in heads]
1540 1617
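# A hedged sketch of the forward walk in heads(start, stop) above: a rev
# becomes reachable when one of its parents is reachable and it is not a
# stop point, and a parent leaves the head set once it has a reachable
# child. Toy data structures, hypothetical function name.
def _example_heads(parentpairs, startrev, stoprevs=()):
    stoprevs = set(stoprevs)
    reachable = {startrev}
    heads = {startrev}
    for r in range(startrev + 1, len(parentpairs)):
        for p in parentpairs[r]:
            if p in reachable:
                if r not in stoprevs:
                    reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)
    return sorted(heads)

# On the same toy history as above, _example_heads([(-1, -1), (0, -1),
# (1, -1), (1, -1)], 0) == [2, 3], matching _example_headrevs().
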
1541 1618 def children(self, node):
1542 1619 """find the children of a given node"""
1543 1620 c = []
1544 1621 p = self.rev(node)
1545 1622 for r in self.revs(start=p + 1):
1546 1623 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1547 1624 if prevs:
1548 1625 for pr in prevs:
1549 1626 if pr == p:
1550 1627 c.append(self.node(r))
1551 1628 elif p == nullrev:
1552 1629 c.append(self.node(r))
1553 1630 return c
1554 1631
1555 1632 def descendant(self, start, end):
1556 1633 """True if revision 'end' is a descendant of revision 'start'
1557 1634
1558 1635 A revision is considered a descendant of itself."""
1559 1636 if start == nullrev:
1560 1637 return True
1561 1638 elif start == end:
1562 1639 return True
1563 1640 return start in self._commonancestorsheads(start, end)
1564 1641
1565 1642 def commonancestorsheads(self, a, b):
1566 1643 """calculate all the heads of the common ancestors of nodes a and b"""
1567 1644 a, b = self.rev(a), self.rev(b)
1568 1645 ancs = self._commonancestorsheads(a, b)
1569 1646 return pycompat.maplist(self.node, ancs)
1570 1647
1571 1648 def _commonancestorsheads(self, *revs):
1572 1649 """calculate all the heads of the common ancestors of revs"""
1573 1650 try:
1574 1651 ancs = self.index.commonancestorsheads(*revs)
1575 1652 except (AttributeError, OverflowError): # C implementation failed
1576 1653 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1577 1654 return ancs
1578 1655
1579 1656 def isancestor(self, a, b):
1580 1657 """return True if node a is an ancestor of node b
1581 1658
1582 1659 The implementation of this is trivial but the use of
1583 1660 commonancestorsheads is not."""
1584 1661 a, b = self.rev(a), self.rev(b)
1585 1662 return self.descendant(a, b)
1586 1663
1587 1664 def ancestor(self, a, b):
1588 1665 """calculate the "best" common ancestor of nodes a and b"""
1589 1666
1590 1667 a, b = self.rev(a), self.rev(b)
1591 1668 try:
1592 1669 ancs = self.index.ancestors(a, b)
1593 1670 except (AttributeError, OverflowError):
1594 1671 ancs = ancestor.ancestors(self.parentrevs, a, b)
1595 1672 if ancs:
1596 1673 # choose a consistent winner when there's a tie
1597 1674 return min(map(self.node, ancs))
1598 1675 return nullid
1599 1676
1600 1677 def _match(self, id):
1601 1678 if isinstance(id, int):
1602 1679 # rev
1603 1680 return self.node(id)
1604 1681 if len(id) == 20:
1605 1682 # possibly a binary node
1606 1683 # odds of a binary node being all hex in ASCII are 1 in 10**25
1607 1684 try:
1608 1685 node = id
1609 1686 self.rev(node) # quick search the index
1610 1687 return node
1611 1688 except LookupError:
1612 1689 pass # may be partial hex id
1613 1690 try:
1614 1691 # str(rev)
1615 1692 rev = int(id)
1616 1693 if "%d" % rev != id:
1617 1694 raise ValueError
1618 1695 if rev < 0:
1619 1696 rev = len(self) + rev
1620 1697 if rev < 0 or rev >= len(self):
1621 1698 raise ValueError
1622 1699 return self.node(rev)
1623 1700 except (ValueError, OverflowError):
1624 1701 pass
1625 1702 if len(id) == 40:
1626 1703 try:
1627 1704 # a full hex nodeid?
1628 1705 node = bin(id)
1629 1706 self.rev(node)
1630 1707 return node
1631 1708 except (TypeError, LookupError):
1632 1709 pass
1633 1710
1634 1711 def _partialmatch(self, id):
1635 1712 # we don't care about wdirfilenodeids as they should always be full hashes
1636 1713 maybewdir = wdirhex.startswith(id)
1637 1714 try:
1638 1715 partial = self.index.partialmatch(id)
1639 1716 if partial and self.hasnode(partial):
1640 1717 if maybewdir:
1641 1718 # single 'ff...' match in radix tree, ambiguous with wdir
1642 1719 raise RevlogError
1643 1720 return partial
1644 1721 if maybewdir:
1645 1722 # no 'ff...' match in radix tree, wdir identified
1646 1723 raise error.WdirUnsupported
1647 1724 return None
1648 1725 except RevlogError:
1649 1726 # parsers.c radix tree lookup gave multiple matches
1650 1727 # fast path: for unfiltered changelog, radix tree is accurate
1651 1728 if not getattr(self, 'filteredrevs', None):
1652 1729 raise LookupError(id, self.indexfile,
1653 1730 _('ambiguous identifier'))
1654 1731 # fall through to slow path that filters hidden revisions
1655 1732 except (AttributeError, ValueError):
1656 1733 # we are pure python, or key was too short to search radix tree
1657 1734 pass
1658 1735
1659 1736 if id in self._pcache:
1660 1737 return self._pcache[id]
1661 1738
1662 1739 if len(id) <= 40:
1663 1740 try:
1664 1741 # hex(node)[:...]
1665 1742 l = len(id) // 2 # grab an even number of digits
1666 1743 prefix = bin(id[:l * 2])
1667 1744 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1668 1745 nl = [n for n in nl if hex(n).startswith(id) and
1669 1746 self.hasnode(n)]
1670 1747 if len(nl) > 0:
1671 1748 if len(nl) == 1 and not maybewdir:
1672 1749 self._pcache[id] = nl[0]
1673 1750 return nl[0]
1674 1751 raise LookupError(id, self.indexfile,
1675 1752 _('ambiguous identifier'))
1676 1753 if maybewdir:
1677 1754 raise error.WdirUnsupported
1678 1755 return None
1679 1756 except TypeError:
1680 1757 pass
1681 1758
1682 1759 def lookup(self, id):
1683 1760 """locate a node based on:
1684 1761 - revision number or str(revision number)
1685 1762 - nodeid or subset of hex nodeid
1686 1763 """
1687 1764 n = self._match(id)
1688 1765 if n is not None:
1689 1766 return n
1690 1767 n = self._partialmatch(id)
1691 1768 if n:
1692 1769 return n
1693 1770
1694 1771 raise LookupError(id, self.indexfile, _('no match found'))
1695 1772
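# A hedged usage sketch of lookup()'s resolution order, with made-up
# identifiers ('rl' is some revlog instance): exact forms are tried
# first via _match(), then unique hex prefixes via _partialmatch().
#
#   rl.lookup(5)          -> rl.node(5)          (integer rev)
#   rl.lookup('5')        -> rl.node(5)          (str(rev))
#   rl.lookup(binnode)    -> binnode             (20-byte binary id)
#   rl.lookup(hex(node))  -> node                (40-char full hex)
#   rl.lookup('a1b2')     -> the unique node whose hex id starts with
#                            'a1b2'; otherwise LookupError is raised
#                            ('ambiguous identifier' / 'no match found')
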
1696 1773 def shortest(self, node, minlength=1):
1697 1774 """Find the shortest unambiguous prefix that matches node."""
1698 1775 def isvalid(prefix):
1699 1776 try:
1700 1777 node = self._partialmatch(prefix)
1701 1778 except error.RevlogError:
1702 1779 return False
1703 1780 except error.WdirUnsupported:
1704 1781 # single 'ff...' match
1705 1782 return True
1706 1783 if node is None:
1707 1784 raise LookupError(node, self.indexfile, _('no node'))
1708 1785 return True
1709 1786
1710 1787 def maybewdir(prefix):
1711 1788 return all(c == 'f' for c in prefix)
1712 1789
1713 1790 hexnode = hex(node)
1714 1791
1715 1792 def disambiguate(hexnode, minlength):
1716 1793 """Disambiguate against wdirid."""
1717 1794 for length in range(minlength, 41):
1718 1795 prefix = hexnode[:length]
1719 1796 if not maybewdir(prefix):
1720 1797 return prefix
1721 1798
1722 1799 if not getattr(self, 'filteredrevs', None):
1723 1800 try:
1724 1801 length = max(self.index.shortest(node), minlength)
1725 1802 return disambiguate(hexnode, length)
1726 1803 except RevlogError:
1727 1804 if node != wdirid:
1728 1805 raise LookupError(node, self.indexfile, _('no node'))
1729 1806 except AttributeError:
1730 1807 # Fall through to pure code
1731 1808 pass
1732 1809
1733 1810 if node == wdirid:
1734 1811 for length in range(minlength, 41):
1735 1812 prefix = hexnode[:length]
1736 1813 if isvalid(prefix):
1737 1814 return prefix
1738 1815
1739 1816 for length in range(minlength, 41):
1740 1817 prefix = hexnode[:length]
1741 1818 if isvalid(prefix):
1742 1819 return disambiguate(hexnode, length)
1743 1820
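# A minimal sketch of the pure-python fallback in shortest(): grow the
# prefix until it matches exactly one stored node and cannot be confused
# with the all-'f' wdir id. The list-based lookup below is a stand-in
# for the real index search; names are hypothetical.
def _example_shortest(hexnode, allhexnodes, minlength=1):
    for length in range(minlength, 41):
        prefix = hexnode[:length]
        matches = [h for h in allhexnodes if h.startswith(prefix)]
        maybewdir = all(c == 'f' for c in prefix)
        if len(matches) == 1 and not maybewdir:
            return prefix
    return hexnode

# Given two stored ids starting 'a1' and 'a9', the first shortens to
# 'a1'; a node starting 'ff' needs a longer, non-all-'f' prefix to stay
# distinguishable from the wdir pseudo-id.
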
1744 1821 def cmp(self, node, text):
1745 1822 """compare text with a given file revision
1746 1823
1747 1824 returns True if text is different than what is stored.
1748 1825 """
1749 1826 p1, p2 = self.parents(node)
1750 1827 return hash(text, p1, p2) != node
1751 1828
1752 1829 def _cachesegment(self, offset, data):
1753 1830 """Add a segment to the revlog cache.
1754 1831
1755 1832 Accepts an absolute offset and the data that is at that location.
1756 1833 """
1757 1834 o, d = self._chunkcache
1758 1835 # try to add to existing cache
1759 1836 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1760 1837 self._chunkcache = o, d + data
1761 1838 else:
1762 1839 self._chunkcache = offset, data
1763 1840
1764 1841 def _readsegment(self, offset, length, df=None):
1765 1842 """Load a segment of raw data from the revlog.
1766 1843
1767 1844 Accepts an absolute offset, length to read, and an optional existing
1768 1845 file handle to read from.
1769 1846
1770 1847 If an existing file handle is passed, it will be seeked and the
1771 1848 original seek position will NOT be restored.
1772 1849
1773 1850 Returns a str or buffer of raw byte data.
1774 1851 """
1775 1852 # Cache data both forward and backward around the requested
1776 1853 # data, in a fixed size window. This helps speed up operations
1777 1854 # involving reading the revlog backwards.
1778 1855 cachesize = self._chunkcachesize
1779 1856 realoffset = offset & ~(cachesize - 1)
1780 1857 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1781 1858 - realoffset)
1782 1859 with self._datareadfp(df) as df:
1783 1860 df.seek(realoffset)
1784 1861 d = df.read(reallength)
1785 1862 self._cachesegment(realoffset, d)
1786 1863 if offset != realoffset or reallength != length:
1787 1864 return util.buffer(d, offset - realoffset, length)
1788 1865 return d
1789 1866
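# Worked example of the cache-window arithmetic in _readsegment() above,
# assuming a power-of-two _chunkcachesize of 64KiB. The read is widened
# to aligned boundaries so neighbouring requests hit the cached segment.
def _example_alignedwindow(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)             # round down
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)   # round up + pad
    return realoffset, reallength

# _example_alignedwindow(70000, 100) == (65536, 65536): one aligned
# 64KiB window [65536, 131072) covering the 100 requested bytes and
# their neighbourhood.
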
1790 1867 def _getsegment(self, offset, length, df=None):
1791 1868 """Obtain a segment of raw data from the revlog.
1792 1869
1793 1870 Accepts an absolute offset, length of bytes to obtain, and an
1794 1871 optional file handle to the already-opened revlog. If the file
1795 1872 handle is used, its original seek position will not be preserved.
1796 1873
1797 1874 Requests for data may be returned from a cache.
1798 1875
1799 1876 Returns a str or a buffer instance of raw byte data.
1800 1877 """
1801 1878 o, d = self._chunkcache
1802 1879 l = len(d)
1803 1880
1804 1881 # is it in the cache?
1805 1882 cachestart = offset - o
1806 1883 cacheend = cachestart + length
1807 1884 if cachestart >= 0 and cacheend <= l:
1808 1885 if cachestart == 0 and cacheend == l:
1809 1886 return d # avoid a copy
1810 1887 return util.buffer(d, cachestart, cacheend - cachestart)
1811 1888
1812 1889 return self._readsegment(offset, length, df=df)
1813 1890
1814 1891 def _getsegmentforrevs(self, startrev, endrev, df=None):
1815 1892 """Obtain a segment of raw data corresponding to a range of revisions.
1816 1893
1817 1894 Accepts the start and end revisions and an optional already-open
1818 1895 file handle to be used for reading. If the file handle is used, its
1819 1896 seek position will not be preserved.
1820 1897
1821 1898 Requests for data may be satisfied by a cache.
1822 1899
1823 1900 Returns a 2-tuple of (offset, data) for the requested range of
1824 1901 revisions. Offset is the integer offset from the beginning of the
1825 1902 revlog and data is a str or buffer of the raw byte data.
1826 1903
1827 1904 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1828 1905 to determine where each revision's data begins and ends.
1829 1906 """
1830 1907 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1831 1908 # (functions are expensive).
1832 1909 index = self.index
1833 1910 istart = index[startrev]
1834 1911 start = int(istart[0] >> 16)
1835 1912 if startrev == endrev:
1836 1913 end = start + istart[1]
1837 1914 else:
1838 1915 iend = index[endrev]
1839 1916 end = int(iend[0] >> 16) + iend[1]
1840 1917
1841 1918 if self._inline:
1842 1919 start += (startrev + 1) * self._io.size
1843 1920 end += (endrev + 1) * self._io.size
1844 1921 length = end - start
1845 1922
1846 1923 return start, self._getsegment(start, length, df=df)
1847 1924
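# A small worked example of the inline-revlog fixup in
# _getsegmentforrevs() above: with index and data interleaved in one
# file, rev N's data is preceded by N + 1 index entries, so raw data
# offsets shift by that many entry sizes (64 bytes in the v1 format).
def _example_inlineoffsets(start, end, startrev, endrev, iosize=64):
    start += (startrev + 1) * iosize
    end += (endrev + 1) * iosize
    return start, end - start  # absolute offset and length to read

# For data offsets 0..1000 covering revs 0..3:
# _example_inlineoffsets(0, 1000, 0, 3) == (64, 1192)
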
1848 1925 def _chunk(self, rev, df=None):
1849 1926 """Obtain a single decompressed chunk for a revision.
1850 1927
1851 1928 Accepts an integer revision and an optional already-open file handle
1852 1929 to be used for reading. If used, the seek position of the file will not
1853 1930 be preserved.
1854 1931
1855 1932 Returns a str holding uncompressed data for the requested revision.
1856 1933 """
1857 1934 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1858 1935
1859 1936 def _chunks(self, revs, df=None):
1860 1937 """Obtain decompressed chunks for the specified revisions.
1861 1938
1862 1939 Accepts an iterable of numeric revisions that are assumed to be in
1863 1940 ascending order. Also accepts an optional already-open file handle
1864 1941 to be used for reading. If used, the seek position of the file will
1865 1942 not be preserved.
1866 1943
1867 1944 This function is similar to calling ``self._chunk()`` multiple times,
1868 1945 but is faster.
1869 1946
1870 1947 Returns a list with decompressed data for each requested revision.
1871 1948 """
1872 1949 if not revs:
1873 1950 return []
1874 1951 start = self.start
1875 1952 length = self.length
1876 1953 inline = self._inline
1877 1954 iosize = self._io.size
1878 1955 buffer = util.buffer
1879 1956
1880 1957 l = []
1881 1958 ladd = l.append
1882 1959
1883 1960 if not self._withsparseread:
1884 1961 slicedchunks = (revs,)
1885 1962 else:
1886 1963 slicedchunks = _slicechunk(self, revs)
1887 1964
1888 1965 for revschunk in slicedchunks:
1889 1966 firstrev = revschunk[0]
1890 1967 # Skip trailing revisions with empty diff
1891 1968 for lastrev in revschunk[::-1]:
1892 1969 if length(lastrev) != 0:
1893 1970 break
1894 1971
1895 1972 try:
1896 1973 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1897 1974 except OverflowError:
1898 1975 # issue4215 - we can't cache a run of chunks greater than
1899 1976 # 2G on Windows
1900 1977 return [self._chunk(rev, df=df) for rev in revschunk]
1901 1978
1902 1979 decomp = self.decompress
1903 1980 for rev in revschunk:
1904 1981 chunkstart = start(rev)
1905 1982 if inline:
1906 1983 chunkstart += (rev + 1) * iosize
1907 1984 chunklength = length(rev)
1908 1985 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1909 1986
1910 1987 return l
1911 1988
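# A hedged sketch of why _chunks() slices revisions when sparse reads
# are enabled: fetching revs [0, 1, 900] as one segment would also read
# every byte sitting between rev 1 and rev 900. Splitting on large gaps
# keeps each read dense. The real _slicechunk() works on index offsets
# with a density heuristic; this gap-based version is a simplification.
def _example_slicebygap(revstart, revs, maxgap):
    slices, current = [], [revs[0]]
    for prev, r in zip(revs, revs[1:]):
        if revstart[r] - revstart[prev] > maxgap:
            slices.append(current)  # gap too large: start a new read
            current = []
        current.append(r)
    slices.append(current)
    return slices

# _example_slicebygap({0: 0, 1: 100, 900: 10 ** 8}, [0, 1, 900],
#                     10 ** 6) == [[0, 1], [900]]
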
1912 1989 def _chunkclear(self):
1913 1990 """Clear the raw chunk cache."""
1914 1991 self._chunkcache = (0, '')
1915 1992
1916 1993 def deltaparent(self, rev):
1917 1994 """return deltaparent of the given revision"""
1918 1995 base = self.index[rev][3]
1919 1996 if base == rev:
1920 1997 return nullrev
1921 1998 elif self._generaldelta:
1922 1999 return base
1923 2000 else:
1924 2001 return rev - 1
1925 2002
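# Tiny illustration of the two layouts deltaparent() handles: with
# generaldelta the index base field names the delta parent directly;
# without it, deltas implicitly chain against the previous revision.
def _example_deltaparent(base, rev, generaldelta):
    if base == rev:
        return -1  # nullrev: stored as a full snapshot, no delta parent
    return base if generaldelta else rev - 1

# _example_deltaparent(7, 12, True) == 7    (delta against rev 7)
# _example_deltaparent(7, 12, False) == 11  (delta against previous rev)
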
1926 2003 def revdiff(self, rev1, rev2):
1927 2004 """return or calculate a delta between two revisions
1928 2005
1929 2006 The delta calculated is in binary form and is intended to be written to
1930 2007 revlog data directly. So this function needs raw revision data.
1931 2008 """
1932 2009 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1933 2010 return bytes(self._chunk(rev2))
1934 2011
1935 2012 return mdiff.textdiff(self.revision(rev1, raw=True),
1936 2013 self.revision(rev2, raw=True))
1937 2014
1938 2015 def revision(self, nodeorrev, _df=None, raw=False):
1939 2016 """return an uncompressed revision of a given node or revision
1940 2017 number.
1941 2018
1942 2019 _df - an existing file handle to read from. (internal-only)
1943 2020 raw - an optional argument specifying if the revision data is to be
1944 2021 treated as raw data when applying flag transforms. 'raw' should be set
1945 2022 to True when generating changegroups or in debug commands.
1946 2023 """
1947 2024 if isinstance(nodeorrev, int):
1948 2025 rev = nodeorrev
1949 2026 node = self.node(rev)
1950 2027 else:
1951 2028 node = nodeorrev
1952 2029 rev = None
1953 2030
1954 2031 cachedrev = None
1955 2032 flags = None
1956 2033 rawtext = None
1957 2034 if node == nullid:
1958 2035 return ""
1959 2036 if self._cache:
1960 2037 if self._cache[0] == node:
1961 2038 # _cache only stores rawtext
1962 2039 if raw:
1963 2040 return self._cache[2]
1964 2041 # duplicated, but good for perf
1965 2042 if rev is None:
1966 2043 rev = self.rev(node)
1967 2044 if flags is None:
1968 2045 flags = self.flags(rev)
1969 2046 # no extra flags set, no flag processor runs, text = rawtext
1970 2047 if flags == REVIDX_DEFAULT_FLAGS:
1971 2048 return self._cache[2]
1972 2049 # rawtext is reusable. need to run flag processor
1973 2050 rawtext = self._cache[2]
1974 2051
1975 2052 cachedrev = self._cache[1]
1976 2053
1977 2054 # look up what we need to read
1978 2055 if rawtext is None:
1979 2056 if rev is None:
1980 2057 rev = self.rev(node)
1981 2058
1982 2059 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1983 2060 if stopped:
1984 2061 rawtext = self._cache[2]
1985 2062
1986 2063 # drop cache to save memory
1987 2064 self._cache = None
1988 2065
1989 2066 bins = self._chunks(chain, df=_df)
1990 2067 if rawtext is None:
1991 2068 rawtext = bytes(bins[0])
1992 2069 bins = bins[1:]
1993 2070
1994 2071 rawtext = mdiff.patches(rawtext, bins)
1995 2072 self._cache = (node, rev, rawtext)
1996 2073
1997 2074 if flags is None:
1998 2075 if rev is None:
1999 2076 rev = self.rev(node)
2000 2077 flags = self.flags(rev)
2001 2078
2002 2079 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2003 2080 if validatehash:
2004 2081 self.checkhash(text, node, rev=rev)
2005 2082
2006 2083 return text
2007 2084
2008 2085 def hash(self, text, p1, p2):
2009 2086 """Compute a node hash.
2010 2087
2011 2088 Available as a function so that subclasses can replace the hash
2012 2089 as needed.
2013 2090 """
2014 2091 return hash(text, p1, p2)
2015 2092
2016 2093 def _processflags(self, text, flags, operation, raw=False):
2017 2094 """Inspect revision data flags and applies transforms defined by
2018 2095 registered flag processors.
2019 2096
2020 2097 ``text`` - the revision data to process
2021 2098 ``flags`` - the revision flags
2022 2099 ``operation`` - the operation being performed (read or write)
2023 2100 ``raw`` - an optional argument describing if the raw transform should be
2024 2101 applied.
2025 2102
2026 2103 This method processes the flags in the order (or reverse order if
2027 2104 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2028 2105 flag processors registered for present flags. The order of flags defined
2029 2106 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2030 2107
2031 2108 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2032 2109 processed text and ``validatehash`` is a bool indicating whether the
2033 2110 returned text should be checked for hash integrity.
2034 2111
2035 2112 Note: If the ``raw`` argument is set, it has precedence over the
2036 2113 operation and will only update the value of ``validatehash``.
2037 2114 """
2038 2115 # fast path: no flag processors will run
2039 2116 if flags == 0:
2040 2117 return text, True
2041 2118 if operation not in ('read', 'write'):
2042 2119 raise ProgrammingError(_("invalid '%s' operation") % operation)
2043 2120 # Check all flags are known.
2044 2121 if flags & ~REVIDX_KNOWN_FLAGS:
2045 2122 raise RevlogError(_("incompatible revision flag '%#x'") %
2046 2123 (flags & ~REVIDX_KNOWN_FLAGS))
2047 2124 validatehash = True
2048 2125 # Depending on the operation (read or write), the order might be
2049 2126 # reversed due to non-commutative transforms.
2050 2127 orderedflags = REVIDX_FLAGS_ORDER
2051 2128 if operation == 'write':
2052 2129 orderedflags = reversed(orderedflags)
2053 2130
2054 2131 for flag in orderedflags:
2055 2132 # If a flagprocessor has been registered for a known flag, apply the
2056 2133 # related operation transform and update result tuple.
2057 2134 if flag & flags:
2058 2135 vhash = True
2059 2136
2060 2137 if flag not in _flagprocessors:
2061 2138 message = _("missing processor for flag '%#x'") % (flag)
2062 2139 raise RevlogError(message)
2063 2140
2064 2141 processor = _flagprocessors[flag]
2065 2142 if processor is not None:
2066 2143 readtransform, writetransform, rawtransform = processor
2067 2144
2068 2145 if raw:
2069 2146 vhash = rawtransform(self, text)
2070 2147 elif operation == 'read':
2071 2148 text, vhash = readtransform(self, text)
2072 2149 else: # write operation
2073 2150 text, vhash = writetransform(self, text)
2074 2151 validatehash = validatehash and vhash
2075 2152
2076 2153 return text, validatehash
2077 2154
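# A hedged sketch of a flag processor triple as consumed by
# _processflags() above: (readtransform, writetransform, rawtransform).
# EXAMPLEFLAG and the 'stub:' framing are invented for illustration;
# read/write return the new text plus a bool saying whether the result
# still hashes to the node, and the raw transform returns only the bool.
def _examplereadtransform(rl, text):
    return text[len('stub:'):], False   # stored form != hashed form

def _examplewritetransform(rl, text):
    return 'stub:' + text, False

def _examplerawtransform(rl, text):
    return False  # rawtext does not match the node hash

# Registered roughly as:
# _flagprocessors[EXAMPLEFLAG] = (_examplereadtransform,
#                                 _examplewritetransform,
#                                 _examplerawtransform)
# With several flags set, 'read' applies processors in
# REVIDX_FLAGS_ORDER and 'write' applies them reversed, so a
# read-after-write round-trip composes the transforms correctly.
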
2078 2155 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2079 2156 """Check node hash integrity.
2080 2157
2081 2158 Available as a function so that subclasses can extend hash mismatch
2082 2159 behaviors as needed.
2083 2160 """
2084 2161 try:
2085 2162 if p1 is None and p2 is None:
2086 2163 p1, p2 = self.parents(node)
2087 2164 if node != self.hash(text, p1, p2):
2088 2165 revornode = rev
2089 2166 if revornode is None:
2090 2167 revornode = templatefilters.short(hex(node))
2091 2168 raise RevlogError(_("integrity check failed on %s:%s")
2092 2169 % (self.indexfile, pycompat.bytestr(revornode)))
2093 2170 except RevlogError:
2094 2171 if self._censorable and _censoredtext(text):
2095 2172 raise error.CensoredNodeError(self.indexfile, node, text)
2096 2173 raise
2097 2174
2098 2175 def _enforceinlinesize(self, tr, fp=None):
2099 2176 """Check if the revlog is too big for inline and convert if so.
2100 2177
2101 2178 This should be called after revisions are added to the revlog. If the
2102 2179 revlog has grown too large to be an inline revlog, it will convert it
2103 2180 to use multiple index and data files.
2104 2181 """
2105 2182 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2106 2183 return
2107 2184
2108 2185 trinfo = tr.find(self.indexfile)
2109 2186 if trinfo is None:
2110 2187 raise RevlogError(_("%s not found in the transaction")
2111 2188 % self.indexfile)
2112 2189
2113 2190 trindex = trinfo[2]
2114 2191 if trindex is not None:
2115 2192 dataoff = self.start(trindex)
2116 2193 else:
2117 2194 # revlog was stripped at start of transaction, use all leftover data
2118 2195 trindex = len(self) - 1
2119 2196 dataoff = self.end(-2)
2120 2197
2121 2198 tr.add(self.datafile, dataoff)
2122 2199
2123 2200 if fp:
2124 2201 fp.flush()
2125 2202 fp.close()
2126 2203
2127 2204 with self._datafp('w') as df:
2128 2205 for r in self:
2129 2206 df.write(self._getsegmentforrevs(r, r)[1])
2130 2207
2131 2208 with self._indexfp('w') as fp:
2132 2209 self.version &= ~FLAG_INLINE_DATA
2133 2210 self._inline = False
2134 2211 io = self._io
2135 2212 for i in self:
2136 2213 e = io.packentry(self.index[i], self.node, self.version, i)
2137 2214 fp.write(e)
2138 2215
2139 2216 # the temp file replaces the real index when we exit the context
2140 2217 # manager
2141 2218
2142 2219 tr.replace(self.indexfile, trindex * self._io.size)
2143 2220 self._chunkclear()
2144 2221
2145 2222 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2146 2223 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2147 2224 """add a revision to the log
2148 2225
2149 2226 text - the revision data to add
2150 2227 transaction - the transaction object used for rollback
2151 2228 link - the linkrev data to add
2152 2229 p1, p2 - the parent nodeids of the revision
2153 2230 cachedelta - an optional precomputed delta
2154 2231 node - nodeid of revision; typically node is not specified, and it is
2155 2232 computed by default as hash(text, p1, p2), however subclasses might
2156 2233 use different hashing method (and override checkhash() in such case)
2157 2234 flags - the known flags to set on the revision
2158 2235 deltacomputer - an optional _deltacomputer instance shared between
2159 2236 multiple calls
2160 2237 """
2161 2238 if link == nullrev:
2162 2239 raise RevlogError(_("attempted to add linkrev -1 to %s")
2163 2240 % self.indexfile)
2164 2241
2165 2242 if flags:
2166 2243 node = node or self.hash(text, p1, p2)
2167 2244
2168 2245 rawtext, validatehash = self._processflags(text, flags, 'write')
2169 2246
2170 2247 # If the flag processor modifies the revision data, ignore any provided
2171 2248 # cachedelta.
2172 2249 if rawtext != text:
2173 2250 cachedelta = None
2174 2251
2175 2252 if len(rawtext) > _maxentrysize:
2176 2253 raise RevlogError(
2177 2254 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2178 2255 % (self.indexfile, len(rawtext)))
2179 2256
2180 2257 node = node or self.hash(rawtext, p1, p2)
2181 2258 if node in self.nodemap:
2182 2259 return node
2183 2260
2184 2261 if validatehash:
2185 2262 self.checkhash(rawtext, node, p1=p1, p2=p2)
2186 2263
2187 2264 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2188 2265 flags, cachedelta=cachedelta,
2189 2266 deltacomputer=deltacomputer)
2190 2267
2191 2268 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2192 2269 cachedelta=None, deltacomputer=None):
2193 2270 """add a raw revision with known flags, node and parents
2194 2271 useful when reusing a revision not stored in this revlog (ex: received
2195 2272 over wire, or read from an external bundle).
2196 2273 """
2197 2274 dfh = None
2198 2275 if not self._inline:
2199 2276 dfh = self._datafp("a+")
2200 2277 ifh = self._indexfp("a+")
2201 2278 try:
2202 2279 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2203 2280 flags, cachedelta, ifh, dfh,
2204 2281 deltacomputer=deltacomputer)
2205 2282 finally:
2206 2283 if dfh:
2207 2284 dfh.close()
2208 2285 ifh.close()
2209 2286
2210 2287 def compress(self, data):
2211 2288 """Generate a possibly-compressed representation of data."""
2212 2289 if not data:
2213 2290 return '', data
2214 2291
2215 2292 compressed = self._compressor.compress(data)
2216 2293
2217 2294 if compressed:
2218 2295 # The revlog compressor added the header in the returned data.
2219 2296 return '', compressed
2220 2297
2221 2298 if data[0:1] == '\0':
2222 2299 return '', data
2223 2300 return 'u', data
2224 2301
2225 2302 def decompress(self, data):
2226 2303 """Decompress a revlog chunk.
2227 2304
2228 2305 The chunk is expected to begin with a header identifying the
2229 2306 format type so it can be routed to an appropriate decompressor.
2230 2307 """
2231 2308 if not data:
2232 2309 return data
2233 2310
2234 2311 # Revlogs are read much more frequently than they are written and many
2235 2312 # chunks only take microseconds to decompress, so performance is
2236 2313 # important here.
2237 2314 #
2238 2315 # We can make a few assumptions about revlogs:
2239 2316 #
2240 2317 # 1) the majority of chunks will be compressed (as opposed to inline
2241 2318 # raw data).
2242 2319 # 2) decompressing *any* data will likely be at least 10x slower than
2243 2320 # returning raw inline data.
2244 2321 # 3) we want to prioritize common and officially supported compression
2245 2322 # engines
2246 2323 #
2247 2324 # It follows that we want to optimize for "decompress compressed data
2248 2325 # when encoded with common and officially supported compression engines"
2249 2326 # case over "raw data" and "data encoded by less common or non-official
2250 2327 # compression engines." That is why we have the inline lookup first
2251 2328 # followed by the compengines lookup.
2252 2329 #
2253 2330 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2254 2331 # compressed chunks. And this matters for changelog and manifest reads.
2255 2332 t = data[0:1]
2256 2333
2257 2334 if t == 'x':
2258 2335 try:
2259 2336 return _zlibdecompress(data)
2260 2337 except zlib.error as e:
2261 2338 raise RevlogError(_('revlog decompress error: %s') %
2262 2339 stringutil.forcebytestr(e))
2263 2340 # '\0' is more common than 'u' so it goes first.
2264 2341 elif t == '\0':
2265 2342 return data
2266 2343 elif t == 'u':
2267 2344 return util.buffer(data, 1)
2268 2345
2269 2346 try:
2270 2347 compressor = self._decompressors[t]
2271 2348 except KeyError:
2272 2349 try:
2273 2350 engine = util.compengines.forrevlogheader(t)
2274 2351 compressor = engine.revlogcompressor()
2275 2352 self._decompressors[t] = compressor
2276 2353 except KeyError:
2277 2354 raise RevlogError(_('unknown compression type %r') % t)
2278 2355
2279 2356 return compressor.decompress(data)
2280 2357
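# Quick routing table for the one-byte chunk headers dispatched in
# decompress() above, matching this file's compress() counterpart; the
# helper name is hypothetical.
def _example_routeheader(data):
    t = data[0:1]
    if t == 'x':
        return 'zlib-compressed'        # handed to zlib.decompress
    if t == '\0':
        return 'raw, self-describing'   # returned as-is
    if t == 'u':
        return 'raw, header stripped'   # buffer(data, 1)
    return 'engine %r' % t              # looked up via util.compengines

# compress() produces the matching framing: compressed chunks carry the
# engine's own header, raw chunks starting with '\0' pass through, and
# other uncompressible chunks are prefixed with 'u'.
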
2281 2358 def _isgooddeltainfo(self, deltainfo, revinfo):
2282 2359 """Returns True if the given delta is good. Good means that it is within
2283 2360 the disk span, disk size, and chain length bounds that we know to be
2284 2361 performant."""
2285 2362 if deltainfo is None:
2286 2363 return False
2287 2364
2288 2365 # - 'deltainfo.distance' is the distance from the base revision --
2289 2366 # bounding it limits the amount of I/O we need to do.
2290 2367 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2291 2368 # deltas we need to apply -- bounding it limits the amount of CPU
2292 2369 # we consume.
2293 2370
2294 2371 textlen = revinfo.textlen
2295 2372 defaultmax = textlen * 4
2296 2373 maxdist = self._maxdeltachainspan
2297 2374 if not maxdist:
2298 2375 maxdist = deltainfo.distance # ensure the conditional passes
2299 2376 maxdist = max(maxdist, defaultmax)
2300 2377 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2301 2378 deltainfo.compresseddeltalen > textlen * 2 or
2302 2379 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2303 2380 return False
2304 2381
2305 2382 return True
2306 2383
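# Worked numbers for the bounds in _isgooddeltainfo() above, assuming a
# 1000-byte revision and no configured _maxdeltachainspan (so maxdist
# degenerates to max(distance, 4 * textlen)). Hypothetical helper.
def _example_isgooddelta(distance, deltalen, compresseddeltalen,
                         chainlen, textlen=1000, maxchainlen=None):
    maxdist = max(distance, textlen * 4)  # no explicit span limit set
    return not (distance > maxdist or
                deltalen > textlen or
                compresseddeltalen > textlen * 2 or
                (maxchainlen and chainlen > maxchainlen))

# _example_isgooddelta(3000, 200, 900, 5) is True, but
# _example_isgooddelta(3000, 1200, 900, 5) is False: a 1200-byte delta
# for a 1000-byte text is worse than storing the text outright.
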
2307 2384 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2308 2385 cachedelta, ifh, dfh, alwayscache=False,
2309 2386 deltacomputer=None):
2310 2387 """internal function to add revisions to the log
2311 2388
2312 2389 see addrevision for argument descriptions.
2313 2390
2314 2391 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2315 2392
2316 2393 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2317 2394 be used.
2318 2395
2319 2396 invariants:
2320 2397 - rawtext is optional (can be None); if not set, cachedelta must be set.
2321 2398 if both are set, they must correspond to each other.
2322 2399 """
2323 2400 if node == nullid:
2324 2401 raise RevlogError(_("%s: attempt to add null revision") %
2325 2402 (self.indexfile))
2326 2403 if node == wdirid or node in wdirfilenodeids:
2327 2404 raise RevlogError(_("%s: attempt to add wdir revision") %
2328 2405 (self.indexfile))
2329 2406
2330 2407 if self._inline:
2331 2408 fh = ifh
2332 2409 else:
2333 2410 fh = dfh
2334 2411
2335 2412 btext = [rawtext]
2336 2413
2337 2414 curr = len(self)
2338 2415 prev = curr - 1
2339 2416 offset = self.end(prev)
2340 2417 p1r, p2r = self.rev(p1), self.rev(p2)
2341 2418
2342 2419 # full versions are inserted when the needed deltas
2343 2420 # become comparable to the uncompressed text
2344 2421 if rawtext is None:
2345 2422 # need rawtext size, before changed by flag processors, which is
2346 2423 # the non-raw size. use revlog explicitly to avoid filelog's extra
2347 2424 # logic that might remove metadata size.
2348 2425 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2349 2426 cachedelta[1])
2350 2427 else:
2351 2428 textlen = len(rawtext)
2352 2429
2353 2430 if deltacomputer is None:
2354 2431 deltacomputer = _deltacomputer(self)
2355 2432
2356 2433 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2357 2434
2358 2435 # no delta for flag processor revision (see "candelta" for why)
2359 2436 # not calling candelta since only one revision needs test, also to
2360 2437 # avoid overhead fetching flags again.
2361 2438 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2362 2439 deltainfo = None
2363 2440 else:
2364 2441 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2365 2442
2366 2443 if deltainfo is not None:
2367 2444 base = deltainfo.base
2368 2445 chainbase = deltainfo.chainbase
2369 2446 data = deltainfo.data
2370 2447 l = deltainfo.deltalen
2371 2448 else:
2372 2449 rawtext = deltacomputer.buildtext(revinfo, fh)
2373 2450 data = self.compress(rawtext)
2374 2451 l = len(data[1]) + len(data[0])
2375 2452 base = chainbase = curr
2376 2453
2377 2454 e = (offset_type(offset, flags), l, textlen,
2378 2455 base, link, p1r, p2r, node)
2379 2456 self.index.insert(-1, e)
2380 2457 self.nodemap[node] = curr
2381 2458
2382 2459 entry = self._io.packentry(e, self.node, self.version, curr)
2383 2460 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2384 2461
2385 2462 if alwayscache and rawtext is None:
2386 2463 rawtext = deltacomputer.buildtext(revinfo, fh)
2387 2464
2388 2465 if type(rawtext) == bytes: # only accept immutable objects
2389 2466 self._cache = (node, curr, rawtext)
2390 2467 self._chainbasecache[curr] = chainbase
2391 2468 return node
2392 2469
2393 2470 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2394 2471 # Files opened in a+ mode have inconsistent behavior on various
2395 2472 # platforms. Windows requires that a file positioning call be made
2396 2473 # when the file handle transitions between reads and writes. See
2397 2474 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2398 2475 # platforms, Python or the platform itself can be buggy. Some versions
2399 2476 # of Solaris have been observed to not append at the end of the file
2400 2477 # if the file was seeked to before the end. See issue4943 for more.
2401 2478 #
2402 2479 # We work around this issue by inserting a seek() before writing.
2403 2480 # Note: This is likely not necessary on Python 3.
2404 2481 ifh.seek(0, os.SEEK_END)
2405 2482 if dfh:
2406 2483 dfh.seek(0, os.SEEK_END)
2407 2484
2408 2485 curr = len(self) - 1
2409 2486 if not self._inline:
2410 2487 transaction.add(self.datafile, offset)
2411 2488 transaction.add(self.indexfile, curr * len(entry))
2412 2489 if data[0]:
2413 2490 dfh.write(data[0])
2414 2491 dfh.write(data[1])
2415 2492 ifh.write(entry)
2416 2493 else:
2417 2494 offset += curr * self._io.size
2418 2495 transaction.add(self.indexfile, offset, curr)
2419 2496 ifh.write(entry)
2420 2497 ifh.write(data[0])
2421 2498 ifh.write(data[1])
2422 2499 self._enforceinlinesize(transaction, ifh)
2423 2500
2424 2501 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2425 2502 """
2426 2503 add a delta group
2427 2504
2428 2505 given a set of deltas, add them to the revision log. the
2429 2506 first delta is against its parent, which should be in our
2430 2507 log, the rest are against the previous delta.
2431 2508
2432 2509 If ``addrevisioncb`` is defined, it will be called with arguments of
2433 2510 this revlog and the node that was added.
2434 2511 """
2435 2512
2436 2513 nodes = []
2437 2514
2438 2515 r = len(self)
2439 2516 end = 0
2440 2517 if r:
2441 2518 end = self.end(r - 1)
2442 2519 ifh = self._indexfp("a+")
2443 2520 isize = r * self._io.size
2444 2521 if self._inline:
2445 2522 transaction.add(self.indexfile, end + isize, r)
2446 2523 dfh = None
2447 2524 else:
2448 2525 transaction.add(self.indexfile, isize, r)
2449 2526 transaction.add(self.datafile, end)
2450 2527 dfh = self._datafp("a+")
2451 2528 def flush():
2452 2529 if dfh:
2453 2530 dfh.flush()
2454 2531 ifh.flush()
2455 2532 try:
2456 2533 deltacomputer = _deltacomputer(self)
2457 2534 # loop through our set of deltas
2458 2535 for data in deltas:
2459 2536 node, p1, p2, linknode, deltabase, delta, flags = data
2460 2537 link = linkmapper(linknode)
2461 2538 flags = flags or REVIDX_DEFAULT_FLAGS
2462 2539
2463 2540 nodes.append(node)
2464 2541
2465 2542 if node in self.nodemap:
2466 2543 # this can happen if two branches make the same change
2467 2544 continue
2468 2545
2469 2546 for p in (p1, p2):
2470 2547 if p not in self.nodemap:
2471 2548 raise LookupError(p, self.indexfile,
2472 2549 _('unknown parent'))
2473 2550
2474 2551 if deltabase not in self.nodemap:
2475 2552 raise LookupError(deltabase, self.indexfile,
2476 2553 _('unknown delta base'))
2477 2554
2478 2555 baserev = self.rev(deltabase)
2479 2556
2480 2557 if baserev != nullrev and self.iscensored(baserev):
2481 2558 # if base is censored, delta must be full replacement in a
2482 2559 # single patch operation
2483 2560 hlen = struct.calcsize(">lll")
2484 2561 oldlen = self.rawsize(baserev)
2485 2562 newlen = len(delta) - hlen
2486 2563 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2487 2564 raise error.CensoredBaseError(self.indexfile,
2488 2565 self.node(baserev))
2489 2566
2490 2567 if not flags and self._peek_iscensored(baserev, delta, flush):
2491 2568 flags |= REVIDX_ISCENSORED
2492 2569
2493 2570 # We assume consumers of addrevisioncb will want to retrieve
2494 2571 # the added revision, which will require a call to
2495 2572 # revision(). revision() will fast path if there is a cache
2496 2573 # hit. So, we tell _addrevision() to always cache in this case.
2497 2574 # We're only using addgroup() in the context of changegroup
2498 2575 # generation so the revision data can always be handled as raw
2499 2576 # by the flagprocessor.
2500 2577 self._addrevision(node, None, transaction, link,
2501 2578 p1, p2, flags, (baserev, delta),
2502 2579 ifh, dfh,
2503 2580 alwayscache=bool(addrevisioncb),
2504 2581 deltacomputer=deltacomputer)
2505 2582
2506 2583 if addrevisioncb:
2507 2584 addrevisioncb(self, node)
2508 2585
2509 2586 if not dfh and not self._inline:
2510 2587 # addrevision switched from inline to conventional
2511 2588 # reopen the index
2512 2589 ifh.close()
2513 2590 dfh = self._datafp("a+")
2514 2591 ifh = self._indexfp("a+")
2515 2592 finally:
2516 2593 if dfh:
2517 2594 dfh.close()
2518 2595 ifh.close()
2519 2596
2520 2597 return nodes
2521 2598
2522 2599 def iscensored(self, rev):
2523 2600 """Check if a file revision is censored."""
2524 2601 if not self._censorable:
2525 2602 return False
2526 2603
2527 2604 return self.flags(rev) & REVIDX_ISCENSORED
2528 2605
2529 2606 def _peek_iscensored(self, baserev, delta, flush):
2530 2607 """Quickly check if a delta produces a censored revision."""
2531 2608 if not self._censorable:
2532 2609 return False
2533 2610
2534 2611 # Fragile heuristic: unless new file meta keys are added alphabetically
2535 2612 # preceding "censored", all censored revisions are prefixed by
2536 2613 # "\1\ncensored:". A delta producing such a censored revision must be a
2537 2614 # full-replacement delta, so we inspect the first and only patch in the
2538 2615 # delta for this prefix.
2539 2616 hlen = struct.calcsize(">lll")
2540 2617 if len(delta) <= hlen:
2541 2618 return False
2542 2619
2543 2620 oldlen = self.rawsize(baserev)
2544 2621 newlen = len(delta) - hlen
2545 2622 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2546 2623 return False
2547 2624
2548 2625 add = "\1\ncensored:"
2549 2626 addlen = len(add)
2550 2627 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2551 2628
2552 2629 def getstrippoint(self, minlink):
2553 2630 """find the minimum rev that must be stripped to strip the linkrev
2554 2631
2555 2632 Returns a tuple containing the minimum rev and a set of all revs that
2556 2633 have linkrevs that will be broken by this strip.
2557 2634 """
2558 2635 brokenrevs = set()
2559 2636 strippoint = len(self)
2560 2637
2561 2638 heads = {}
2562 2639 futurelargelinkrevs = set()
2563 2640 for head in self.headrevs():
2564 2641 headlinkrev = self.linkrev(head)
2565 2642 heads[head] = headlinkrev
2566 2643 if headlinkrev >= minlink:
2567 2644 futurelargelinkrevs.add(headlinkrev)
2568 2645
2569 2646 # This algorithm involves walking down the rev graph, starting at the
2570 2647 # heads. Since the revs are topologically sorted according to linkrev,
2571 2648 # once all head linkrevs are below the minlink, we know there are
2572 2649 # no more revs that could have a linkrev greater than minlink.
2573 2650 # So we can stop walking.
2574 2651 while futurelargelinkrevs:
2575 2652 strippoint -= 1
2576 2653 linkrev = heads.pop(strippoint)
2577 2654
2578 2655 if linkrev < minlink:
2579 2656 brokenrevs.add(strippoint)
2580 2657 else:
2581 2658 futurelargelinkrevs.remove(linkrev)
2582 2659
2583 2660 for p in self.parentrevs(strippoint):
2584 2661 if p != nullrev:
2585 2662 plinkrev = self.linkrev(p)
2586 2663 heads[p] = plinkrev
2587 2664 if plinkrev >= minlink:
2588 2665 futurelargelinkrevs.add(plinkrev)
2589 2666
2590 2667 return strippoint, brokenrevs
2591 2668
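# An equivalent, hedged reformulation of getstrippoint() on flat toy
# data (valid because revlogs truncate: everything at or above the
# first rev whose linkrev reaches minlink must go, per strip() below).
def _example_getstrippoint(linkrevs, minlink):
    candidates = [r for r, l in enumerate(linkrevs) if l >= minlink]
    if not candidates:
        return len(linkrevs), set()
    strippoint = candidates[0]
    brokenrevs = set(r for r in range(strippoint, len(linkrevs))
                     if linkrevs[r] < minlink)
    return strippoint, brokenrevs

# _example_getstrippoint([0, 1, 3, 2], 2) == (2, set())
# _example_getstrippoint([0, 3, 1], 2) == (1, {2}): rev 2 (linkrev 1)
# is collateral damage of truncating at rev 1.
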
2592 2669 def strip(self, minlink, transaction):
2593 2670 """truncate the revlog on the first revision with a linkrev >= minlink
2594 2671
2595 2672 This function is called when we're stripping revision minlink and
2596 2673 its descendants from the repository.
2597 2674
2598 2675 We have to remove all revisions with linkrev >= minlink, because
2599 2676 the equivalent changelog revisions will be renumbered after the
2600 2677 strip.
2601 2678
2602 2679 So we truncate the revlog on the first of these revisions, and
2603 2680 trust that the caller has saved the revisions that shouldn't be
2604 2681 removed and that it'll re-add them after this truncation.
2605 2682 """
2606 2683 if len(self) == 0:
2607 2684 return
2608 2685
2609 2686 rev, _ = self.getstrippoint(minlink)
2610 2687 if rev == len(self):
2611 2688 return
2612 2689
2613 2690 # first truncate the files on disk
2614 2691 end = self.start(rev)
2615 2692 if not self._inline:
2616 2693 transaction.add(self.datafile, end)
2617 2694 end = rev * self._io.size
2618 2695 else:
2619 2696 end += rev * self._io.size
2620 2697
2621 2698 transaction.add(self.indexfile, end)
2622 2699
2623 2700 # then reset internal state in memory to forget those revisions
2624 2701 self._cache = None
2625 2702 self._chaininfocache = {}
2626 2703 self._chunkclear()
2627 2704 for x in xrange(rev, len(self)):
2628 2705 del self.nodemap[self.node(x)]
2629 2706
2630 2707 del self.index[rev:-1]
2631 2708 self._nodepos = None
2632 2709
2633 2710 def checksize(self):
2634 2711 expected = 0
2635 2712 if len(self):
2636 2713 expected = max(0, self.end(len(self) - 1))
2637 2714
2638 2715 try:
2639 2716 with self._datafp() as f:
2640 2717 f.seek(0, 2)
2641 2718 actual = f.tell()
2642 2719 dd = actual - expected
2643 2720 except IOError as inst:
2644 2721 if inst.errno != errno.ENOENT:
2645 2722 raise
2646 2723 dd = 0
2647 2724
2648 2725 try:
2649 2726 f = self.opener(self.indexfile)
2650 2727 f.seek(0, 2)
2651 2728 actual = f.tell()
2652 2729 f.close()
2653 2730 s = self._io.size
2654 2731 i = max(0, actual // s)
2655 2732 di = actual - (i * s)
2656 2733 if self._inline:
2657 2734 databytes = 0
2658 2735 for r in self:
2659 2736 databytes += max(0, self.length(r))
2660 2737 dd = 0
2661 2738 di = actual - len(self) * s - databytes
2662 2739 except IOError as inst:
2663 2740 if inst.errno != errno.ENOENT:
2664 2741 raise
2665 2742 di = 0
2666 2743
2667 2744 return (dd, di)
2668 2745
2669 2746 def files(self):
2670 2747 res = [self.indexfile]
2671 2748 if not self._inline:
2672 2749 res.append(self.datafile)
2673 2750 return res
2674 2751
2675 2752 DELTAREUSEALWAYS = 'always'
2676 2753 DELTAREUSESAMEREVS = 'samerevs'
2677 2754 DELTAREUSENEVER = 'never'
2678 2755
2679 2756 DELTAREUSEFULLADD = 'fulladd'
2680 2757
2681 2758 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2682 2759
2683 2760 def clone(self, tr, destrevlog, addrevisioncb=None,
2684 2761 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2685 2762 """Copy this revlog to another, possibly with format changes.
2686 2763
2687 2764 The destination revlog will contain the same revisions and nodes.
2688 2765 However, it may not be bit-for-bit identical due to e.g. delta encoding
2689 2766 differences.
2690 2767
2691 2768 The ``deltareuse`` argument controls how deltas from the existing revlog
2692 2769 are preserved in the destination revlog. The argument can have the
2693 2770 following values:
2694 2771
2695 2772 DELTAREUSEALWAYS
2696 2773 Deltas will always be reused (if possible), even if the destination
2697 2774 revlog would not select the same revisions for the delta. This is the
2698 2775 fastest mode of operation.
2699 2776 DELTAREUSESAMEREVS
2700 2777 Deltas will be reused if the destination revlog would pick the same
2701 2778 revisions for the delta. This mode strikes a balance between speed
2702 2779 and optimization.
2703 2780 DELTAREUSENEVER
2704 2781 Deltas will never be reused. This is the slowest mode of execution.
2705 2782 This mode can be used to recompute deltas (e.g. if the diff/delta
2706 2783 algorithm changes).
2707 2784
2708 2785 Delta computation can be slow, so the choice of delta reuse policy can
2709 2786 significantly affect run time.
2710 2787
2711 2788 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2712 2789 two extremes. Deltas will be reused if they are appropriate. But if the
2713 2790 delta could choose a better revision, it will do so. This means if you
2714 2791 are converting a non-generaldelta revlog to a generaldelta revlog,
2715 2792 deltas will be recomputed if the delta's parent isn't a parent of the
2716 2793 revision.
2717 2794
2718 2795 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2719 2796 controls whether to compute deltas against both parents for merges.
2720 2797 If unset, the destination revlog's current setting is kept.
2721 2798 """
2722 2799 if deltareuse not in self.DELTAREUSEALL:
2723 2800 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2724 2801
2725 2802 if len(destrevlog):
2726 2803 raise ValueError(_('destination revlog is not empty'))
2727 2804
2728 2805 if getattr(self, 'filteredrevs', None):
2729 2806 raise ValueError(_('source revlog has filtered revisions'))
2730 2807 if getattr(destrevlog, 'filteredrevs', None):
2731 2808 raise ValueError(_('destination revlog has filtered revisions'))
2732 2809
2733 2810 # lazydeltabase controls whether to reuse a cached delta, if possible.
2734 2811 oldlazydeltabase = destrevlog._lazydeltabase
2735 2812 oldamd = destrevlog._aggressivemergedeltas
2736 2813
2737 2814 try:
2738 2815 if deltareuse == self.DELTAREUSEALWAYS:
2739 2816 destrevlog._lazydeltabase = True
2740 2817 elif deltareuse == self.DELTAREUSESAMEREVS:
2741 2818 destrevlog._lazydeltabase = False
2742 2819
2743 2820 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2744 2821
2745 2822 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2746 2823 self.DELTAREUSESAMEREVS)
2747 2824
2748 2825 deltacomputer = _deltacomputer(destrevlog)
2749 2826 index = self.index
2750 2827 for rev in self:
2751 2828 entry = index[rev]
2752 2829
2753 2830 # Some classes override linkrev to take filtered revs into
2754 2831 # account. Use raw entry from index.
2755 2832 flags = entry[0] & 0xffff
2756 2833 linkrev = entry[4]
2757 2834 p1 = index[entry[5]][7]
2758 2835 p2 = index[entry[6]][7]
2759 2836 node = entry[7]
2760 2837
2761 2838 # (Possibly) reuse the delta from the revlog if allowed and
2762 2839 # the revlog chunk is a delta.
2763 2840 cachedelta = None
2764 2841 rawtext = None
2765 2842 if populatecachedelta:
2766 2843 dp = self.deltaparent(rev)
2767 2844 if dp != nullrev:
2768 2845 cachedelta = (dp, bytes(self._chunk(rev)))
2769 2846
2770 2847 if not cachedelta:
2771 2848 rawtext = self.revision(rev, raw=True)
2772 2849
2773 2850
2774 2851 if deltareuse == self.DELTAREUSEFULLADD:
2775 2852 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2776 2853 cachedelta=cachedelta,
2777 2854 node=node, flags=flags,
2778 2855 deltacomputer=deltacomputer)
2779 2856 else:
2780 2857 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2781 2858 checkambig=False)
2782 2859 dfh = None
2783 2860 if not destrevlog._inline:
2784 2861 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2785 2862 try:
2786 2863 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2787 2864 p2, flags, cachedelta, ifh, dfh,
2788 2865 deltacomputer=deltacomputer)
2789 2866 finally:
2790 2867 if dfh:
2791 2868 dfh.close()
2792 2869 ifh.close()
2793 2870
2794 2871 if addrevisioncb:
2795 2872 addrevisioncb(self, rev, node)
2796 2873 finally:
2797 2874 destrevlog._lazydeltabase = oldlazydeltabase
2798 2875 destrevlog._aggressivemergedeltas = oldamd
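# A hedged usage sketch for clone(): recomputing every delta while
# copying into an empty destination revlog, e.g. to apply new delta
# settings. 'tr' is an open transaction; 'src' and 'dst' are
# hypothetical revlog instances.
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER,
#             aggressivemergedeltas=True)
#
# DELTAREUSEALWAYS would instead copy existing deltas wholesale (the
# fastest option), while the default DELTAREUSESAMEREVS only reuses a
# delta when the destination would have picked the same base revision.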