revlog: fix typo in 'buildtext' name...
Boris Feld
r39228:dbc5ead9 default
@@ -1,3076 +1,3076 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullhex,
31 31 nullid,
32 32 nullrev,
33 33 wdirfilenodeids,
34 34 wdirhex,
35 35 wdirid,
36 36 wdirrev,
37 37 )
38 38 from .i18n import _
39 39 from .thirdparty import (
40 40 attr,
41 41 )
42 42 from . import (
43 43 ancestor,
44 44 error,
45 45 mdiff,
46 46 policy,
47 47 pycompat,
48 48 templatefilters,
49 49 util,
50 50 )
51 51 from .utils import (
52 52 stringutil,
53 53 )
54 54
55 55 parsers = policy.importmod(r'parsers')
56 56
57 57 # Aliased for performance.
58 58 _zlibdecompress = zlib.decompress
59 59
60 60 # revlog header flags
61 61 REVLOGV0 = 0
62 62 REVLOGV1 = 1
63 63 # Dummy value until file format is finalized.
64 64 # Reminder: change the bounds check in revlog.__init__ when this is changed.
65 65 REVLOGV2 = 0xDEAD
66 66 FLAG_INLINE_DATA = (1 << 16)
67 67 FLAG_GENERALDELTA = (1 << 17)
68 68 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
69 69 REVLOG_DEFAULT_FORMAT = REVLOGV1
70 70 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
71 71 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
72 72 REVLOGV2_FLAGS = REVLOGV1_FLAGS
73 73
74 74 # revlog index flags
75 75 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
76 76 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
77 77 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
78 78 REVIDX_DEFAULT_FLAGS = 0
79 79 # stable order in which flags need to be processed and their processors applied
80 80 REVIDX_FLAGS_ORDER = [
81 81 REVIDX_ISCENSORED,
82 82 REVIDX_ELLIPSIS,
83 83 REVIDX_EXTSTORED,
84 84 ]
85 85 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 86 # bitmask for flags that could cause rawdata content change
87 87 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
88 88
89 89 # max size of revlog with inline data
90 90 _maxinline = 131072
91 91 _chunksize = 1048576
92 92
93 93 RevlogError = error.RevlogError
94 94 LookupError = error.LookupError
95 95 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
96 96 CensoredNodeError = error.CensoredNodeError
97 97 ProgrammingError = error.ProgrammingError
98 98
99 99 # Store flag processors (cf. 'addflagprocessor()' to register)
100 100 _flagprocessors = {
101 101 REVIDX_ISCENSORED: None,
102 102 }
103 103
104 104 _mdre = re.compile('\1\n')
105 105 def parsemeta(text):
106 106 """return (metadatadict, metadatasize)"""
107 107 # text can be buffer, so we can't use .startswith or .index
108 108 if text[:2] != '\1\n':
109 109 return None, None
110 110 s = _mdre.search(text, 2).start()
111 111 mtext = text[2:s]
112 112 meta = {}
113 113 for l in mtext.splitlines():
114 114 k, v = l.split(": ", 1)
115 115 meta[k] = v
116 116 return meta, (s + 2)
117 117
118 118 def packmeta(meta, text):
119 119 keys = sorted(meta)
120 120 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
121 121 return "\1\n%s\1\n%s" % (metatext, text)
122 122
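# A minimal usage sketch (illustrative, not part of the original changeset):
# packmeta/parsemeta round-trip the "\1\n" metadata envelope. The helper name
# and the metadata values below are made up for the demonstration.
def _demometa():
    packed = packmeta({'copy': 'a', 'copyrev': 'deadbeef'}, 'file content')
    meta, size = parsemeta(packed)
    assert meta == {'copy': 'a', 'copyrev': 'deadbeef'}
    # everything past the envelope is the original payload
    assert packed[size:] == 'file content'
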
123 123 def _censoredtext(text):
124 124 m, offs = parsemeta(text)
125 125 return m and "censored" in m
126 126
127 127 def addflagprocessor(flag, processor):
128 128 """Register a flag processor on a revision data flag.
129 129
130 130 Invariant:
131 131 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
132 132 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
133 133 - Only one flag processor can be registered on a specific flag.
134 134 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
135 135 following signatures:
136 136 - (read) f(self, rawtext) -> text, bool
137 137 - (write) f(self, text) -> rawtext, bool
138 138 - (raw) f(self, rawtext) -> bool
139 139 "text" is presented to the user. "rawtext" is stored in revlog data, not
140 140 directly visible to the user.
141 141 The boolean returned by these transforms is used to determine whether
142 142 the returned text can be used for hash integrity checking. For example,
143 143 if "write" returns False, then "text" is used to generate hash. If
144 144 "write" returns True, that basically means "rawtext" returned by "write"
145 145 should be used to generate hash. Usually, "write" and "read" return
146 146 different booleans. And "raw" returns a same boolean as "write".
147 147
148 148 Note: The 'raw' transform is used for changegroup generation and in some
149 149 debug commands. In this case the transform only indicates whether the
150 150 contents can be used for hash integrity checks.
151 151 """
152 152 if not flag & REVIDX_KNOWN_FLAGS:
153 153 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
154 154 raise ProgrammingError(msg)
155 155 if flag not in REVIDX_FLAGS_ORDER:
156 156 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
157 157 raise ProgrammingError(msg)
158 158 if flag in _flagprocessors:
159 159 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
160 160 raise error.Abort(msg)
161 161 _flagprocessors[flag] = processor
162 162
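# Illustrative sketch (an assumption, not upstream code): the shape of a
# flag processor tuple. The three hypothetical callables follow the
# (read, write, raw) signatures documented above; the registration call is
# left commented out so the processor table is not actually modified here.
def _noopread(self, rawtext):
    # "read" maps stored rawtext to user-visible text
    return rawtext, True
def _noopwrite(self, text):
    # "write" maps user-visible text to stored rawtext
    return text, True
def _noopraw(self, rawtext):
    # "raw" only reports whether rawtext is usable for hash checks
    return True
#addflagprocessor(REVIDX_EXTSTORED, (_noopread, _noopwrite, _noopraw))
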
163 163 def getoffset(q):
164 164 return int(q >> 16)
165 165
166 166 def gettype(q):
167 167 return int(q & 0xFFFF)
168 168
169 169 def offset_type(offset, type):
170 170 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
171 171 raise ValueError('unknown revlog index flags')
172 172 return int(int(offset) << 16 | type)
173 173
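# Quick sketch (illustrative only): the first index field packs a 48-bit
# offset above 16 bits of flags, and getoffset/gettype recover both halves.
def _demooffsettype():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
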
174 174 _nullhash = hashlib.sha1(nullid)
175 175
176 176 def hash(text, p1, p2):
177 177 """generate a hash from the given text and its parent hashes
178 178
179 179 This hash combines both the current file contents and its history
180 180 in a manner that makes it easy to distinguish nodes with the same
181 181 content in the revision graph.
182 182 """
183 183     # As of now, if one of the parent nodes is null, p2 is null
184 184 if p2 == nullid:
185 185 # deep copy of a hash is faster than creating one
186 186 s = _nullhash.copy()
187 187 s.update(p1)
188 188 else:
189 189 # none of the parent nodes are nullid
190 190 if p1 < p2:
191 191 a = p1
192 192 b = p2
193 193 else:
194 194 a = p2
195 195 b = p1
196 196 s = hashlib.sha1(a)
197 197 s.update(b)
198 198 s.update(text)
199 199 return s.digest()
200 200
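# Minimal sketch (not upstream code) of the fast path above: a root revision
# has two null parents, so its node is simply sha1(nullid + nullid + text).
def _demohash():
    expected = hashlib.sha1(nullid + nullid + 'data').digest()
    assert hash('data', nullid, nullid) == expected
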
201 201 class _testrevlog(object):
202 202 """minimalist fake revlog to use in doctests"""
203 203
204 204 def __init__(self, data, density=0.5, mingap=0):
205 205 """data is an list of revision payload boundaries"""
206 206 self._data = data
207 207 self._srdensitythreshold = density
208 208 self._srmingapsize = mingap
209 209
210 210 def start(self, rev):
211 211 if rev == 0:
212 212 return 0
213 213 return self._data[rev - 1]
214 214
215 215 def end(self, rev):
216 216 return self._data[rev]
217 217
218 218 def length(self, rev):
219 219 return self.end(rev) - self.start(rev)
220 220
221 221 def __len__(self):
222 222 return len(self._data)
223 223
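# Tiny usage sketch (illustrative only): _testrevlog stores cumulative end
# offsets, so start/end/length fall out of simple subtractions.
def _demotestrevlog():
    rl = _testrevlog([5, 10, 12])
    assert (rl.start(1), rl.end(1), rl.length(1)) == (5, 10, 5)
    assert rl.length(2) == 2 # 12 - 10
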
224 224 def _trimchunk(revlog, revs, startidx, endidx=None):
225 225 """returns revs[startidx:endidx] without empty trailing revs
226 226
227 227 Doctest Setup
228 228 >>> revlog = _testrevlog([
229 229 ... 5, #0
230 230 ... 10, #1
231 231 ... 12, #2
232 232 ... 12, #3 (empty)
233 233 ... 17, #4
234 234 ... 21, #5
235 235 ... 21, #6 (empty)
236 236 ... ])
237 237
238 238 Contiguous cases:
239 239 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
240 240 [0, 1, 2, 3, 4, 5]
241 241 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
242 242 [0, 1, 2, 3, 4]
243 243 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
244 244 [0, 1, 2]
245 245 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
246 246 [2]
247 247 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
248 248 [3, 4, 5]
249 249 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
250 250 [3, 4]
251 251
252 252 Discontiguous cases:
253 253 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
254 254 [1, 3, 5]
255 255 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
256 256 [1]
257 257 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
258 258 [3, 5]
259 259 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
260 260 [3, 5]
261 261 """
262 262 length = revlog.length
263 263
264 264 if endidx is None:
265 265 endidx = len(revs)
266 266
267 267     # If we have a non-empty delta candidate, there is nothing to trim
268 268 if revs[endidx - 1] < len(revlog):
269 269 # Trim empty revs at the end, except the very first revision of a chain
270 270 while (endidx > 1
271 271 and endidx > startidx
272 272 and length(revs[endidx - 1]) == 0):
273 273 endidx -= 1
274 274
275 275 return revs[startidx:endidx]
276 276
277 277 def _segmentspan(revlog, revs, deltainfo=None):
278 278 """Get the byte span of a segment of revisions
279 279
280 280 revs is a sorted array of revision numbers
281 281
282 282 >>> revlog = _testrevlog([
283 283 ... 5, #0
284 284 ... 10, #1
285 285 ... 12, #2
286 286 ... 12, #3 (empty)
287 287 ... 17, #4
288 288 ... ])
289 289
290 290 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
291 291 17
292 292 >>> _segmentspan(revlog, [0, 4])
293 293 17
294 294 >>> _segmentspan(revlog, [3, 4])
295 295 5
296 296 >>> _segmentspan(revlog, [1, 2, 3,])
297 297 7
298 298 >>> _segmentspan(revlog, [1, 3])
299 299 7
300 300 """
301 301 if not revs:
302 302 return 0
303 303 if deltainfo is not None and len(revlog) <= revs[-1]:
304 304 if len(revs) == 1:
305 305 return deltainfo.deltalen
306 306 offset = revlog.end(len(revlog) - 1)
307 307 end = deltainfo.deltalen + offset
308 308 else:
309 309 end = revlog.end(revs[-1])
310 310 return end - revlog.start(revs[0])
311 311
312 312 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
313 313 """slice revs to reduce the amount of unrelated data to be read from disk.
314 314
315 315     ``revs`` is sliced into groups that should be read at once.
316 316 Assume that revs are sorted.
317 317
318 318 The initial chunk is sliced until the overall density (payload/chunks-span
319 319 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
320 320 `revlog._srmingapsize` is skipped.
321 321
322 322     If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
323 323     For consistency with other slicing choices, this limit won't go lower than
324 324     `revlog._srmingapsize`.
325 325 
326 326     If individual revision chunks are larger than this limit, they will still
327 327     be yielded individually.
328 328
329 329 >>> revlog = _testrevlog([
330 330 ... 5, #00 (5)
331 331 ... 10, #01 (5)
332 332 ... 12, #02 (2)
333 333 ... 12, #03 (empty)
334 334 ... 27, #04 (15)
335 335 ... 31, #05 (4)
336 336 ... 31, #06 (empty)
337 337 ... 42, #07 (11)
338 338 ... 47, #08 (5)
339 339 ... 47, #09 (empty)
340 340 ... 48, #10 (1)
341 341 ... 51, #11 (3)
342 342 ... 74, #12 (23)
343 343 ... 85, #13 (11)
344 344 ... 86, #14 (1)
345 345 ... 91, #15 (5)
346 346 ... ])
347 347
348 348 >>> list(_slicechunk(revlog, list(range(16))))
349 349 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
350 350 >>> list(_slicechunk(revlog, [0, 15]))
351 351 [[0], [15]]
352 352 >>> list(_slicechunk(revlog, [0, 11, 15]))
353 353 [[0], [11], [15]]
354 354 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
355 355 [[0], [11, 13, 15]]
356 356 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
357 357 [[1, 2], [5, 8, 10, 11], [14]]
358 358
359 359 Slicing with a maximum chunk size
360 360 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
361 361 [[0], [11], [13], [15]]
362 362 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
363 363 [[0], [11], [13, 15]]
364 364 """
365 365 if targetsize is not None:
366 366 targetsize = max(targetsize, revlog._srmingapsize)
367 367 # targetsize should not be specified when evaluating delta candidates:
368 368 # * targetsize is used to ensure we stay within specification when reading,
369 369     # * deltainfo is used to pick a good delta chain when writing.
370 370 if not (deltainfo is None or targetsize is None):
371 371 msg = 'cannot use `targetsize` with a `deltainfo`'
372 372 raise error.ProgrammingError(msg)
373 373 for chunk in _slicechunktodensity(revlog, revs,
374 374 deltainfo,
375 375 revlog._srdensitythreshold,
376 376 revlog._srmingapsize):
377 377 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
378 378 yield subchunk
379 379
380 380 def _slicechunktosize(revlog, revs, targetsize=None):
381 381 """slice revs to match the target size
382 382
383 383     This is intended to be used on chunks that density slicing selected but
384 384     that are still too large compared to the read guarantee of revlogs. This
385 385     might happen when the "minimal gap size" interrupted the slicing, or when
386 386     chains are built in a way that creates large blocks next to each other.
387 387
388 388 >>> revlog = _testrevlog([
389 389 ... 3, #0 (3)
390 390 ... 5, #1 (2)
391 391 ... 6, #2 (1)
392 392 ... 8, #3 (2)
393 393 ... 8, #4 (empty)
394 394 ... 11, #5 (3)
395 395 ... 12, #6 (1)
396 396 ... 13, #7 (1)
397 397 ... 14, #8 (1)
398 398 ... ])
399 399
400 400 Cases where chunk is already small enough
401 401 >>> list(_slicechunktosize(revlog, [0], 3))
402 402 [[0]]
403 403 >>> list(_slicechunktosize(revlog, [6, 7], 3))
404 404 [[6, 7]]
405 405 >>> list(_slicechunktosize(revlog, [0], None))
406 406 [[0]]
407 407 >>> list(_slicechunktosize(revlog, [6, 7], None))
408 408 [[6, 7]]
409 409
410 410     Cases where we need actual slicing
411 411 >>> list(_slicechunktosize(revlog, [0, 1], 3))
412 412 [[0], [1]]
413 413 >>> list(_slicechunktosize(revlog, [1, 3], 3))
414 414 [[1], [3]]
415 415 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
416 416 [[1, 2], [3]]
417 417 >>> list(_slicechunktosize(revlog, [3, 5], 3))
418 418 [[3], [5]]
419 419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
420 420 [[3], [5]]
421 421 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
422 422 [[5], [6, 7, 8]]
423 423 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
424 424 [[0], [1, 2], [3], [5], [6, 7, 8]]
425 425
426 426 Case with too large individual chunk (must return valid chunk)
427 427 >>> list(_slicechunktosize(revlog, [0, 1], 2))
428 428 [[0], [1]]
429 429 >>> list(_slicechunktosize(revlog, [1, 3], 1))
430 430 [[1], [3]]
431 431 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
432 432 [[3], [5]]
433 433 """
434 434 assert targetsize is None or 0 <= targetsize
435 435 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
436 436 yield revs
437 437 return
438 438
439 439 startrevidx = 0
440 440 startdata = revlog.start(revs[0])
441 441 endrevidx = 0
442 442 iterrevs = enumerate(revs)
443 443 next(iterrevs) # skip first rev.
444 444 for idx, r in iterrevs:
445 445 span = revlog.end(r) - startdata
446 446 if span <= targetsize:
447 447 endrevidx = idx
448 448 else:
449 449 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
450 450 if chunk:
451 451 yield chunk
452 452 startrevidx = idx
453 453 startdata = revlog.start(r)
454 454 endrevidx = idx
455 455 yield _trimchunk(revlog, revs, startrevidx)
456 456
457 457 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
458 458 mingapsize=0):
459 459 """slice revs to reduce the amount of unrelated data to be read from disk.
460 460
461 461     ``revs`` is sliced into groups that should be read at once.
462 462 Assume that revs are sorted.
463 463
464 464 ``deltainfo`` is a _deltainfo instance of a revision that we would append
465 465 to the top of the revlog.
466 466
467 467 The initial chunk is sliced until the overall density (payload/chunks-span
468 468 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
469 469 skipped.
470 470
471 471 >>> revlog = _testrevlog([
472 472 ... 5, #00 (5)
473 473 ... 10, #01 (5)
474 474 ... 12, #02 (2)
475 475 ... 12, #03 (empty)
476 476 ... 27, #04 (15)
477 477 ... 31, #05 (4)
478 478 ... 31, #06 (empty)
479 479 ... 42, #07 (11)
480 480 ... 47, #08 (5)
481 481 ... 47, #09 (empty)
482 482 ... 48, #10 (1)
483 483 ... 51, #11 (3)
484 484 ... 74, #12 (23)
485 485 ... 85, #13 (11)
486 486 ... 86, #14 (1)
487 487 ... 91, #15 (5)
488 488 ... ])
489 489
490 490 >>> list(_slicechunktodensity(revlog, list(range(16))))
491 491 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
492 492 >>> list(_slicechunktodensity(revlog, [0, 15]))
493 493 [[0], [15]]
494 494 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
495 495 [[0], [11], [15]]
496 496 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
497 497 [[0], [11, 13, 15]]
498 498 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
499 499 [[1, 2], [5, 8, 10, 11], [14]]
500 500 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
501 501 ... mingapsize=20))
502 502 [[1, 2, 3, 5, 8, 10, 11], [14]]
503 503 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
504 504 ... targetdensity=0.95))
505 505 [[1, 2], [5], [8, 10, 11], [14]]
506 506 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
507 507 ... targetdensity=0.95, mingapsize=12))
508 508 [[1, 2], [5, 8, 10, 11], [14]]
509 509 """
510 510 start = revlog.start
511 511 length = revlog.length
512 512
513 513 if len(revs) <= 1:
514 514 yield revs
515 515 return
516 516
517 517 nextrev = len(revlog)
518 518 nextoffset = revlog.end(nextrev - 1)
519 519
520 520 if deltainfo is None:
521 521 deltachainspan = _segmentspan(revlog, revs)
522 522 chainpayload = sum(length(r) for r in revs)
523 523 else:
524 524 deltachainspan = deltainfo.distance
525 525 chainpayload = deltainfo.compresseddeltalen
526 526
527 527 if deltachainspan < mingapsize:
528 528 yield revs
529 529 return
530 530
531 531 readdata = deltachainspan
532 532
533 533 if deltachainspan:
534 534 density = chainpayload / float(deltachainspan)
535 535 else:
536 536 density = 1.0
537 537
538 538 if density >= targetdensity:
539 539 yield revs
540 540 return
541 541
542 542 if deltainfo is not None and deltainfo.deltalen:
543 543 revs = list(revs)
544 544 revs.append(nextrev)
545 545
546 546 # Store the gaps in a heap to have them sorted by decreasing size
547 547 gapsheap = []
548 548 heapq.heapify(gapsheap)
549 549 prevend = None
550 550 for i, rev in enumerate(revs):
551 551 if rev < nextrev:
552 552 revstart = start(rev)
553 553 revlen = length(rev)
554 554 else:
555 555 revstart = nextoffset
556 556 revlen = deltainfo.deltalen
557 557
558 558 # Skip empty revisions to form larger holes
559 559 if revlen == 0:
560 560 continue
561 561
562 562 if prevend is not None:
563 563 gapsize = revstart - prevend
564 564 # only consider holes that are large enough
565 565 if gapsize > mingapsize:
566 566 heapq.heappush(gapsheap, (-gapsize, i))
567 567
568 568 prevend = revstart + revlen
569 569
570 570 # Collect the indices of the largest holes until the density is acceptable
571 571 indicesheap = []
572 572 heapq.heapify(indicesheap)
573 573 while gapsheap and density < targetdensity:
574 574 oppgapsize, gapidx = heapq.heappop(gapsheap)
575 575
576 576 heapq.heappush(indicesheap, gapidx)
577 577
578 578         # the gap sizes are stored as negatives so that the heap keeps them
579 579         # sorted in decreasing order
580 580 readdata -= (-oppgapsize)
581 581 if readdata > 0:
582 582 density = chainpayload / float(readdata)
583 583 else:
584 584 density = 1.0
585 585
586 586 # Cut the revs at collected indices
587 587 previdx = 0
588 588 while indicesheap:
589 589 idx = heapq.heappop(indicesheap)
590 590
591 591 chunk = _trimchunk(revlog, revs, previdx, idx)
592 592 if chunk:
593 593 yield chunk
594 594
595 595 previdx = idx
596 596
597 597 chunk = _trimchunk(revlog, revs, previdx)
598 598 if chunk:
599 599 yield chunk
600 600
601 601 @attr.s(slots=True, frozen=True)
602 602 class _deltainfo(object):
603 603 distance = attr.ib()
604 604 deltalen = attr.ib()
605 605 data = attr.ib()
606 606 base = attr.ib()
607 607 chainbase = attr.ib()
608 608 chainlen = attr.ib()
609 609 compresseddeltalen = attr.ib()
610 610 snapshotdepth = attr.ib()
611 611
612 612 class _deltacomputer(object):
613 613 def __init__(self, revlog):
614 614 self.revlog = revlog
615 615
616 616 def _getcandidaterevs(self, p1, p2, cachedelta):
617 617 """
618 618         Provides revisions that are interesting to diff against,
619 619         grouped by level of easiness.
620 620 """
621 621 revlog = self.revlog
622 622 gdelta = revlog._generaldelta
623 623 curr = len(revlog)
624 624 prev = curr - 1
625 625 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
626 626
627 627 # should we try to build a delta?
628 628 if prev != nullrev and revlog.storedeltachains:
629 629 tested = set()
630 630 # This condition is true most of the time when processing
631 631 # changegroup data into a generaldelta repo. The only time it
632 632 # isn't true is if this is the first revision in a delta chain
633 633 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
634 634 if cachedelta and gdelta and revlog._lazydeltabase:
635 635                 # Assume what we received from the server is a good choice;
636 636                 # building the delta will reuse the cache
637 637 yield (cachedelta[0],)
638 638 tested.add(cachedelta[0])
639 639
640 640 if gdelta:
641 641                 # exclude the base already tested lazily, if any
642 642 parents = [p for p in (p1r, p2r)
643 643 if p != nullrev and p not in tested]
644 644
645 645 if not revlog._deltabothparents and len(parents) == 2:
646 646 parents.sort()
647 647 # To minimize the chance of having to build a fulltext,
648 648 # pick first whichever parent is closest to us (max rev)
649 649 yield (parents[1],)
650 650 # then the other one (min rev) if the first did not fit
651 651 yield (parents[0],)
652 652 tested.update(parents)
653 653 elif len(parents) > 0:
654 654 # Test all parents (1 or 2), and keep the best candidate
655 655 yield parents
656 656 tested.update(parents)
657 657
658 658 if prev not in tested:
659 659                 # other approaches failed; try against prev to hopefully save us a
660 660 # fulltext.
661 661 yield (prev,)
662 662 tested.add(prev)
663 663
664 664 def buildtext(self, revinfo, fh):
665 665 """Builds a fulltext version of a revision
666 666
667 667 revinfo: _revisioninfo instance that contains all needed info
668 668 fh: file handle to either the .i or the .d revlog file,
669 669 depending on whether it is inlined or not
670 670 """
671 671 btext = revinfo.btext
672 672 if btext[0] is not None:
673 673 return btext[0]
674 674
675 675 revlog = self.revlog
676 676 cachedelta = revinfo.cachedelta
677 677 flags = revinfo.flags
678 678 node = revinfo.node
679 679
680 680 baserev = cachedelta[0]
681 681 delta = cachedelta[1]
682 682 # special case deltas which replace entire base; no need to decode
683 683 # base revision. this neatly avoids censored bases, which throw when
684 684 # they're decoded.
685 685 hlen = struct.calcsize(">lll")
686 686 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
687 687 len(delta) - hlen):
688 688 btext[0] = delta[hlen:]
689 689 else:
690 690             # deltabase is the rawtext before it was changed by flag
691 691             # processors, which is equivalent to non-raw text
692 692 basetext = revlog.revision(baserev, _df=fh, raw=False)
693 693 btext[0] = mdiff.patch(basetext, delta)
694 694
695 695 try:
696 696 res = revlog._processflags(btext[0], flags, 'read', raw=True)
697 697 btext[0], validatehash = res
698 698 if validatehash:
699 699 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
700 700 if flags & REVIDX_ISCENSORED:
701 701 raise RevlogError(_('node %s is not censored') % node)
702 702 except CensoredNodeError:
703 703 # must pass the censored index flag to add censored revisions
704 704 if not flags & REVIDX_ISCENSORED:
705 705 raise
706 706 return btext[0]
707 707
708 708 def _builddeltadiff(self, base, revinfo, fh):
709 709 revlog = self.revlog
710 710 t = self.buildtext(revinfo, fh)
711 711 if revlog.iscensored(base):
712 712 # deltas based on a censored revision must replace the
713 713 # full content in one patch, so delta works everywhere
714 714 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
715 715 delta = header + t
716 716 else:
717 717 ptext = revlog.revision(base, _df=fh, raw=True)
718 718 delta = mdiff.textdiff(ptext, t)
719 719
720 720 return delta
721 721
722 722 def _builddeltainfo(self, revinfo, base, fh):
723 723 # can we use the cached delta?
724 724 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
725 725 delta = revinfo.cachedelta[1]
726 726 else:
727 727 delta = self._builddeltadiff(base, revinfo, fh)
728 728 revlog = self.revlog
729 729 header, data = revlog.compress(delta)
730 730 deltalen = len(header) + len(data)
731 731 chainbase = revlog.chainbase(base)
732 732 offset = revlog.end(len(revlog) - 1)
733 733 dist = deltalen + offset - revlog.start(chainbase)
734 734 if revlog._generaldelta:
735 735 deltabase = base
736 736 else:
737 737 deltabase = chainbase
738 738 chainlen, compresseddeltalen = revlog._chaininfo(base)
739 739 chainlen += 1
740 740 compresseddeltalen += deltalen
741 741
742 742 revlog = self.revlog
743 743 snapshotdepth = None
744 744 if deltabase == nullrev:
745 745 snapshotdepth = 0
746 746 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
747 747 # A delta chain should always be one full snapshot,
748 748 # zero or more semi-snapshots, and zero or more deltas
749 749 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
750 750 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
751 751 snapshotdepth = len(revlog._deltachain(deltabase)[0])
752 752
753 753 return _deltainfo(dist, deltalen, (header, data), deltabase,
754 754 chainbase, chainlen, compresseddeltalen,
755 755 snapshotdepth)
756 756
757 757 def finddeltainfo(self, revinfo, fh):
758 758 """Find an acceptable delta against a candidate revision
759 759
760 760 revinfo: information about the revision (instance of _revisioninfo)
761 761 fh: file handle to either the .i or the .d revlog file,
762 762 depending on whether it is inlined or not
763 763
764 764 Returns the first acceptable candidate revision, as ordered by
765 765 _getcandidaterevs
766 766 """
767 767 if not revinfo.textlen:
768 768             return None # empty files do not need deltas
769 769
770 770 cachedelta = revinfo.cachedelta
771 771 p1 = revinfo.p1
772 772 p2 = revinfo.p2
773 773 revlog = self.revlog
774 774
775 775 deltalength = self.revlog.length
776 776 deltaparent = self.revlog.deltaparent
777 777
778 778 deltainfo = None
779 779 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
780 780 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
781 781             # filter out delta bases that will never produce a good delta
782 782 candidaterevs = [r for r in candidaterevs
783 783 if self.revlog.length(r) <= deltas_limit]
784 784 nominateddeltas = []
785 785 for candidaterev in candidaterevs:
786 786                 # skip over empty deltas (no need to include them in a chain)
787 787 while candidaterev != nullrev and not deltalength(candidaterev):
788 788 candidaterev = deltaparent(candidaterev)
789 789                 # no need to try a delta against nullrev; this will be handled
790 790 # by fulltext later.
791 791 if candidaterev == nullrev:
792 792 continue
793 793 # no delta for rawtext-changing revs (see "candelta" for why)
794 794 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
795 795 continue
796 796 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
797 797 if revlog._isgooddeltainfo(candidatedelta, revinfo):
798 798 nominateddeltas.append(candidatedelta)
799 799 if nominateddeltas:
800 800 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
801 801 break
802 802
803 803 return deltainfo
804 804
805 805 @attr.s(slots=True, frozen=True)
806 806 class _revisioninfo(object):
807 807 """Information about a revision that allows building its fulltext
808 808 node: expected hash of the revision
809 809 p1, p2: parent revs of the revision
810 810 btext: built text cache consisting of a one-element list
811 811 cachedelta: (baserev, uncompressed_delta) or None
812 812 flags: flags associated to the revision storage
813 813
814 814 One of btext[0] or cachedelta must be set.
815 815 """
816 816 node = attr.ib()
817 817 p1 = attr.ib()
818 818 p2 = attr.ib()
819 819 btext = attr.ib()
820 820 textlen = attr.ib()
821 821 cachedelta = attr.ib()
822 822 flags = attr.ib()
823 823
824 824 # index v0:
825 825 # 4 bytes: offset
826 826 # 4 bytes: compressed length
827 827 # 4 bytes: base rev
828 828 # 4 bytes: link rev
829 829 # 20 bytes: parent 1 nodeid
830 830 # 20 bytes: parent 2 nodeid
831 831 # 20 bytes: nodeid
832 832 indexformatv0 = struct.Struct(">4l20s20s20s")
833 833 indexformatv0_pack = indexformatv0.pack
834 834 indexformatv0_unpack = indexformatv0.unpack
835 835
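# Small sketch (not part of the changeset) confirming the v0 record layout
# documented above: four 4-byte fields plus three 20-byte nodeids, and a
# lossless pack/unpack round trip.
def _demov0entry():
    assert indexformatv0.size == 4 * 4 + 3 * 20 # 76 bytes per entry
    entry = (0, 12, -1, 0, '\xaa' * 20, '\xbb' * 20, '\xcc' * 20)
    assert indexformatv0_unpack(indexformatv0_pack(*entry)) == entry
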
836 836 class revlogoldindex(list):
837 837 def __getitem__(self, i):
838 838 if i == -1:
839 839 return (0, 0, 0, -1, -1, -1, -1, nullid)
840 840 return list.__getitem__(self, i)
841 841
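# Usage sketch (illustrative): index[-1] acts as a sentinel null entry, so
# callers can chase parent revs into nullrev without special-casing it.
def _demonullentry():
    idx = revlogoldindex([])
    assert idx[-1][7] == nullid # the node slot of the null entry
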
842 842 # maximum <delta-chain-data>/<revision-text-length> ratio
843 843 LIMIT_DELTA2TEXT = 2
844 844
845 845 class revlogoldio(object):
846 846 def __init__(self):
847 847 self.size = indexformatv0.size
848 848
849 849 def parseindex(self, data, inline):
850 850 s = self.size
851 851 index = []
852 852 nodemap = {nullid: nullrev}
853 853 n = off = 0
854 854 l = len(data)
855 855 while off + s <= l:
856 856 cur = data[off:off + s]
857 857 off += s
858 858 e = indexformatv0_unpack(cur)
859 859 # transform to revlogv1 format
860 860 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
861 861 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
862 862 index.append(e2)
863 863 nodemap[e[6]] = n
864 864 n += 1
865 865
866 866 return revlogoldindex(index), nodemap, None
867 867
868 868 def packentry(self, entry, node, version, rev):
869 869 if gettype(entry[0]):
870 870 raise RevlogError(_('index entry flags need revlog version 1'))
871 871 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
872 872 node(entry[5]), node(entry[6]), entry[7])
873 873 return indexformatv0_pack(*e2)
874 874
875 875 # index ng:
876 876 # 6 bytes: offset
877 877 # 2 bytes: flags
878 878 # 4 bytes: compressed length
879 879 # 4 bytes: uncompressed length
880 880 # 4 bytes: base rev
881 881 # 4 bytes: link rev
882 882 # 4 bytes: parent 1 rev
883 883 # 4 bytes: parent 2 rev
884 884 # 32 bytes: nodeid
885 885 indexformatng = struct.Struct(">Qiiiiii20s12x")
886 886 indexformatng_pack = indexformatng.pack
887 887 versionformat = struct.Struct(">I")
888 888 versionformat_pack = versionformat.pack
889 889 versionformat_unpack = versionformat.unpack
890 890
891 891 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
892 892 # signed integer)
893 893 _maxentrysize = 0x7fffffff
894 894
895 895 class revlogio(object):
896 896 def __init__(self):
897 897 self.size = indexformatng.size
898 898
899 899 def parseindex(self, data, inline):
900 900 # call the C implementation to parse the index data
901 901 index, cache = parsers.parse_index2(data, inline)
902 902 return index, getattr(index, 'nodemap', None), cache
903 903
904 904 def packentry(self, entry, node, version, rev):
905 905 p = indexformatng_pack(*entry)
906 906 if rev == 0:
907 907 p = versionformat_pack(version) + p[4:]
908 908 return p
909 909
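# Sketch (illustrative only) of the v1 "ng" entry layout described above:
# 64 bytes per record, with the revlog version header spliced into the first
# four bytes of entry 0 by packentry.
def _demongentry():
    assert indexformatng.size == 8 + 6 * 4 + 20 + 12 # 64 bytes per entry
    entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, '\xaa' * 20)
    packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert packed[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)
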
910 910 class revlog(object):
911 911 """
912 912 the underlying revision storage object
913 913
914 914 A revlog consists of two parts, an index and the revision data.
915 915
916 916 The index is a file with a fixed record size containing
917 917 information on each revision, including its nodeid (hash), the
918 918 nodeids of its parents, the position and offset of its data within
919 919 the data file, and the revision it's based on. Finally, each entry
920 920 contains a linkrev entry that can serve as a pointer to external
921 921 data.
922 922
923 923 The revision data itself is a linear collection of data chunks.
924 924 Each chunk represents a revision and is usually represented as a
925 925 delta against the previous chunk. To bound lookup time, runs of
926 926 deltas are limited to about 2 times the length of the original
927 927 version data. This makes retrieval of a version proportional to
928 928 its size, or O(1) relative to the number of revisions.
929 929
930 930 Both pieces of the revlog are written to in an append-only
931 931 fashion, which means we never need to rewrite a file to insert or
932 932 remove data, and can use some simple techniques to avoid the need
933 933 for locking while reading.
934 934
935 935 If checkambig, indexfile is opened with checkambig=True at
936 936 writing, to avoid file stat ambiguity.
937 937
938 938 If mmaplargeindex is True, and an mmapindexthreshold is set, the
939 939 index will be mmapped rather than read if it is larger than the
940 940 configured threshold.
941 941
942 942 If censorable is True, the revlog can have censored revisions.
943 943 """
944 944 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
945 945 mmaplargeindex=False, censorable=False):
946 946 """
947 947 create a revlog object
948 948
949 949 opener is a function that abstracts the file opening operation
950 950 and can be used to implement COW semantics or the like.
951 951 """
952 952 self.indexfile = indexfile
953 953 self.datafile = datafile or (indexfile[:-2] + ".d")
954 954 self.opener = opener
955 955 # When True, indexfile is opened with checkambig=True at writing, to
956 956 # avoid file stat ambiguity.
957 957 self._checkambig = checkambig
958 958 self._censorable = censorable
959 959 # 3-tuple of (node, rev, text) for a raw revision.
960 960 self._cache = None
961 961 # Maps rev to chain base rev.
962 962 self._chainbasecache = util.lrucachedict(100)
963 963 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
964 964 self._chunkcache = (0, '')
965 965 # How much data to read and cache into the raw revlog data cache.
966 966 self._chunkcachesize = 65536
967 967 self._maxchainlen = None
968 968 self._deltabothparents = True
969 969 self.index = []
970 970 # Mapping of partial identifiers to full nodes.
971 971 self._pcache = {}
972 972 # Mapping of revision integer to full node.
973 973 self._nodecache = {nullid: nullrev}
974 974 self._nodepos = None
975 975 self._compengine = 'zlib'
976 976 self._maxdeltachainspan = -1
977 977 self._withsparseread = False
978 978 self._sparserevlog = False
979 979 self._srdensitythreshold = 0.50
980 980 self._srmingapsize = 262144
981 981
982 982 mmapindexthreshold = None
983 983 v = REVLOG_DEFAULT_VERSION
984 984 opts = getattr(opener, 'options', None)
985 985 if opts is not None:
986 986 if 'revlogv2' in opts:
987 987 # version 2 revlogs always use generaldelta.
988 988 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
989 989 elif 'revlogv1' in opts:
990 990 if 'generaldelta' in opts:
991 991 v |= FLAG_GENERALDELTA
992 992 else:
993 993 v = 0
994 994 if 'chunkcachesize' in opts:
995 995 self._chunkcachesize = opts['chunkcachesize']
996 996 if 'maxchainlen' in opts:
997 997 self._maxchainlen = opts['maxchainlen']
998 998 if 'deltabothparents' in opts:
999 999 self._deltabothparents = opts['deltabothparents']
1000 1000 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
1001 1001 if 'compengine' in opts:
1002 1002 self._compengine = opts['compengine']
1003 1003 if 'maxdeltachainspan' in opts:
1004 1004 self._maxdeltachainspan = opts['maxdeltachainspan']
1005 1005 if mmaplargeindex and 'mmapindexthreshold' in opts:
1006 1006 mmapindexthreshold = opts['mmapindexthreshold']
1007 1007 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1008 1008 withsparseread = bool(opts.get('with-sparse-read', False))
1009 1009 # sparse-revlog forces sparse-read
1010 1010 self._withsparseread = self._sparserevlog or withsparseread
1011 1011 if 'sparse-read-density-threshold' in opts:
1012 1012 self._srdensitythreshold = opts['sparse-read-density-threshold']
1013 1013 if 'sparse-read-min-gap-size' in opts:
1014 1014 self._srmingapsize = opts['sparse-read-min-gap-size']
1015 1015
1016 1016 if self._chunkcachesize <= 0:
1017 1017 raise RevlogError(_('revlog chunk cache size %r is not greater '
1018 1018 'than 0') % self._chunkcachesize)
1019 1019 elif self._chunkcachesize & (self._chunkcachesize - 1):
1020 1020 raise RevlogError(_('revlog chunk cache size %r is not a power '
1021 1021 'of 2') % self._chunkcachesize)
1022 1022
1023 1023 indexdata = ''
1024 1024 self._initempty = True
1025 1025 try:
1026 1026 with self._indexfp() as f:
1027 1027 if (mmapindexthreshold is not None and
1028 1028 self.opener.fstat(f).st_size >= mmapindexthreshold):
1029 1029 indexdata = util.buffer(util.mmapread(f))
1030 1030 else:
1031 1031 indexdata = f.read()
1032 1032 if len(indexdata) > 0:
1033 1033 v = versionformat_unpack(indexdata[:4])[0]
1034 1034 self._initempty = False
1035 1035 except IOError as inst:
1036 1036 if inst.errno != errno.ENOENT:
1037 1037 raise
1038 1038
1039 1039 self.version = v
1040 1040 self._inline = v & FLAG_INLINE_DATA
1041 1041 self._generaldelta = v & FLAG_GENERALDELTA
1042 1042 flags = v & ~0xFFFF
1043 1043 fmt = v & 0xFFFF
1044 1044 if fmt == REVLOGV0:
1045 1045 if flags:
1046 1046 raise RevlogError(_('unknown flags (%#04x) in version %d '
1047 1047 'revlog %s') %
1048 1048 (flags >> 16, fmt, self.indexfile))
1049 1049 elif fmt == REVLOGV1:
1050 1050 if flags & ~REVLOGV1_FLAGS:
1051 1051 raise RevlogError(_('unknown flags (%#04x) in version %d '
1052 1052 'revlog %s') %
1053 1053 (flags >> 16, fmt, self.indexfile))
1054 1054 elif fmt == REVLOGV2:
1055 1055 if flags & ~REVLOGV2_FLAGS:
1056 1056 raise RevlogError(_('unknown flags (%#04x) in version %d '
1057 1057 'revlog %s') %
1058 1058 (flags >> 16, fmt, self.indexfile))
1059 1059 else:
1060 1060 raise RevlogError(_('unknown version (%d) in revlog %s') %
1061 1061 (fmt, self.indexfile))
1062 1062
1063 1063 self.storedeltachains = True
1064 1064
1065 1065 self._io = revlogio()
1066 1066 if self.version == REVLOGV0:
1067 1067 self._io = revlogoldio()
1068 1068 try:
1069 1069 d = self._io.parseindex(indexdata, self._inline)
1070 1070 except (ValueError, IndexError):
1071 1071 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1072 1072 self.index, nodemap, self._chunkcache = d
1073 1073 if nodemap is not None:
1074 1074 self.nodemap = self._nodecache = nodemap
1075 1075 if not self._chunkcache:
1076 1076 self._chunkclear()
1077 1077 # revnum -> (chain-length, sum-delta-length)
1078 1078 self._chaininfocache = {}
1079 1079 # revlog header -> revlog compressor
1080 1080 self._decompressors = {}
1081 1081
1082 1082 @util.propertycache
1083 1083 def _compressor(self):
1084 1084 return util.compengines[self._compengine].revlogcompressor()
1085 1085
1086 1086 def _indexfp(self, mode='r'):
1087 1087 """file object for the revlog's index file"""
1088 1088 args = {r'mode': mode}
1089 1089 if mode != 'r':
1090 1090 args[r'checkambig'] = self._checkambig
1091 1091 if mode == 'w':
1092 1092 args[r'atomictemp'] = True
1093 1093 return self.opener(self.indexfile, **args)
1094 1094
1095 1095 def _datafp(self, mode='r'):
1096 1096 """file object for the revlog's data file"""
1097 1097 return self.opener(self.datafile, mode=mode)
1098 1098
1099 1099 @contextlib.contextmanager
1100 1100 def _datareadfp(self, existingfp=None):
1101 1101 """file object suitable to read data"""
1102 1102 if existingfp is not None:
1103 1103 yield existingfp
1104 1104 else:
1105 1105 if self._inline:
1106 1106 func = self._indexfp
1107 1107 else:
1108 1108 func = self._datafp
1109 1109 with func() as fp:
1110 1110 yield fp
1111 1111
1112 1112 def tip(self):
1113 1113 return self.node(len(self.index) - 1)
1114 1114 def __contains__(self, rev):
1115 1115 return 0 <= rev < len(self)
1116 1116 def __len__(self):
1117 1117 return len(self.index)
1118 1118 def __iter__(self):
1119 1119 return iter(pycompat.xrange(len(self)))
1120 1120 def revs(self, start=0, stop=None):
1121 1121 """iterate over all rev in this revlog (from start to stop)"""
1122 1122 step = 1
1123 1123 length = len(self)
1124 1124 if stop is not None:
1125 1125 if start > stop:
1126 1126 step = -1
1127 1127 stop += step
1128 1128 if stop > length:
1129 1129 stop = length
1130 1130 else:
1131 1131 stop = length
1132 1132 return pycompat.xrange(start, stop, step)
1133 1133
1134 1134 @util.propertycache
1135 1135 def nodemap(self):
1136 1136 if self.index:
1137 1137 # populate mapping down to the initial node
1138 1138 node0 = self.index[0][7] # get around changelog filtering
1139 1139 self.rev(node0)
1140 1140 return self._nodecache
1141 1141
1142 1142 def hasnode(self, node):
1143 1143 try:
1144 1144 self.rev(node)
1145 1145 return True
1146 1146 except KeyError:
1147 1147 return False
1148 1148
1149 1149 def candelta(self, baserev, rev):
1150 1150 """whether two revisions (baserev, rev) can be delta-ed or not"""
1151 1151 # Disable delta if either rev requires a content-changing flag
1152 1152         # processor (ex. LFS). This is because such flag processors can alter
1153 1153         # the rawtext content that the delta will be based on, and two clients
1154 1154         # could have the same revlog node with different flags (i.e. different
1155 1155 # rawtext contents) and the delta could be incompatible.
1156 1156 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1157 1157 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1158 1158 return False
1159 1159 return True
1160 1160
1161 1161 def clearcaches(self):
1162 1162 self._cache = None
1163 1163 self._chainbasecache.clear()
1164 1164 self._chunkcache = (0, '')
1165 1165 self._pcache = {}
1166 1166
1167 1167 try:
1168 1168 self._nodecache.clearcaches()
1169 1169 except AttributeError:
1170 1170 self._nodecache = {nullid: nullrev}
1171 1171 self._nodepos = None
1172 1172
1173 1173 def rev(self, node):
1174 1174 try:
1175 1175 return self._nodecache[node]
1176 1176 except TypeError:
1177 1177 raise
1178 1178 except RevlogError:
1179 1179 # parsers.c radix tree lookup failed
1180 1180 if node == wdirid or node in wdirfilenodeids:
1181 1181 raise error.WdirUnsupported
1182 1182 raise LookupError(node, self.indexfile, _('no node'))
1183 1183 except KeyError:
1184 1184 # pure python cache lookup failed
1185 1185 n = self._nodecache
1186 1186 i = self.index
1187 1187 p = self._nodepos
1188 1188 if p is None:
1189 1189 p = len(i) - 1
1190 1190 else:
1191 1191 assert p < len(i)
1192 1192 for r in pycompat.xrange(p, -1, -1):
1193 1193 v = i[r][7]
1194 1194 n[v] = r
1195 1195 if v == node:
1196 1196 self._nodepos = r - 1
1197 1197 return r
1198 1198 if node == wdirid or node in wdirfilenodeids:
1199 1199 raise error.WdirUnsupported
1200 1200 raise LookupError(node, self.indexfile, _('no node'))
1201 1201
1202 1202 # Accessors for index entries.
1203 1203
1204 1204 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1205 1205 # are flags.
1206 1206 def start(self, rev):
1207 1207 return int(self.index[rev][0] >> 16)
1208 1208
1209 1209 def flags(self, rev):
1210 1210 return self.index[rev][0] & 0xFFFF
1211 1211
1212 1212 def length(self, rev):
1213 1213 return self.index[rev][1]
1214 1214
1215 1215 def rawsize(self, rev):
1216 1216 """return the length of the uncompressed text for a given revision"""
1217 1217 l = self.index[rev][2]
1218 1218 if l >= 0:
1219 1219 return l
1220 1220
1221 1221 t = self.revision(rev, raw=True)
1222 1222 return len(t)
1223 1223
1224 1224 def size(self, rev):
1225 1225 """length of non-raw text (processed by a "read" flag processor)"""
1226 1226 # fast path: if no "read" flag processor could change the content,
1227 1227 # size is rawsize. note: ELLIPSIS is known to not change the content.
1228 1228 flags = self.flags(rev)
1229 1229 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1230 1230 return self.rawsize(rev)
1231 1231
1232 1232 return len(self.revision(rev, raw=False))
1233 1233
1234 1234 def chainbase(self, rev):
1235 1235 base = self._chainbasecache.get(rev)
1236 1236 if base is not None:
1237 1237 return base
1238 1238
1239 1239 index = self.index
1240 1240 iterrev = rev
1241 1241 base = index[iterrev][3]
1242 1242 while base != iterrev:
1243 1243 iterrev = base
1244 1244 base = index[iterrev][3]
1245 1245
1246 1246 self._chainbasecache[rev] = base
1247 1247 return base
1248 1248
1249 1249 def linkrev(self, rev):
1250 1250 return self.index[rev][4]
1251 1251
1252 1252 def parentrevs(self, rev):
1253 1253 try:
1254 1254 entry = self.index[rev]
1255 1255 except IndexError:
1256 1256 if rev == wdirrev:
1257 1257 raise error.WdirUnsupported
1258 1258 raise
1259 1259
1260 1260 return entry[5], entry[6]
1261 1261
1262 1262 def node(self, rev):
1263 1263 try:
1264 1264 return self.index[rev][7]
1265 1265 except IndexError:
1266 1266 if rev == wdirrev:
1267 1267 raise error.WdirUnsupported
1268 1268 raise
1269 1269
1270 1270 # Derived from index values.
1271 1271
1272 1272 def end(self, rev):
1273 1273 return self.start(rev) + self.length(rev)
1274 1274
1275 1275 def parents(self, node):
1276 1276 i = self.index
1277 1277 d = i[self.rev(node)]
1278 1278 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1279 1279
1280 1280 def chainlen(self, rev):
1281 1281 return self._chaininfo(rev)[0]
1282 1282
1283 1283 def _chaininfo(self, rev):
1284 1284 chaininfocache = self._chaininfocache
1285 1285 if rev in chaininfocache:
1286 1286 return chaininfocache[rev]
1287 1287 index = self.index
1288 1288 generaldelta = self._generaldelta
1289 1289 iterrev = rev
1290 1290 e = index[iterrev]
1291 1291 clen = 0
1292 1292 compresseddeltalen = 0
1293 1293 while iterrev != e[3]:
1294 1294 clen += 1
1295 1295 compresseddeltalen += e[1]
1296 1296 if generaldelta:
1297 1297 iterrev = e[3]
1298 1298 else:
1299 1299 iterrev -= 1
1300 1300 if iterrev in chaininfocache:
1301 1301 t = chaininfocache[iterrev]
1302 1302 clen += t[0]
1303 1303 compresseddeltalen += t[1]
1304 1304 break
1305 1305 e = index[iterrev]
1306 1306 else:
1307 1307 # Add text length of base since decompressing that also takes
1308 1308 # work. For cache hits the length is already included.
1309 1309 compresseddeltalen += e[1]
1310 1310 r = (clen, compresseddeltalen)
1311 1311 chaininfocache[rev] = r
1312 1312 return r
1313 1313
1314 1314 def _deltachain(self, rev, stoprev=None):
1315 1315 """Obtain the delta chain for a revision.
1316 1316
1317 1317 ``stoprev`` specifies a revision to stop at. If not specified, we
1318 1318 stop at the base of the chain.
1319 1319
1320 1320 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1321 1321 revs in ascending order and ``stopped`` is a bool indicating whether
1322 1322 ``stoprev`` was hit.
1323 1323 """
1324 1324 # Try C implementation.
1325 1325 try:
1326 1326 return self.index.deltachain(rev, stoprev, self._generaldelta)
1327 1327 except AttributeError:
1328 1328 pass
1329 1329
1330 1330 chain = []
1331 1331
1332 1332 # Alias to prevent attribute lookup in tight loop.
1333 1333 index = self.index
1334 1334 generaldelta = self._generaldelta
1335 1335
1336 1336 iterrev = rev
1337 1337 e = index[iterrev]
1338 1338 while iterrev != e[3] and iterrev != stoprev:
1339 1339 chain.append(iterrev)
1340 1340 if generaldelta:
1341 1341 iterrev = e[3]
1342 1342 else:
1343 1343 iterrev -= 1
1344 1344 e = index[iterrev]
1345 1345
1346 1346 if iterrev == stoprev:
1347 1347 stopped = True
1348 1348 else:
1349 1349 chain.append(iterrev)
1350 1350 stopped = False
1351 1351
1352 1352 chain.reverse()
1353 1353 return chain, stopped
1354 1354
1355 1355 def ancestors(self, revs, stoprev=0, inclusive=False):
1356 1356 """Generate the ancestors of 'revs' in reverse topological order.
1357 1357 Does not generate revs lower than stoprev.
1358 1358
1359 1359 See the documentation for ancestor.lazyancestors for more details."""
1360 1360
1361 1361 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1362 1362 inclusive=inclusive)
1363 1363
1364 1364 def descendants(self, revs):
1365 1365 """Generate the descendants of 'revs' in revision order.
1366 1366
1367 1367 Yield a sequence of revision numbers starting with a child of
1368 1368 some rev in revs, i.e., each revision is *not* considered a
1369 1369 descendant of itself. Results are ordered by revision number (a
1370 1370 topological sort)."""
1371 1371 first = min(revs)
1372 1372 if first == nullrev:
1373 1373 for i in self:
1374 1374 yield i
1375 1375 return
1376 1376
1377 1377 seen = set(revs)
1378 1378 for i in self.revs(start=first + 1):
1379 1379 for x in self.parentrevs(i):
1380 1380 if x != nullrev and x in seen:
1381 1381 seen.add(i)
1382 1382 yield i
1383 1383 break
1384 1384
1385 1385 def findcommonmissing(self, common=None, heads=None):
1386 1386 """Return a tuple of the ancestors of common and the ancestors of heads
1387 1387 that are not ancestors of common. In revset terminology, we return the
1388 1388 tuple:
1389 1389
1390 1390 ::common, (::heads) - (::common)
1391 1391
1392 1392 The list is sorted by revision number, meaning it is
1393 1393 topologically sorted.
1394 1394
1395 1395 'heads' and 'common' are both lists of node IDs. If heads is
1396 1396 not supplied, uses all of the revlog's heads. If common is not
1397 1397 supplied, uses nullid."""
1398 1398 if common is None:
1399 1399 common = [nullid]
1400 1400 if heads is None:
1401 1401 heads = self.heads()
1402 1402
1403 1403 common = [self.rev(n) for n in common]
1404 1404 heads = [self.rev(n) for n in heads]
1405 1405
1406 1406 # we want the ancestors, but inclusive
1407 1407 class lazyset(object):
1408 1408 def __init__(self, lazyvalues):
1409 1409 self.addedvalues = set()
1410 1410 self.lazyvalues = lazyvalues
1411 1411
1412 1412 def __contains__(self, value):
1413 1413 return value in self.addedvalues or value in self.lazyvalues
1414 1414
1415 1415 def __iter__(self):
1416 1416 added = self.addedvalues
1417 1417 for r in added:
1418 1418 yield r
1419 1419 for r in self.lazyvalues:
1420 1420                 if r not in added:
1421 1421 yield r
1422 1422
1423 1423 def add(self, value):
1424 1424 self.addedvalues.add(value)
1425 1425
1426 1426 def update(self, values):
1427 1427 self.addedvalues.update(values)
1428 1428
1429 1429 has = lazyset(self.ancestors(common))
1430 1430 has.add(nullrev)
1431 1431 has.update(common)
1432 1432
1433 1433 # take all ancestors from heads that aren't in has
1434 1434 missing = set()
1435 1435 visit = collections.deque(r for r in heads if r not in has)
1436 1436 while visit:
1437 1437 r = visit.popleft()
1438 1438 if r in missing:
1439 1439 continue
1440 1440 else:
1441 1441 missing.add(r)
1442 1442 for p in self.parentrevs(r):
1443 1443 if p not in has:
1444 1444 visit.append(p)
1445 1445 missing = list(missing)
1446 1446 missing.sort()
1447 1447 return has, [self.node(miss) for miss in missing]
1448 1448
1449 1449 def incrementalmissingrevs(self, common=None):
1450 1450 """Return an object that can be used to incrementally compute the
1451 1451 revision numbers of the ancestors of arbitrary sets that are not
1452 1452 ancestors of common. This is an ancestor.incrementalmissingancestors
1453 1453 object.
1454 1454
1455 1455 'common' is a list of revision numbers. If common is not supplied, uses
1456 1456 nullrev.
1457 1457 """
1458 1458 if common is None:
1459 1459 common = [nullrev]
1460 1460
1461 1461 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1462 1462
1463 1463 def findmissingrevs(self, common=None, heads=None):
1464 1464 """Return the revision numbers of the ancestors of heads that
1465 1465 are not ancestors of common.
1466 1466
1467 1467 More specifically, return a list of revision numbers corresponding to
1468 1468 nodes N such that every N satisfies the following constraints:
1469 1469
1470 1470 1. N is an ancestor of some node in 'heads'
1471 1471 2. N is not an ancestor of any node in 'common'
1472 1472
1473 1473 The list is sorted by revision number, meaning it is
1474 1474 topologically sorted.
1475 1475
1476 1476 'heads' and 'common' are both lists of revision numbers. If heads is
1477 1477 not supplied, uses all of the revlog's heads. If common is not
1478 1478 supplied, uses nullid."""
1479 1479 if common is None:
1480 1480 common = [nullrev]
1481 1481 if heads is None:
1482 1482 heads = self.headrevs()
1483 1483
1484 1484 inc = self.incrementalmissingrevs(common=common)
1485 1485 return inc.missingancestors(heads)
1486 1486
1487 1487 def findmissing(self, common=None, heads=None):
1488 1488 """Return the ancestors of heads that are not ancestors of common.
1489 1489
1490 1490 More specifically, return a list of nodes N such that every N
1491 1491 satisfies the following constraints:
1492 1492
1493 1493 1. N is an ancestor of some node in 'heads'
1494 1494 2. N is not an ancestor of any node in 'common'
1495 1495
1496 1496 The list is sorted by revision number, meaning it is
1497 1497 topologically sorted.
1498 1498
1499 1499 'heads' and 'common' are both lists of node IDs. If heads is
1500 1500 not supplied, uses all of the revlog's heads. If common is not
1501 1501 supplied, uses nullid."""
1502 1502 if common is None:
1503 1503 common = [nullid]
1504 1504 if heads is None:
1505 1505 heads = self.heads()
1506 1506
1507 1507 common = [self.rev(n) for n in common]
1508 1508 heads = [self.rev(n) for n in heads]
1509 1509
1510 1510 inc = self.incrementalmissingrevs(common=common)
1511 1511 return [self.node(r) for r in inc.missingancestors(heads)]
1512 1512
1513 1513 def nodesbetween(self, roots=None, heads=None):
1514 1514 """Return a topological path from 'roots' to 'heads'.
1515 1515
1516 1516 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1517 1517 topologically sorted list of all nodes N that satisfy both of
1518 1518 these constraints:
1519 1519
1520 1520 1. N is a descendant of some node in 'roots'
1521 1521 2. N is an ancestor of some node in 'heads'
1522 1522
1523 1523 Every node is considered to be both a descendant and an ancestor
1524 1524 of itself, so every reachable node in 'roots' and 'heads' will be
1525 1525 included in 'nodes'.
1526 1526
1527 1527 'outroots' is the list of reachable nodes in 'roots', i.e., the
1528 1528 subset of 'roots' that is returned in 'nodes'. Likewise,
1529 1529 'outheads' is the subset of 'heads' that is also in 'nodes'.
1530 1530
1531 1531 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1532 1532 unspecified, uses nullid as the only root. If 'heads' is
1533 1533 unspecified, uses list of all of the revlog's heads."""
1534 1534 nonodes = ([], [], [])
1535 1535 if roots is not None:
1536 1536 roots = list(roots)
1537 1537 if not roots:
1538 1538 return nonodes
1539 1539 lowestrev = min([self.rev(n) for n in roots])
1540 1540 else:
1541 1541 roots = [nullid] # Everybody's a descendant of nullid
1542 1542 lowestrev = nullrev
1543 1543 if (lowestrev == nullrev) and (heads is None):
1544 1544 # We want _all_ the nodes!
1545 1545 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1546 1546 if heads is None:
1547 1547 # All nodes are ancestors, so the latest ancestor is the last
1548 1548 # node.
1549 1549 highestrev = len(self) - 1
1550 1550 # Set ancestors to None to signal that every node is an ancestor.
1551 1551 ancestors = None
1552 1552 # Set heads to an empty dictionary for later discovery of heads
1553 1553 heads = {}
1554 1554 else:
1555 1555 heads = list(heads)
1556 1556 if not heads:
1557 1557 return nonodes
1558 1558 ancestors = set()
1559 1559 # Turn heads into a dictionary so we can remove 'fake' heads.
1560 1560 # Also, later we will be using it to filter out the heads we can't
1561 1561 # find from roots.
1562 1562 heads = dict.fromkeys(heads, False)
1563 1563 # Start at the top and keep marking parents until we're done.
1564 1564 nodestotag = set(heads)
1565 1565 # Remember where the top was so we can use it as a limit later.
1566 1566 highestrev = max([self.rev(n) for n in nodestotag])
1567 1567 while nodestotag:
1568 1568 # grab a node to tag
1569 1569 n = nodestotag.pop()
1570 1570 # Never tag nullid
1571 1571 if n == nullid:
1572 1572 continue
1573 1573 # A node's revision number represents its place in a
1574 1574 # topologically sorted list of nodes.
1575 1575 r = self.rev(n)
1576 1576 if r >= lowestrev:
1577 1577 if n not in ancestors:
1578 1578 # If we are possibly a descendant of one of the roots
1579 1579 # and we haven't already been marked as an ancestor
1580 1580 ancestors.add(n) # Mark as ancestor
1581 1581 # Add non-nullid parents to list of nodes to tag.
1582 1582 nodestotag.update([p for p in self.parents(n) if
1583 1583 p != nullid])
1584 1584 elif n in heads: # We've seen it before, is it a fake head?
1585 1585                     # So it is; real heads should not be the ancestors of
1586 1586                     # any other heads.
1587 1587 heads.pop(n)
1588 1588 if not ancestors:
1589 1589 return nonodes
1590 1590 # Now that we have our set of ancestors, we want to remove any
1591 1591 # roots that are not ancestors.
1592 1592
1593 1593 # If one of the roots was nullid, everything is included anyway.
1594 1594 if lowestrev > nullrev:
1595 1595 # But, since we weren't, let's recompute the lowest rev to not
1596 1596 # include roots that aren't ancestors.
1597 1597
1598 1598 # Filter out roots that aren't ancestors of heads
1599 1599 roots = [root for root in roots if root in ancestors]
1600 1600 # Recompute the lowest revision
1601 1601 if roots:
1602 1602 lowestrev = min([self.rev(root) for root in roots])
1603 1603 else:
1604 1604 # No more roots? Return empty list
1605 1605 return nonodes
1606 1606 else:
1607 1607 # We are descending from nullid, and don't need to care about
1608 1608 # any other roots.
1609 1609 lowestrev = nullrev
1610 1610 roots = [nullid]
1611 1611 # Transform our roots list into a set.
1612 1612 descendants = set(roots)
1613 1613 # Also, keep the original roots so we can filter out roots that aren't
1614 1614 # 'real' roots (i.e. are descended from other roots).
1615 1615 roots = descendants.copy()
1616 1616 # Our topologically sorted list of output nodes.
1617 1617 orderedout = []
1618 1618 # Don't start at nullid since we don't want nullid in our output list,
1619 1619 # and if nullid shows up in descendants, empty parents will look like
1620 1620 # they're descendants.
1621 1621 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1622 1622 n = self.node(r)
1623 1623 isdescendant = False
1624 1624 if lowestrev == nullrev: # Everybody is a descendant of nullid
1625 1625 isdescendant = True
1626 1626 elif n in descendants:
1627 1627 # n is already a descendant
1628 1628 isdescendant = True
1629 1629 # This check only needs to be done here because all the roots
1630 1630                     # will start being marked as descendants before the loop.
1631 1631 if n in roots:
1632 1632 # If n was a root, check if it's a 'real' root.
1633 1633 p = tuple(self.parents(n))
1634 1634 # If any of its parents are descendants, it's not a root.
1635 1635 if (p[0] in descendants) or (p[1] in descendants):
1636 1636 roots.remove(n)
1637 1637 else:
1638 1638 p = tuple(self.parents(n))
1639 1639                 # A node is a descendant if either of its parents is a
1640 1640                 # descendant. (We seeded the descendants set with the roots
1641 1641                 # up there, remember?)
1642 1642 if (p[0] in descendants) or (p[1] in descendants):
1643 1643 descendants.add(n)
1644 1644 isdescendant = True
1645 1645 if isdescendant and ((ancestors is None) or (n in ancestors)):
1646 1646 # Only include nodes that are both descendants and ancestors.
1647 1647 orderedout.append(n)
1648 1648 if (ancestors is not None) and (n in heads):
1649 1649 # We're trying to figure out which heads are reachable
1650 1650 # from roots.
1651 1651 # Mark this head as having been reached
1652 1652 heads[n] = True
1653 1653 elif ancestors is None:
1654 1654 # Otherwise, we're trying to discover the heads.
1655 1655 # Assume this is a head because if it isn't, the next step
1656 1656 # will eventually remove it.
1657 1657 heads[n] = True
1658 1658 # But, obviously its parents aren't.
1659 1659 for p in self.parents(n):
1660 1660 heads.pop(p, None)
1661 1661 heads = [head for head, flag in heads.iteritems() if flag]
1662 1662 roots = list(roots)
1663 1663 assert orderedout
1664 1664 assert roots
1665 1665 assert heads
1666 1666 return (orderedout, roots, heads)
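    # Illustrative sketch (toy linear history 0..4, not from the original
    # source): nodesbetween([node(1)], [node(3)]) returns
    # ([node(1), node(2), node(3)], [node(1)], [node(3)]) -- the
    # topological path, the reachable roots, and the reachable heads.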
1667 1667
1668 1668 def headrevs(self):
1669 1669 try:
1670 1670 return self.index.headrevs()
1671 1671 except AttributeError:
1672 1672 return self._headrevs()
1673 1673
1674 1674 def computephases(self, roots):
1675 1675 return self.index.computephasesmapsets(roots)
1676 1676
1677 1677 def _headrevs(self):
1678 1678 count = len(self)
1679 1679 if not count:
1680 1680 return [nullrev]
1681 1681         # we won't iterate over filtered revs, so nobody is a head at start
1682 1682 ishead = [0] * (count + 1)
1683 1683 index = self.index
1684 1684 for r in self:
1685 1685             ishead[r] = 1 # I may be a head
1686 1686             e = index[r]
1687 1687             ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1688 1688 return [r for r, val in enumerate(ishead) if val]
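    # Minimal sketch of the marking pass above on a toy graph (assumed
    # parent pairs, not real index entries): revs 0..3, where rev 3
    # merges revs 1 and 2.
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    #   ishead = [1] * 4
    #   for r in range(4):
    #       for p in parents[r]:
    #           if p >= 0:
    #               ishead[p] = 0
    #   # only rev 3 stays marked, so it is the sole head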
1689 1689
1690 1690 def heads(self, start=None, stop=None):
1691 1691 """return the list of all nodes that have no children
1692 1692
1693 1693 if start is specified, only heads that are descendants of
1694 1694 start will be returned
1695 1695 if stop is specified, it will consider all the revs from stop
1696 1696 as if they had no children
1697 1697 """
1698 1698 if start is None and stop is None:
1699 1699 if not len(self):
1700 1700 return [nullid]
1701 1701 return [self.node(r) for r in self.headrevs()]
1702 1702
1703 1703 if start is None:
1704 1704 start = nullid
1705 1705 if stop is None:
1706 1706 stop = []
1707 1707 stoprevs = set([self.rev(n) for n in stop])
1708 1708 startrev = self.rev(start)
1709 1709 reachable = {startrev}
1710 1710 heads = {startrev}
1711 1711
1712 1712 parentrevs = self.parentrevs
1713 1713 for r in self.revs(start=startrev + 1):
1714 1714 for p in parentrevs(r):
1715 1715 if p in reachable:
1716 1716 if r not in stoprevs:
1717 1717 reachable.add(r)
1718 1718 heads.add(r)
1719 1719 if p in heads and p not in stoprevs:
1720 1720 heads.remove(p)
1721 1721
1722 1722 return [self.node(r) for r in heads]
1723 1723
1724 1724 def children(self, node):
1725 1725 """find the children of a given node"""
1726 1726 c = []
1727 1727 p = self.rev(node)
1728 1728 for r in self.revs(start=p + 1):
1729 1729 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1730 1730 if prevs:
1731 1731 for pr in prevs:
1732 1732 if pr == p:
1733 1733 c.append(self.node(r))
1734 1734 elif p == nullrev:
1735 1735 c.append(self.node(r))
1736 1736 return c
1737 1737
1738 1738 def commonancestorsheads(self, a, b):
1739 1739 """calculate all the heads of the common ancestors of nodes a and b"""
1740 1740 a, b = self.rev(a), self.rev(b)
1741 1741 ancs = self._commonancestorsheads(a, b)
1742 1742 return pycompat.maplist(self.node, ancs)
1743 1743
1744 1744 def _commonancestorsheads(self, *revs):
1745 1745 """calculate all the heads of the common ancestors of revs"""
1746 1746 try:
1747 1747 ancs = self.index.commonancestorsheads(*revs)
1748 1748 except (AttributeError, OverflowError): # C implementation failed
1749 1749 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1750 1750 return ancs
1751 1751
1752 1752 def isancestor(self, a, b):
1753 1753 """return True if node a is an ancestor of node b
1754 1754
1755 1755 A revision is considered an ancestor of itself."""
1756 1756 a, b = self.rev(a), self.rev(b)
1757 1757 return self.isancestorrev(a, b)
1758 1758
1759 1759 def isancestorrev(self, a, b):
1760 1760 """return True if revision a is an ancestor of revision b
1761 1761
1762 1762 A revision is considered an ancestor of itself.
1763 1763
1764 1764 The implementation of this is trivial but the use of
1765 1765 commonancestorsheads is not."""
1766 1766 if a == nullrev:
1767 1767 return True
1768 1768 elif a == b:
1769 1769 return True
1770 1770 elif a > b:
1771 1771 return False
1772 1772 return a in self._commonancestorsheads(a, b)
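    # The membership test above works because, when a is an ancestor of
    # b, a itself is the unique head of the common ancestors of a and b.
    # E.g. in a toy linear history 0..2 (illustrative only):
    #
    #   rl._commonancestorsheads(0, 2)  # -> [0], so 0 is an ancestor of 2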
1773 1773
1774 1774 def ancestor(self, a, b):
1775 1775 """calculate the "best" common ancestor of nodes a and b"""
1776 1776
1777 1777 a, b = self.rev(a), self.rev(b)
1778 1778 try:
1779 1779 ancs = self.index.ancestors(a, b)
1780 1780 except (AttributeError, OverflowError):
1781 1781 ancs = ancestor.ancestors(self.parentrevs, a, b)
1782 1782 if ancs:
1783 1783 # choose a consistent winner when there's a tie
1784 1784 return min(map(self.node, ancs))
1785 1785 return nullid
1786 1786
1787 1787 def _match(self, id):
1788 1788 if isinstance(id, int):
1789 1789 # rev
1790 1790 return self.node(id)
1791 1791 if len(id) == 20:
1792 1792 # possibly a binary node
1793 1793 # odds of a binary node being all hex in ASCII are 1 in 10**25
1794 1794 try:
1795 1795 node = id
1796 1796 self.rev(node) # quick search the index
1797 1797 return node
1798 1798 except LookupError:
1799 1799 pass # may be partial hex id
1800 1800 try:
1801 1801 # str(rev)
1802 1802 rev = int(id)
1803 1803 if "%d" % rev != id:
1804 1804 raise ValueError
1805 1805 if rev < 0:
1806 1806 rev = len(self) + rev
1807 1807 if rev < 0 or rev >= len(self):
1808 1808 raise ValueError
1809 1809 return self.node(rev)
1810 1810 except (ValueError, OverflowError):
1811 1811 pass
1812 1812 if len(id) == 40:
1813 1813 try:
1814 1814 # a full hex nodeid?
1815 1815 node = bin(id)
1816 1816 self.rev(node)
1817 1817 return node
1818 1818 except (TypeError, LookupError):
1819 1819 pass
1820 1820
1821 1821 def _partialmatch(self, id):
1822 1822         # we don't care about wdirfilenodeids as they should always be full hashes
1823 1823 maybewdir = wdirhex.startswith(id)
1824 1824 try:
1825 1825 partial = self.index.partialmatch(id)
1826 1826 if partial and self.hasnode(partial):
1827 1827 if maybewdir:
1828 1828 # single 'ff...' match in radix tree, ambiguous with wdir
1829 1829 raise RevlogError
1830 1830 return partial
1831 1831 if maybewdir:
1832 1832 # no 'ff...' match in radix tree, wdir identified
1833 1833 raise error.WdirUnsupported
1834 1834 return None
1835 1835 except RevlogError:
1836 1836 # parsers.c radix tree lookup gave multiple matches
1837 1837 # fast path: for unfiltered changelog, radix tree is accurate
1838 1838 if not getattr(self, 'filteredrevs', None):
1839 1839 raise AmbiguousPrefixLookupError(id, self.indexfile,
1840 1840 _('ambiguous identifier'))
1841 1841 # fall through to slow path that filters hidden revisions
1842 1842 except (AttributeError, ValueError):
1843 1843 # we are pure python, or key was too short to search radix tree
1844 1844 pass
1845 1845
1846 1846 if id in self._pcache:
1847 1847 return self._pcache[id]
1848 1848
1849 1849 if len(id) <= 40:
1850 1850 try:
1851 1851 # hex(node)[:...]
1852 1852 l = len(id) // 2 # grab an even number of digits
1853 1853 prefix = bin(id[:l * 2])
1854 1854 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1855 1855 nl = [n for n in nl if hex(n).startswith(id) and
1856 1856 self.hasnode(n)]
1857 1857 if nullhex.startswith(id):
1858 1858 nl.append(nullid)
1859 1859 if len(nl) > 0:
1860 1860 if len(nl) == 1 and not maybewdir:
1861 1861 self._pcache[id] = nl[0]
1862 1862 return nl[0]
1863 1863 raise AmbiguousPrefixLookupError(id, self.indexfile,
1864 1864 _('ambiguous identifier'))
1865 1865 if maybewdir:
1866 1866 raise error.WdirUnsupported
1867 1867 return None
1868 1868 except TypeError:
1869 1869 pass
1870 1870
1871 1871 def lookup(self, id):
1872 1872 """locate a node based on:
1873 1873 - revision number or str(revision number)
1874 1874 - nodeid or subset of hex nodeid
1875 1875 """
1876 1876 n = self._match(id)
1877 1877 if n is not None:
1878 1878 return n
1879 1879 n = self._partialmatch(id)
1880 1880 if n:
1881 1881 return n
1882 1882
1883 1883 raise LookupError(id, self.indexfile, _('no match found'))
1884 1884
1885 1885 def shortest(self, node, minlength=1):
1886 1886 """Find the shortest unambiguous prefix that matches node."""
1887 1887 def isvalid(prefix):
1888 1888 try:
1889 1889 node = self._partialmatch(prefix)
1890 1890 except error.RevlogError:
1891 1891 return False
1892 1892 except error.WdirUnsupported:
1893 1893 # single 'ff...' match
1894 1894 return True
1895 1895 if node is None:
1896 1896 raise LookupError(node, self.indexfile, _('no node'))
1897 1897 return True
1898 1898
1899 1899 def maybewdir(prefix):
1900 1900 return all(c == 'f' for c in prefix)
1901 1901
1902 1902 hexnode = hex(node)
1903 1903
1904 1904 def disambiguate(hexnode, minlength):
1905 1905 """Disambiguate against wdirid."""
1906 1906 for length in range(minlength, 41):
1907 1907 prefix = hexnode[:length]
1908 1908 if not maybewdir(prefix):
1909 1909 return prefix
1910 1910
1911 1911 if not getattr(self, 'filteredrevs', None):
1912 1912 try:
1913 1913 length = max(self.index.shortest(node), minlength)
1914 1914 return disambiguate(hexnode, length)
1915 1915 except RevlogError:
1916 1916 if node != wdirid:
1917 1917 raise LookupError(node, self.indexfile, _('no node'))
1918 1918 except AttributeError:
1919 1919 # Fall through to pure code
1920 1920 pass
1921 1921
1922 1922 if node == wdirid:
1923 1923 for length in range(minlength, 41):
1924 1924 prefix = hexnode[:length]
1925 1925 if isvalid(prefix):
1926 1926 return prefix
1927 1927
1928 1928 for length in range(minlength, 41):
1929 1929 prefix = hexnode[:length]
1930 1930 if isvalid(prefix):
1931 1931 return disambiguate(hexnode, length)
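    # Usage sketch (hypothetical node values): if only two nodes start
    # with "a1", say "a1b2..." and "a1c3...", their shortest unambiguous
    # prefixes are three digits long:
    #
    #   rl.shortest(node)               # e.g. 'a1b'
    #   rl.shortest(node, minlength=5)  # e.g. 'a1b2f'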
1932 1932
1933 1933 def cmp(self, node, text):
1934 1934 """compare text with a given file revision
1935 1935
1936 1936         returns True if text is different from what is stored.
1937 1937 """
1938 1938 p1, p2 = self.parents(node)
1939 1939 return hash(text, p1, p2) != node
1940 1940
1941 1941 def _cachesegment(self, offset, data):
1942 1942 """Add a segment to the revlog cache.
1943 1943
1944 1944 Accepts an absolute offset and the data that is at that location.
1945 1945 """
1946 1946 o, d = self._chunkcache
1947 1947 # try to add to existing cache
1948 1948 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1949 1949 self._chunkcache = o, d + data
1950 1950 else:
1951 1951 self._chunkcache = offset, data
1952 1952
1953 1953 def _readsegment(self, offset, length, df=None):
1954 1954 """Load a segment of raw data from the revlog.
1955 1955
1956 1956 Accepts an absolute offset, length to read, and an optional existing
1957 1957 file handle to read from.
1958 1958
1959 1959 If an existing file handle is passed, it will be seeked and the
1960 1960 original seek position will NOT be restored.
1961 1961
1962 1962 Returns a str or buffer of raw byte data.
1963 1963 """
1964 1964 # Cache data both forward and backward around the requested
1965 1965 # data, in a fixed size window. This helps speed up operations
1966 1966 # involving reading the revlog backwards.
1967 1967 cachesize = self._chunkcachesize
1968 1968 realoffset = offset & ~(cachesize - 1)
1969 1969 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1970 1970 - realoffset)
1971 1971 with self._datareadfp(df) as df:
1972 1972 df.seek(realoffset)
1973 1973 d = df.read(reallength)
1974 1974 self._cachesegment(realoffset, d)
1975 1975 if offset != realoffset or reallength != length:
1976 1976 return util.buffer(d, offset - realoffset, length)
1977 1977 return d
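    # Worked example of the window rounding above, assuming a chunk
    # cache size of 65536 bytes: a 100-byte read at offset 70000 widens
    # to the enclosing cache-aligned block:
    #
    #   realoffset = 70000 & ~65535                            # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # 65536
    #
    # so the whole block [65536, 131072) is read and cached, and the
    # caller gets a buffer view of just the 100 requested bytes.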
1978 1978
1979 1979 def _getsegment(self, offset, length, df=None):
1980 1980 """Obtain a segment of raw data from the revlog.
1981 1981
1982 1982 Accepts an absolute offset, length of bytes to obtain, and an
1983 1983 optional file handle to the already-opened revlog. If the file
1984 1984         handle is used, its original seek position will not be preserved.
1985 1985
1986 1986 Requests for data may be returned from a cache.
1987 1987
1988 1988 Returns a str or a buffer instance of raw byte data.
1989 1989 """
1990 1990 o, d = self._chunkcache
1991 1991 l = len(d)
1992 1992
1993 1993 # is it in the cache?
1994 1994 cachestart = offset - o
1995 1995 cacheend = cachestart + length
1996 1996 if cachestart >= 0 and cacheend <= l:
1997 1997 if cachestart == 0 and cacheend == l:
1998 1998 return d # avoid a copy
1999 1999 return util.buffer(d, cachestart, cacheend - cachestart)
2000 2000
2001 2001 return self._readsegment(offset, length, df=df)
2002 2002
2003 2003 def _getsegmentforrevs(self, startrev, endrev, df=None):
2004 2004 """Obtain a segment of raw data corresponding to a range of revisions.
2005 2005
2006 2006 Accepts the start and end revisions and an optional already-open
2007 2007         file handle to be used for reading. If the file handle is used, its
2008 2008 seek position will not be preserved.
2009 2009
2010 2010 Requests for data may be satisfied by a cache.
2011 2011
2012 2012 Returns a 2-tuple of (offset, data) for the requested range of
2013 2013 revisions. Offset is the integer offset from the beginning of the
2014 2014 revlog and data is a str or buffer of the raw byte data.
2015 2015
2016 2016 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2017 2017 to determine where each revision's data begins and ends.
2018 2018 """
2019 2019 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2020 2020 # (functions are expensive).
2021 2021 index = self.index
2022 2022 istart = index[startrev]
2023 2023 start = int(istart[0] >> 16)
2024 2024 if startrev == endrev:
2025 2025 end = start + istart[1]
2026 2026 else:
2027 2027 iend = index[endrev]
2028 2028 end = int(iend[0] >> 16) + iend[1]
2029 2029
2030 2030 if self._inline:
2031 2031 start += (startrev + 1) * self._io.size
2032 2032 end += (endrev + 1) * self._io.size
2033 2033 length = end - start
2034 2034
2035 2035 return start, self._getsegment(start, length, df=df)
2036 2036
2037 2037 def _chunk(self, rev, df=None):
2038 2038 """Obtain a single decompressed chunk for a revision.
2039 2039
2040 2040 Accepts an integer revision and an optional already-open file handle
2041 2041 to be used for reading. If used, the seek position of the file will not
2042 2042 be preserved.
2043 2043
2044 2044 Returns a str holding uncompressed data for the requested revision.
2045 2045 """
2046 2046 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2047 2047
2048 2048 def _chunks(self, revs, df=None, targetsize=None):
2049 2049 """Obtain decompressed chunks for the specified revisions.
2050 2050
2051 2051 Accepts an iterable of numeric revisions that are assumed to be in
2052 2052 ascending order. Also accepts an optional already-open file handle
2053 2053 to be used for reading. If used, the seek position of the file will
2054 2054 not be preserved.
2055 2055
2056 2056 This function is similar to calling ``self._chunk()`` multiple times,
2057 2057 but is faster.
2058 2058
2059 2059 Returns a list with decompressed data for each requested revision.
2060 2060 """
2061 2061 if not revs:
2062 2062 return []
2063 2063 start = self.start
2064 2064 length = self.length
2065 2065 inline = self._inline
2066 2066 iosize = self._io.size
2067 2067 buffer = util.buffer
2068 2068
2069 2069 l = []
2070 2070 ladd = l.append
2071 2071
2072 2072 if not self._withsparseread:
2073 2073 slicedchunks = (revs,)
2074 2074 else:
2075 2075 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2076 2076
2077 2077 for revschunk in slicedchunks:
2078 2078 firstrev = revschunk[0]
2079 2079 # Skip trailing revisions with empty diff
2080 2080 for lastrev in revschunk[::-1]:
2081 2081 if length(lastrev) != 0:
2082 2082 break
2083 2083
2084 2084 try:
2085 2085 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2086 2086 except OverflowError:
2087 2087 # issue4215 - we can't cache a run of chunks greater than
2088 2088 # 2G on Windows
2089 2089 return [self._chunk(rev, df=df) for rev in revschunk]
2090 2090
2091 2091 decomp = self.decompress
2092 2092 for rev in revschunk:
2093 2093 chunkstart = start(rev)
2094 2094 if inline:
2095 2095 chunkstart += (rev + 1) * iosize
2096 2096 chunklength = length(rev)
2097 2097 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2098 2098
2099 2099 return l
2100 2100
2101 2101 def _chunkclear(self):
2102 2102 """Clear the raw chunk cache."""
2103 2103 self._chunkcache = (0, '')
2104 2104
2105 2105 def deltaparent(self, rev):
2106 2106 """return deltaparent of the given revision"""
2107 2107 base = self.index[rev][3]
2108 2108 if base == rev:
2109 2109 return nullrev
2110 2110 elif self._generaldelta:
2111 2111 return base
2112 2112 else:
2113 2113 return rev - 1
2114 2114
2115 2115 def issnapshot(self, rev):
2116 2116         """tells whether rev is a snapshot"""
2118 2118 if rev == nullrev:
2119 2119 return True
2120 2120 deltap = self.deltaparent(rev)
2121 2121 if deltap == nullrev:
2122 2122 return True
2123 2123 p1, p2 = self.parentrevs(rev)
2124 2124 if deltap in (p1, p2):
2125 2125 return False
2126 2126 return self.issnapshot(deltap)
2127 2127
2128 2128 def snapshotdepth(self, rev):
2129 2129         """number of snapshots in the chain before this one"""
2130 2130         if not self.issnapshot(rev):
2131 2131             raise ProgrammingError('revision %d not a snapshot' % rev)
2132 2132 return len(self._deltachain(rev)[0]) - 1
2133 2133
2134 2134 def revdiff(self, rev1, rev2):
2135 2135 """return or calculate a delta between two revisions
2136 2136
2137 2137 The delta calculated is in binary form and is intended to be written to
2138 2138 revlog data directly. So this function needs raw revision data.
2139 2139 """
2140 2140 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2141 2141 return bytes(self._chunk(rev2))
2142 2142
2143 2143 return mdiff.textdiff(self.revision(rev1, raw=True),
2144 2144 self.revision(rev2, raw=True))
2145 2145
2146 2146 def revision(self, nodeorrev, _df=None, raw=False):
2147 2147 """return an uncompressed revision of a given node or revision
2148 2148 number.
2149 2149
2150 2150 _df - an existing file handle to read from. (internal-only)
2151 2151 raw - an optional argument specifying if the revision data is to be
2152 2152 treated as raw data when applying flag transforms. 'raw' should be set
2153 2153 to True when generating changegroups or in debug commands.
2154 2154 """
2155 2155 if isinstance(nodeorrev, int):
2156 2156 rev = nodeorrev
2157 2157 node = self.node(rev)
2158 2158 else:
2159 2159 node = nodeorrev
2160 2160 rev = None
2161 2161
2162 2162 cachedrev = None
2163 2163 flags = None
2164 2164 rawtext = None
2165 2165 if node == nullid:
2166 2166 return ""
2167 2167 if self._cache:
2168 2168 if self._cache[0] == node:
2169 2169 # _cache only stores rawtext
2170 2170 if raw:
2171 2171 return self._cache[2]
2172 2172 # duplicated, but good for perf
2173 2173 if rev is None:
2174 2174 rev = self.rev(node)
2175 2175 if flags is None:
2176 2176 flags = self.flags(rev)
2177 2177 # no extra flags set, no flag processor runs, text = rawtext
2178 2178 if flags == REVIDX_DEFAULT_FLAGS:
2179 2179 return self._cache[2]
2180 2180 # rawtext is reusable. need to run flag processor
2181 2181 rawtext = self._cache[2]
2182 2182
2183 2183 cachedrev = self._cache[1]
2184 2184
2185 2185 # look up what we need to read
2186 2186 if rawtext is None:
2187 2187 if rev is None:
2188 2188 rev = self.rev(node)
2189 2189
2190 2190 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2191 2191 if stopped:
2192 2192 rawtext = self._cache[2]
2193 2193
2194 2194 # drop cache to save memory
2195 2195 self._cache = None
2196 2196
2197 2197 targetsize = None
2198 2198 rawsize = self.index[rev][2]
2199 2199 if 0 <= rawsize:
2200 2200 targetsize = 4 * rawsize
2201 2201
2202 2202 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2203 2203 if rawtext is None:
2204 2204 rawtext = bytes(bins[0])
2205 2205 bins = bins[1:]
2206 2206
2207 2207 rawtext = mdiff.patches(rawtext, bins)
2208 2208 self._cache = (node, rev, rawtext)
2209 2209
2210 2210 if flags is None:
2211 2211 if rev is None:
2212 2212 rev = self.rev(node)
2213 2213 flags = self.flags(rev)
2214 2214
2215 2215 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2216 2216 if validatehash:
2217 2217 self.checkhash(text, node, rev=rev)
2218 2218
2219 2219 return text
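    # Sketch of the reconstruction above (toy values, not real revlog
    # chunks): a delta chain decompresses to a full text followed by
    # mdiff patches, which are folded together in a single call:
    #
    #   bins = [b'full text of base', patch1, patch2]  # hypothetical
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])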
2220 2220
2221 2221 def hash(self, text, p1, p2):
2222 2222 """Compute a node hash.
2223 2223
2224 2224 Available as a function so that subclasses can replace the hash
2225 2225 as needed.
2226 2226 """
2227 2227 return hash(text, p1, p2)
2228 2228
2229 2229 def _processflags(self, text, flags, operation, raw=False):
2230 2230         """Inspect revision data flags and apply transforms defined by
2231 2231 registered flag processors.
2232 2232
2233 2233 ``text`` - the revision data to process
2234 2234 ``flags`` - the revision flags
2235 2235 ``operation`` - the operation being performed (read or write)
2236 2236 ``raw`` - an optional argument describing if the raw transform should be
2237 2237 applied.
2238 2238
2239 2239 This method processes the flags in the order (or reverse order if
2240 2240 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2241 2241 flag processors registered for present flags. The order of flags defined
2242 2242 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2243 2243
2244 2244 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2245 2245 processed text and ``validatehash`` is a bool indicating whether the
2246 2246 returned text should be checked for hash integrity.
2247 2247
2248 2248 Note: If the ``raw`` argument is set, it has precedence over the
2249 2249 operation and will only update the value of ``validatehash``.
2250 2250 """
2251 2251 # fast path: no flag processors will run
2252 2252 if flags == 0:
2253 2253 return text, True
2254 2254         if operation not in ('read', 'write'):
2255 2255             raise ProgrammingError(_("invalid '%s' operation") % operation)
2256 2256 # Check all flags are known.
2257 2257 if flags & ~REVIDX_KNOWN_FLAGS:
2258 2258 raise RevlogError(_("incompatible revision flag '%#x'") %
2259 2259 (flags & ~REVIDX_KNOWN_FLAGS))
2260 2260 validatehash = True
2261 2261 # Depending on the operation (read or write), the order might be
2262 2262 # reversed due to non-commutative transforms.
2263 2263 orderedflags = REVIDX_FLAGS_ORDER
2264 2264 if operation == 'write':
2265 2265 orderedflags = reversed(orderedflags)
2266 2266
2267 2267 for flag in orderedflags:
2268 2268 # If a flagprocessor has been registered for a known flag, apply the
2269 2269 # related operation transform and update result tuple.
2270 2270 if flag & flags:
2271 2271 vhash = True
2272 2272
2273 2273 if flag not in _flagprocessors:
2274 2274 message = _("missing processor for flag '%#x'") % (flag)
2275 2275 raise RevlogError(message)
2276 2276
2277 2277 processor = _flagprocessors[flag]
2278 2278 if processor is not None:
2279 2279 readtransform, writetransform, rawtransform = processor
2280 2280
2281 2281 if raw:
2282 2282 vhash = rawtransform(self, text)
2283 2283 elif operation == 'read':
2284 2284 text, vhash = readtransform(self, text)
2285 2285 else: # write operation
2286 2286 text, vhash = writetransform(self, text)
2287 2287 validatehash = validatehash and vhash
2288 2288
2289 2289 return text, validatehash
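    # Sketch of what a registered processor looks like (hypothetical
    # no-op transforms); each entry is the (read, write, raw) triple
    # unpacked above:
    #
    #   def _read(rl, text):
    #       return text, True   # (possibly transformed text, vhash)
    #   def _write(rl, text):
    #       return text, True
    #   def _raw(rl, text):
    #       return True         # only validates, never rewrites
    #
    #   addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))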
2290 2290
2291 2291 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2292 2292 """Check node hash integrity.
2293 2293
2294 2294 Available as a function so that subclasses can extend hash mismatch
2295 2295 behaviors as needed.
2296 2296 """
2297 2297 try:
2298 2298 if p1 is None and p2 is None:
2299 2299 p1, p2 = self.parents(node)
2300 2300 if node != self.hash(text, p1, p2):
2301 2301 revornode = rev
2302 2302 if revornode is None:
2303 2303 revornode = templatefilters.short(hex(node))
2304 2304 raise RevlogError(_("integrity check failed on %s:%s")
2305 2305 % (self.indexfile, pycompat.bytestr(revornode)))
2306 2306 except RevlogError:
2307 2307 if self._censorable and _censoredtext(text):
2308 2308 raise error.CensoredNodeError(self.indexfile, node, text)
2309 2309 raise
2310 2310
2311 2311 def _enforceinlinesize(self, tr, fp=None):
2312 2312 """Check if the revlog is too big for inline and convert if so.
2313 2313
2314 2314 This should be called after revisions are added to the revlog. If the
2315 2315 revlog has grown too large to be an inline revlog, it will convert it
2316 2316 to use multiple index and data files.
2317 2317 """
2318 2318 tiprev = len(self) - 1
2319 2319 if (not self._inline or
2320 2320 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2321 2321 return
2322 2322
2323 2323 trinfo = tr.find(self.indexfile)
2324 2324 if trinfo is None:
2325 2325 raise RevlogError(_("%s not found in the transaction")
2326 2326 % self.indexfile)
2327 2327
2328 2328 trindex = trinfo[2]
2329 2329 if trindex is not None:
2330 2330 dataoff = self.start(trindex)
2331 2331 else:
2332 2332 # revlog was stripped at start of transaction, use all leftover data
2333 2333 trindex = len(self) - 1
2334 2334 dataoff = self.end(tiprev)
2335 2335
2336 2336 tr.add(self.datafile, dataoff)
2337 2337
2338 2338 if fp:
2339 2339 fp.flush()
2340 2340 fp.close()
2341 2341
2342 2342 with self._datafp('w') as df:
2343 2343 for r in self:
2344 2344 df.write(self._getsegmentforrevs(r, r)[1])
2345 2345
2346 2346 with self._indexfp('w') as fp:
2347 2347 self.version &= ~FLAG_INLINE_DATA
2348 2348 self._inline = False
2349 2349 io = self._io
2350 2350 for i in self:
2351 2351 e = io.packentry(self.index[i], self.node, self.version, i)
2352 2352 fp.write(e)
2353 2353
2354 2354         # the temp file replaces the real index when we exit the context
2355 2355 # manager
2356 2356
2357 2357 tr.replace(self.indexfile, trindex * self._io.size)
2358 2358 self._chunkclear()
2359 2359
2360 2360 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2361 2361 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2362 2362 """add a revision to the log
2363 2363
2364 2364 text - the revision data to add
2365 2365 transaction - the transaction object used for rollback
2366 2366 link - the linkrev data to add
2367 2367 p1, p2 - the parent nodeids of the revision
2368 2368 cachedelta - an optional precomputed delta
2369 2369 node - nodeid of revision; typically node is not specified, and it is
2370 2370             computed by default as hash(text, p1, p2); however, subclasses might
2371 2371             use a different hashing method (and override checkhash() in that case)
2372 2372 flags - the known flags to set on the revision
2373 2373 deltacomputer - an optional _deltacomputer instance shared between
2374 2374 multiple calls
2375 2375 """
2376 2376 if link == nullrev:
2377 2377 raise RevlogError(_("attempted to add linkrev -1 to %s")
2378 2378 % self.indexfile)
2379 2379
2380 2380 if flags:
2381 2381 node = node or self.hash(text, p1, p2)
2382 2382
2383 2383 rawtext, validatehash = self._processflags(text, flags, 'write')
2384 2384
2385 2385 # If the flag processor modifies the revision data, ignore any provided
2386 2386 # cachedelta.
2387 2387 if rawtext != text:
2388 2388 cachedelta = None
2389 2389
2390 2390 if len(rawtext) > _maxentrysize:
2391 2391 raise RevlogError(
2392 2392 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2393 2393 % (self.indexfile, len(rawtext)))
2394 2394
2395 2395 node = node or self.hash(rawtext, p1, p2)
2396 2396 if node in self.nodemap:
2397 2397 return node
2398 2398
2399 2399 if validatehash:
2400 2400 self.checkhash(rawtext, node, p1=p1, p2=p2)
2401 2401
2402 2402 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2403 2403 flags, cachedelta=cachedelta,
2404 2404 deltacomputer=deltacomputer)
2405 2405
2406 2406 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2407 2407 cachedelta=None, deltacomputer=None):
2408 2408 """add a raw revision with known flags, node and parents
2409 2409         useful when reusing a revision not stored in this revlog (e.g. received
2410 2410         over the wire, or read from an external bundle).
2411 2411 """
2412 2412 dfh = None
2413 2413 if not self._inline:
2414 2414 dfh = self._datafp("a+")
2415 2415 ifh = self._indexfp("a+")
2416 2416 try:
2417 2417 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2418 2418 flags, cachedelta, ifh, dfh,
2419 2419 deltacomputer=deltacomputer)
2420 2420 finally:
2421 2421 if dfh:
2422 2422 dfh.close()
2423 2423 ifh.close()
2424 2424
2425 2425 def compress(self, data):
2426 2426 """Generate a possibly-compressed representation of data."""
2427 2427 if not data:
2428 2428 return '', data
2429 2429
2430 2430 compressed = self._compressor.compress(data)
2431 2431
2432 2432 if compressed:
2433 2433 # The revlog compressor added the header in the returned data.
2434 2434 return '', compressed
2435 2435
2436 2436 if data[0:1] == '\0':
2437 2437 return '', data
2438 2438 return 'u', data
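    # The (header, data) pairs returned above map onto the one-byte
    # chunk headers consumed by decompress() below, e.g. (illustrative
    # values):
    #
    #   ('', 'x\x9c...')     # zlib output carries its own 'x' header
    #   ('u', 'plain text')  # stored literally, 'u' prepended on write
    #   ('', '\0binary...')  # a leading NUL already marks stored data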
2439 2439
2440 2440 def decompress(self, data):
2441 2441 """Decompress a revlog chunk.
2442 2442
2443 2443 The chunk is expected to begin with a header identifying the
2444 2444 format type so it can be routed to an appropriate decompressor.
2445 2445 """
2446 2446 if not data:
2447 2447 return data
2448 2448
2449 2449 # Revlogs are read much more frequently than they are written and many
2450 2450 # chunks only take microseconds to decompress, so performance is
2451 2451 # important here.
2452 2452 #
2453 2453 # We can make a few assumptions about revlogs:
2454 2454 #
2455 2455 # 1) the majority of chunks will be compressed (as opposed to inline
2456 2456 # raw data).
2457 2457         # 2) decompressing *any* data will likely be at least 10x slower than
2458 2458 # returning raw inline data.
2459 2459 # 3) we want to prioritize common and officially supported compression
2460 2460 # engines
2461 2461 #
2462 2462 # It follows that we want to optimize for "decompress compressed data
2463 2463 # when encoded with common and officially supported compression engines"
2464 2464 # case over "raw data" and "data encoded by less common or non-official
2465 2465 # compression engines." That is why we have the inline lookup first
2466 2466 # followed by the compengines lookup.
2467 2467 #
2468 2468 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2469 2469 # compressed chunks. And this matters for changelog and manifest reads.
2470 2470 t = data[0:1]
2471 2471
2472 2472 if t == 'x':
2473 2473 try:
2474 2474 return _zlibdecompress(data)
2475 2475 except zlib.error as e:
2476 2476 raise RevlogError(_('revlog decompress error: %s') %
2477 2477 stringutil.forcebytestr(e))
2478 2478 # '\0' is more common than 'u' so it goes first.
2479 2479 elif t == '\0':
2480 2480 return data
2481 2481 elif t == 'u':
2482 2482 return util.buffer(data, 1)
2483 2483
2484 2484 try:
2485 2485 compressor = self._decompressors[t]
2486 2486 except KeyError:
2487 2487 try:
2488 2488 engine = util.compengines.forrevlogheader(t)
2489 2489 compressor = engine.revlogcompressor()
2490 2490 self._decompressors[t] = compressor
2491 2491 except KeyError:
2492 2492 raise RevlogError(_('unknown compression type %r') % t)
2493 2493
2494 2494 return compressor.decompress(data)
2495 2495
2496 2496 def _isgooddeltainfo(self, deltainfo, revinfo):
2497 2497 """Returns True if the given delta is good. Good means that it is within
2498 2498 the disk span, disk size, and chain length bounds that we know to be
2499 2499 performant."""
2500 2500 if deltainfo is None:
2501 2501 return False
2502 2502
2503 2503 # - 'deltainfo.distance' is the distance from the base revision --
2504 2504 # bounding it limits the amount of I/O we need to do.
2505 2505 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2506 2506 # deltas we need to apply -- bounding it limits the amount of CPU
2507 2507 # we consume.
2508 2508
2509 2509 if self._sparserevlog:
2510 2510 # As sparse-read will be used, we can consider that the distance,
2511 2511 # instead of being the span of the whole chunk,
2512 2512 # is the span of the largest read chunk
2513 2513 base = deltainfo.base
2514 2514
2515 2515 if base != nullrev:
2516 2516 deltachain = self._deltachain(base)[0]
2517 2517 else:
2518 2518 deltachain = []
2519 2519
2520 2520 # search for the first non-snapshot revision
2521 2521 for idx, r in enumerate(deltachain):
2522 2522 if not self.issnapshot(r):
2523 2523 break
2524 2524 deltachain = deltachain[idx:]
2525 2525 chunks = _slicechunk(self, deltachain, deltainfo)
2526 2526 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2527 2527 distance = max(all_span)
2528 2528 else:
2529 2529 distance = deltainfo.distance
2530 2530
2531 2531 textlen = revinfo.textlen
2532 2532 defaultmax = textlen * 4
2533 2533 maxdist = self._maxdeltachainspan
2534 2534 if not maxdist:
2535 2535             maxdist = distance # ensure the conditional passes
2536 2536 maxdist = max(maxdist, defaultmax)
2537 2537 if self._sparserevlog and maxdist < self._srmingapsize:
2538 2538             # In multiple places, we ignore irrelevant data ranges below a
2539 2539             # certain size. We also apply this tradeoff here and relax the span
2540 2540             # constraint for small enough content.
2541 2541 maxdist = self._srmingapsize
2542 2542
2543 2543 # Bad delta from read span:
2544 2544 #
2545 2545 # If the span of data read is larger than the maximum allowed.
2546 2546 if maxdist < distance:
2547 2547 return False
2548 2548
2549 2549 # Bad delta from new delta size:
2550 2550 #
2551 2551 # If the delta size is larger than the target text, storing the
2552 2552 # delta will be inefficient.
2553 2553 if textlen < deltainfo.deltalen:
2554 2554 return False
2555 2555
2556 2556 # Bad delta from cumulated payload size:
2557 2557 #
2558 2558         # If the sum of the deltas gets larger than K * the target text length.
2559 2559 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2560 2560 return False
2561 2561
2562 2562 # Bad delta from chain length:
2563 2563 #
2564 2564         # If the number of deltas in the chain gets too high.
2565 2565 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2566 2566 return False
2567 2567
2568 2568 # bad delta from intermediate snapshot size limit
2569 2569 #
2570 2570         # If an intermediate snapshot's size is higher than the limit. The
2571 2571         # limit exists to prevent endless chains of intermediate deltas from
2572 2572         # being created.
2573 2573 if (deltainfo.snapshotdepth is not None and
2574 2574 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2575 2575 return False
2576 2576
2577 2577 # bad delta if new intermediate snapshot is larger than the previous
2578 2578 # snapshot
2579 2579 if (deltainfo.snapshotdepth
2580 2580 and self.length(deltainfo.base) < deltainfo.deltalen):
2581 2581 return False
2582 2582
2583 2583 return True
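    # Worked example of the intermediate-snapshot bound above: with
    # textlen = 65536 and snapshotdepth = 3, a snapshot delta larger
    # than 65536 >> 3 = 8192 bytes is rejected, keeping each level of
    # the snapshot chain geometrically smaller than the previous one.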
2584 2584
2585 2585 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2586 2586 cachedelta, ifh, dfh, alwayscache=False,
2587 2587 deltacomputer=None):
2588 2588 """internal function to add revisions to the log
2589 2589
2590 2590 see addrevision for argument descriptions.
2591 2591
2592 2592 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2593 2593
2594 2594 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2595 2595 be used.
2596 2596
2597 2597 invariants:
2598 2598 - rawtext is optional (can be None); if not set, cachedelta must be set.
2599 2599 if both are set, they must correspond to each other.
2600 2600 """
2601 2601 if node == nullid:
2602 2602 raise RevlogError(_("%s: attempt to add null revision") %
2603 2603 (self.indexfile))
2604 2604 if node == wdirid or node in wdirfilenodeids:
2605 2605 raise RevlogError(_("%s: attempt to add wdir revision") %
2606 2606 (self.indexfile))
2607 2607
2608 2608 if self._inline:
2609 2609 fh = ifh
2610 2610 else:
2611 2611 fh = dfh
2612 2612
2613 2613 btext = [rawtext]
2614 2614
2615 2615 curr = len(self)
2616 2616 prev = curr - 1
2617 2617 offset = self.end(prev)
2618 2618 p1r, p2r = self.rev(p1), self.rev(p2)
2619 2619
2620 2620 # full versions are inserted when the needed deltas
2621 2621 # become comparable to the uncompressed text
2622 2622 if rawtext is None:
2623 2623             # need the rawtext size before it is changed by flag processors,
2624 2624             # which is the non-raw size. use revlog explicitly to avoid filelog's
2625 2625             # extra logic that might remove metadata size.
2626 2626 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2627 2627 cachedelta[1])
2628 2628 else:
2629 2629 textlen = len(rawtext)
2630 2630
2631 2631 if deltacomputer is None:
2632 2632 deltacomputer = _deltacomputer(self)
2633 2633
2634 2634 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2635 2635
2636 2636 # no delta for flag processor revision (see "candelta" for why)
2637 2637         # not calling candelta since only one revision needs testing, and to
2638 2638         # avoid the overhead of fetching flags again.
2639 2639 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2640 2640 deltainfo = None
2641 2641 else:
2642 2642 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2643 2643
2644 2644 if deltainfo is not None:
2645 2645 base = deltainfo.base
2646 2646 chainbase = deltainfo.chainbase
2647 2647 data = deltainfo.data
2648 2648 l = deltainfo.deltalen
2649 2649 else:
2650 2650 rawtext = deltacomputer.buildtext(revinfo, fh)
2651 2651 data = self.compress(rawtext)
2652 2652 l = len(data[1]) + len(data[0])
2653 2653 base = chainbase = curr
2654 2654
2655 2655 e = (offset_type(offset, flags), l, textlen,
2656 2656 base, link, p1r, p2r, node)
2657 2657 self.index.append(e)
2658 2658 self.nodemap[node] = curr
2659 2659
2660 2660 entry = self._io.packentry(e, self.node, self.version, curr)
2661 2661 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2662 2662
2663 2663 if alwayscache and rawtext is None:
2664 rawtext = deltacomputer._buildtext(revinfo, fh)
2664 rawtext = deltacomputer.buildtext(revinfo, fh)
2665 2665
2666 2666 if type(rawtext) == bytes: # only accept immutable objects
2667 2667 self._cache = (node, curr, rawtext)
2668 2668 self._chainbasecache[curr] = chainbase
2669 2669 return node
2670 2670
2671 2671 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2672 2672 # Files opened in a+ mode have inconsistent behavior on various
2673 2673 # platforms. Windows requires that a file positioning call be made
2674 2674 # when the file handle transitions between reads and writes. See
2675 2675 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2676 2676 # platforms, Python or the platform itself can be buggy. Some versions
2677 2677 # of Solaris have been observed to not append at the end of the file
2678 2678 # if the file was seeked to before the end. See issue4943 for more.
2679 2679 #
2680 2680 # We work around this issue by inserting a seek() before writing.
2681 2681 # Note: This is likely not necessary on Python 3.
2682 2682 ifh.seek(0, os.SEEK_END)
2683 2683 if dfh:
2684 2684 dfh.seek(0, os.SEEK_END)
2685 2685
2686 2686 curr = len(self) - 1
2687 2687 if not self._inline:
2688 2688 transaction.add(self.datafile, offset)
2689 2689 transaction.add(self.indexfile, curr * len(entry))
2690 2690 if data[0]:
2691 2691 dfh.write(data[0])
2692 2692 dfh.write(data[1])
2693 2693 ifh.write(entry)
2694 2694 else:
2695 2695 offset += curr * self._io.size
2696 2696 transaction.add(self.indexfile, offset, curr)
2697 2697 ifh.write(entry)
2698 2698 ifh.write(data[0])
2699 2699 ifh.write(data[1])
2700 2700 self._enforceinlinesize(transaction, ifh)
2701 2701
2702 2702 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2703 2703 """
2704 2704 add a delta group
2705 2705
2706 2706 given a set of deltas, add them to the revision log. the
2707 2707 first delta is against its parent, which should be in our
2708 2708 log, the rest are against the previous delta.
2709 2709
2710 2710 If ``addrevisioncb`` is defined, it will be called with arguments of
2711 2711 this revlog and the node that was added.
2712 2712 """
2713 2713
2714 2714 nodes = []
2715 2715
2716 2716 r = len(self)
2717 2717 end = 0
2718 2718 if r:
2719 2719 end = self.end(r - 1)
2720 2720 ifh = self._indexfp("a+")
2721 2721 isize = r * self._io.size
2722 2722 if self._inline:
2723 2723 transaction.add(self.indexfile, end + isize, r)
2724 2724 dfh = None
2725 2725 else:
2726 2726 transaction.add(self.indexfile, isize, r)
2727 2727 transaction.add(self.datafile, end)
2728 2728 dfh = self._datafp("a+")
2729 2729 def flush():
2730 2730 if dfh:
2731 2731 dfh.flush()
2732 2732 ifh.flush()
2733 2733 try:
2734 2734 deltacomputer = _deltacomputer(self)
2735 2735 # loop through our set of deltas
2736 2736 for data in deltas:
2737 2737 node, p1, p2, linknode, deltabase, delta, flags = data
2738 2738 link = linkmapper(linknode)
2739 2739 flags = flags or REVIDX_DEFAULT_FLAGS
2740 2740
2741 2741 nodes.append(node)
2742 2742
2743 2743 if node in self.nodemap:
2744 2744 # this can happen if two branches make the same change
2745 2745 continue
2746 2746
2747 2747 for p in (p1, p2):
2748 2748 if p not in self.nodemap:
2749 2749 raise LookupError(p, self.indexfile,
2750 2750 _('unknown parent'))
2751 2751
2752 2752 if deltabase not in self.nodemap:
2753 2753 raise LookupError(deltabase, self.indexfile,
2754 2754 _('unknown delta base'))
2755 2755
2756 2756 baserev = self.rev(deltabase)
2757 2757
2758 2758 if baserev != nullrev and self.iscensored(baserev):
2759 2759 # if base is censored, delta must be full replacement in a
2760 2760 # single patch operation
2761 2761 hlen = struct.calcsize(">lll")
2762 2762 oldlen = self.rawsize(baserev)
2763 2763 newlen = len(delta) - hlen
2764 2764 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2765 2765 raise error.CensoredBaseError(self.indexfile,
2766 2766 self.node(baserev))
2767 2767
2768 2768 if not flags and self._peek_iscensored(baserev, delta, flush):
2769 2769 flags |= REVIDX_ISCENSORED
2770 2770
2771 2771 # We assume consumers of addrevisioncb will want to retrieve
2772 2772 # the added revision, which will require a call to
2773 2773 # revision(). revision() will fast path if there is a cache
2774 2774 # hit. So, we tell _addrevision() to always cache in this case.
2775 2775 # We're only using addgroup() in the context of changegroup
2776 2776 # generation so the revision data can always be handled as raw
2777 2777 # by the flagprocessor.
2778 2778 self._addrevision(node, None, transaction, link,
2779 2779 p1, p2, flags, (baserev, delta),
2780 2780 ifh, dfh,
2781 2781 alwayscache=bool(addrevisioncb),
2782 2782 deltacomputer=deltacomputer)
2783 2783
2784 2784 if addrevisioncb:
2785 2785 addrevisioncb(self, node)
2786 2786
2787 2787 if not dfh and not self._inline:
2788 2788 # addrevision switched from inline to conventional
2789 2789 # reopen the index
2790 2790 ifh.close()
2791 2791 dfh = self._datafp("a+")
2792 2792 ifh = self._indexfp("a+")
2793 2793 finally:
2794 2794 if dfh:
2795 2795 dfh.close()
2796 2796 ifh.close()
2797 2797
2798 2798 return nodes
2799 2799
2800 2800 def iscensored(self, rev):
2801 2801 """Check if a file revision is censored."""
2802 2802 if not self._censorable:
2803 2803 return False
2804 2804
2805 2805 return self.flags(rev) & REVIDX_ISCENSORED
2806 2806
2807 2807 def _peek_iscensored(self, baserev, delta, flush):
2808 2808 """Quickly check if a delta produces a censored revision."""
2809 2809 if not self._censorable:
2810 2810 return False
2811 2811
2812 2812 # Fragile heuristic: unless new file meta keys are added alphabetically
2813 2813 # preceding "censored", all censored revisions are prefixed by
2814 2814 # "\1\ncensored:". A delta producing such a censored revision must be a
2815 2815 # full-replacement delta, so we inspect the first and only patch in the
2816 2816 # delta for this prefix.
2817 2817 hlen = struct.calcsize(">lll")
2818 2818 if len(delta) <= hlen:
2819 2819 return False
2820 2820
2821 2821 oldlen = self.rawsize(baserev)
2822 2822 newlen = len(delta) - hlen
2823 2823 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2824 2824 return False
2825 2825
2826 2826 add = "\1\ncensored:"
2827 2827 addlen = len(add)
2828 2828 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2829 2829
2830 2830 def getstrippoint(self, minlink):
2831 2831 """find the minimum rev that must be stripped to strip the linkrev
2832 2832
2833 2833 Returns a tuple containing the minimum rev and a set of all revs that
2834 2834 have linkrevs that will be broken by this strip.
2835 2835 """
2836 2836 brokenrevs = set()
2837 2837 strippoint = len(self)
2838 2838
2839 2839 heads = {}
2840 2840 futurelargelinkrevs = set()
2841 2841 for head in self.headrevs():
2842 2842 headlinkrev = self.linkrev(head)
2843 2843 heads[head] = headlinkrev
2844 2844 if headlinkrev >= minlink:
2845 2845 futurelargelinkrevs.add(headlinkrev)
2846 2846
2847 2847 # This algorithm involves walking down the rev graph, starting at the
2848 2848 # heads. Since the revs are topologically sorted according to linkrev,
2849 2849 # once all head linkrevs are below the minlink, we know there are
2850 2850 # no more revs that could have a linkrev greater than minlink.
2851 2851 # So we can stop walking.
2852 2852 while futurelargelinkrevs:
2853 2853 strippoint -= 1
2854 2854 linkrev = heads.pop(strippoint)
2855 2855
2856 2856 if linkrev < minlink:
2857 2857 brokenrevs.add(strippoint)
2858 2858 else:
2859 2859 futurelargelinkrevs.remove(linkrev)
2860 2860
2861 2861 for p in self.parentrevs(strippoint):
2862 2862 if p != nullrev:
2863 2863 plinkrev = self.linkrev(p)
2864 2864 heads[p] = plinkrev
2865 2865 if plinkrev >= minlink:
2866 2866 futurelargelinkrevs.add(plinkrev)
2867 2867
2868 2868 return strippoint, brokenrevs
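    # Worked example (toy graph, assumed linkrevs): revs 1 and 2 both
    # branch off rev 0, with linkrevs [0, 2, 1]. For minlink = 2 the
    # strip point is rev 1 (the first rev whose linkrev must go) and
    # brokenrevs = {2}: rev 2 has linkrev 1 < minlink but sits above
    # the strip point, so the caller must save and re-add it.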
2869 2869
2870 2870 def strip(self, minlink, transaction):
2871 2871 """truncate the revlog on the first revision with a linkrev >= minlink
2872 2872
2873 2873 This function is called when we're stripping revision minlink and
2874 2874 its descendants from the repository.
2875 2875
2876 2876 We have to remove all revisions with linkrev >= minlink, because
2877 2877 the equivalent changelog revisions will be renumbered after the
2878 2878 strip.
2879 2879
2880 2880 So we truncate the revlog on the first of these revisions, and
2881 2881 trust that the caller has saved the revisions that shouldn't be
2882 2882 removed and that it'll re-add them after this truncation.
2883 2883 """
2884 2884 if len(self) == 0:
2885 2885 return
2886 2886
2887 2887 rev, _ = self.getstrippoint(minlink)
2888 2888 if rev == len(self):
2889 2889 return
2890 2890
2891 2891 # first truncate the files on disk
2892 2892 end = self.start(rev)
2893 2893 if not self._inline:
2894 2894 transaction.add(self.datafile, end)
2895 2895 end = rev * self._io.size
2896 2896 else:
2897 2897 end += rev * self._io.size
2898 2898
2899 2899 transaction.add(self.indexfile, end)
2900 2900
2901 2901 # then reset internal state in memory to forget those revisions
2902 2902 self._cache = None
2903 2903 self._chaininfocache = {}
2904 2904 self._chunkclear()
2905 2905 for x in pycompat.xrange(rev, len(self)):
2906 2906 del self.nodemap[self.node(x)]
2907 2907
2908 2908 del self.index[rev:-1]
2909 2909 self._nodepos = None
2910 2910
2911 2911 def checksize(self):
2912 2912 expected = 0
2913 2913 if len(self):
2914 2914 expected = max(0, self.end(len(self) - 1))
2915 2915
2916 2916 try:
2917 2917 with self._datafp() as f:
2918 2918 f.seek(0, 2)
2919 2919 actual = f.tell()
2920 2920 dd = actual - expected
2921 2921 except IOError as inst:
2922 2922 if inst.errno != errno.ENOENT:
2923 2923 raise
2924 2924 dd = 0
2925 2925
2926 2926 try:
2927 2927 f = self.opener(self.indexfile)
2928 2928 f.seek(0, 2)
2929 2929 actual = f.tell()
2930 2930 f.close()
2931 2931 s = self._io.size
2932 2932 i = max(0, actual // s)
2933 2933 di = actual - (i * s)
2934 2934 if self._inline:
2935 2935 databytes = 0
2936 2936 for r in self:
2937 2937 databytes += max(0, self.length(r))
2938 2938 dd = 0
2939 2939 di = actual - len(self) * s - databytes
2940 2940 except IOError as inst:
2941 2941 if inst.errno != errno.ENOENT:
2942 2942 raise
2943 2943 di = 0
2944 2944
2945 2945 return (dd, di)
2946 2946
2947 2947 def files(self):
2948 2948 res = [self.indexfile]
2949 2949 if not self._inline:
2950 2950 res.append(self.datafile)
2951 2951 return res
2952 2952
2953 2953 DELTAREUSEALWAYS = 'always'
2954 2954 DELTAREUSESAMEREVS = 'samerevs'
2955 2955 DELTAREUSENEVER = 'never'
2956 2956
2957 2957 DELTAREUSEFULLADD = 'fulladd'
2958 2958
2959 2959 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2960 2960
2961 2961 def clone(self, tr, destrevlog, addrevisioncb=None,
2962 2962 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2963 2963 """Copy this revlog to another, possibly with format changes.
2964 2964
2965 2965 The destination revlog will contain the same revisions and nodes.
2966 2966 However, it may not be bit-for-bit identical due to e.g. delta encoding
2967 2967 differences.
2968 2968
2969 2969         The ``deltareuse`` argument controls how deltas from the existing revlog
2970 2970 are preserved in the destination revlog. The argument can have the
2971 2971 following values:
2972 2972
2973 2973 DELTAREUSEALWAYS
2974 2974 Deltas will always be reused (if possible), even if the destination
2975 2975 revlog would not select the same revisions for the delta. This is the
2976 2976 fastest mode of operation.
2977 2977 DELTAREUSESAMEREVS
2978 2978 Deltas will be reused if the destination revlog would pick the same
2979 2979 revisions for the delta. This mode strikes a balance between speed
2980 2980 and optimization.
2981 2981 DELTAREUSENEVER
2982 2982 Deltas will never be reused. This is the slowest mode of execution.
2983 2983 This mode can be used to recompute deltas (e.g. if the diff/delta
2984 2984 algorithm changes).
2985 2985
2986 2986 Delta computation can be slow, so the choice of delta reuse policy can
2987 2987 significantly affect run time.
2988 2988
2989 2989 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2990 2990         two extremes. Deltas will be reused if they are appropriate. But if a
2991 2991         better delta base could be chosen, it will be. This means if you
2992 2992 are converting a non-generaldelta revlog to a generaldelta revlog,
2993 2993 deltas will be recomputed if the delta's parent isn't a parent of the
2994 2994 revision.
2995 2995
2996 2996 In addition to the delta policy, the ``deltabothparents`` argument
2997 2997 controls whether to compute deltas against both parents for merges.
2998 2998         If ``None`` is passed, the destination revlog's current setting is kept.
2999 2999 """
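        # Usage sketch (hypothetical source and destination revlogs):
        # recompute every delta while converting, e.g. to a generaldelta
        # destination:
        #
        #   srcrevlog.clone(tr, destrevlog,
        #                   deltareuse=srcrevlog.DELTAREUSENEVER)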
3000 3000 if deltareuse not in self.DELTAREUSEALL:
3001 3001 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
3002 3002
3003 3003 if len(destrevlog):
3004 3004 raise ValueError(_('destination revlog is not empty'))
3005 3005
3006 3006 if getattr(self, 'filteredrevs', None):
3007 3007 raise ValueError(_('source revlog has filtered revisions'))
3008 3008 if getattr(destrevlog, 'filteredrevs', None):
3009 3009 raise ValueError(_('destination revlog has filtered revisions'))
3010 3010
3011 3011 # lazydeltabase controls whether to reuse a cached delta, if possible.
3012 3012 oldlazydeltabase = destrevlog._lazydeltabase
3013 3013 oldamd = destrevlog._deltabothparents
3014 3014
3015 3015 try:
3016 3016 if deltareuse == self.DELTAREUSEALWAYS:
3017 3017 destrevlog._lazydeltabase = True
3018 3018 elif deltareuse == self.DELTAREUSESAMEREVS:
3019 3019 destrevlog._lazydeltabase = False
3020 3020
3021 3021 destrevlog._deltabothparents = deltabothparents or oldamd
3022 3022
3023 3023 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3024 3024 self.DELTAREUSESAMEREVS)
3025 3025
3026 3026 deltacomputer = _deltacomputer(destrevlog)
3027 3027 index = self.index
3028 3028 for rev in self:
3029 3029 entry = index[rev]
3030 3030
3031 3031 # Some classes override linkrev to take filtered revs into
3032 3032 # account. Use raw entry from index.
3033 3033 flags = entry[0] & 0xffff
3034 3034 linkrev = entry[4]
3035 3035 p1 = index[entry[5]][7]
3036 3036 p2 = index[entry[6]][7]
3037 3037 node = entry[7]
3038 3038
3039 3039 # (Possibly) reuse the delta from the revlog if allowed and
3040 3040 # the revlog chunk is a delta.
3041 3041 cachedelta = None
3042 3042 rawtext = None
3043 3043 if populatecachedelta:
3044 3044 dp = self.deltaparent(rev)
3045 3045 if dp != nullrev:
3046 3046 cachedelta = (dp, bytes(self._chunk(rev)))
3047 3047
3048 3048 if not cachedelta:
3049 3049 rawtext = self.revision(rev, raw=True)
3050 3050
3051 3051 
3053 3053 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3054 3054 cachedelta=cachedelta,
3055 3055 node=node, flags=flags,
3056 3056 deltacomputer=deltacomputer)
3057 3057 else:
3058 3058 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3059 3059 checkambig=False)
3060 3060 dfh = None
3061 3061 if not destrevlog._inline:
3062 3062 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3063 3063 try:
3064 3064 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3065 3065 p2, flags, cachedelta, ifh, dfh,
3066 3066 deltacomputer=deltacomputer)
3067 3067 finally:
3068 3068 if dfh:
3069 3069 dfh.close()
3070 3070 ifh.close()
3071 3071
3072 3072 if addrevisioncb:
3073 3073 addrevisioncb(self, rev, node)
3074 3074 finally:
3075 3075 destrevlog._lazydeltabase = oldlazydeltabase
3076 3076 destrevlog._deltabothparents = oldamd