##// END OF EJS Templates
sparse-revlog: implement algorithm to write sparse delta chains (issue5480)...
Paul Morelle -
r38740:f8762ea7 default
parent child Browse files
Show More
@@ -1,2910 +1,2960
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmark for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate hash. If
142 142 "write" returns True, that basically means "rawtext" returned by "write"
143 143 should be used to generate hash. Usually, "write" and "read" return
144 144 different booleans. And "raw" returns a same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent node is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
199 199 class _testrevlog(object):
200 200 """minimalist fake revlog to use in doctests"""
201 201
202 202 def __init__(self, data, density=0.5, mingap=0):
203 203 """data is an list of revision payload boundaries"""
204 204 self._data = data
205 205 self._srdensitythreshold = density
206 206 self._srmingapsize = mingap
207 207
208 208 def start(self, rev):
209 209 if rev == 0:
210 210 return 0
211 211 return self._data[rev - 1]
212 212
213 213 def end(self, rev):
214 214 return self._data[rev]
215 215
216 216 def length(self, rev):
217 217 return self.end(rev) - self.start(rev)
218 218
219 def __len__(self):
220 return len(self._data)
221
219 222 def _trimchunk(revlog, revs, startidx, endidx=None):
220 223 """returns revs[startidx:endidx] without empty trailing revs
221 224
222 225 Doctest Setup
223 226 >>> revlog = _testrevlog([
224 227 ... 5, #0
225 228 ... 10, #1
226 229 ... 12, #2
227 230 ... 12, #3 (empty)
228 231 ... 17, #4
229 232 ... 21, #5
230 233 ... 21, #6 (empty)
231 234 ... ])
232 235
233 236 Contiguous cases:
234 237 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 238 [0, 1, 2, 3, 4, 5]
236 239 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 240 [0, 1, 2, 3, 4]
238 241 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 242 [0, 1, 2]
240 243 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 244 [2]
242 245 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 246 [3, 4, 5]
244 247 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 248 [3, 4]
246 249
247 250 Discontiguous cases:
248 251 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 252 [1, 3, 5]
250 253 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 254 [1]
252 255 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 256 [3, 5]
254 257 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 258 [3, 5]
256 259 """
257 260 length = revlog.length
258 261
259 262 if endidx is None:
260 263 endidx = len(revs)
261 264
262 265 # Trim empty revs at the end, but never the very first revision of a chain
263 266 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
264 267 endidx -= 1
265 268
266 269 return revs[startidx:endidx]
267 270
268 271 def _segmentspan(revlog, revs):
269 272 """Get the byte span of a segment of revisions
270 273
271 274 revs is a sorted array of revision numbers
272 275
273 276 >>> revlog = _testrevlog([
274 277 ... 5, #0
275 278 ... 10, #1
276 279 ... 12, #2
277 280 ... 12, #3 (empty)
278 281 ... 17, #4
279 282 ... ])
280 283
281 284 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
282 285 17
283 286 >>> _segmentspan(revlog, [0, 4])
284 287 17
285 288 >>> _segmentspan(revlog, [3, 4])
286 289 5
287 290 >>> _segmentspan(revlog, [1, 2, 3,])
288 291 7
289 292 >>> _segmentspan(revlog, [1, 3])
290 293 7
291 294 """
292 295 if not revs:
293 296 return 0
294 297 return revlog.end(revs[-1]) - revlog.start(revs[0])
295 298
296 def _slicechunk(revlog, revs, targetsize=None):
299 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
297 300 """slice revs to reduce the amount of unrelated data to be read from disk.
298 301
299 302 ``revs`` is sliced into groups that should be read in one time.
300 303 Assume that revs are sorted.
301 304
302 305 The initial chunk is sliced until the overall density (payload/chunks-span
303 306 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
304 307 `revlog._srmingapsize` is skipped.
305 308
306 309 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
307 310 For consistency with other slicing choice, this limit won't go lower than
308 311 `revlog._srmingapsize`.
309 312
310 313 If individual revisions chunk are larger than this limit, they will still
311 314 be raised individually.
312 315
313 316 >>> revlog = _testrevlog([
314 317 ... 5, #00 (5)
315 318 ... 10, #01 (5)
316 319 ... 12, #02 (2)
317 320 ... 12, #03 (empty)
318 321 ... 27, #04 (15)
319 322 ... 31, #05 (4)
320 323 ... 31, #06 (empty)
321 324 ... 42, #07 (11)
322 325 ... 47, #08 (5)
323 326 ... 47, #09 (empty)
324 327 ... 48, #10 (1)
325 328 ... 51, #11 (3)
326 329 ... 74, #12 (23)
327 330 ... 85, #13 (11)
328 331 ... 86, #14 (1)
329 332 ... 91, #15 (5)
330 333 ... ])
331 334
332 335 >>> list(_slicechunk(revlog, list(range(16))))
333 336 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
334 337 >>> list(_slicechunk(revlog, [0, 15]))
335 338 [[0], [15]]
336 339 >>> list(_slicechunk(revlog, [0, 11, 15]))
337 340 [[0], [11], [15]]
338 341 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
339 342 [[0], [11, 13, 15]]
340 343 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
341 344 [[1, 2], [5, 8, 10, 11], [14]]
342 345
343 346 Slicing with a maximum chunk size
344 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
347 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
345 348 [[0], [11], [13], [15]]
346 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
349 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
347 350 [[0], [11], [13, 15]]
348 351 """
349 352 if targetsize is not None:
350 353 targetsize = max(targetsize, revlog._srmingapsize)
354 # targetsize should not be specified when evaluating delta candidates:
355 # * targetsize is used to ensure we stay within specification when reading,
356 # * deltainfo is used to pick are good delta chain when writing.
357 if not (deltainfo is None or targetsize is None):
358 msg = 'cannot use `targetsize` with a `deltainfo`'
359 raise error.ProgrammingError(msg)
351 360 for chunk in _slicechunktodensity(revlog, revs,
361 deltainfo,
352 362 revlog._srdensitythreshold,
353 363 revlog._srmingapsize):
354 364 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
355 365 yield subchunk
356 366
357 def _slicechunktosize(revlog, revs, targetsize):
367 def _slicechunktosize(revlog, revs, targetsize=None):
358 368 """slice revs to match the target size
359 369
360 370 This is intended to be used on chunk that density slicing selected by that
361 371 are still too large compared to the read garantee of revlog. This might
362 372 happens when "minimal gap size" interrupted the slicing or when chain are
363 373 built in a way that create large blocks next to each other.
364 374
365 375 >>> revlog = _testrevlog([
366 376 ... 3, #0 (3)
367 377 ... 5, #1 (2)
368 378 ... 6, #2 (1)
369 379 ... 8, #3 (2)
370 380 ... 8, #4 (empty)
371 381 ... 11, #5 (3)
372 382 ... 12, #6 (1)
373 383 ... 13, #7 (1)
374 384 ... 14, #8 (1)
375 385 ... ])
376 386
377 387 Cases where chunk is already small enough
378 388 >>> list(_slicechunktosize(revlog, [0], 3))
379 389 [[0]]
380 390 >>> list(_slicechunktosize(revlog, [6, 7], 3))
381 391 [[6, 7]]
382 392 >>> list(_slicechunktosize(revlog, [0], None))
383 393 [[0]]
384 394 >>> list(_slicechunktosize(revlog, [6, 7], None))
385 395 [[6, 7]]
386 396
387 397 cases where we need actual slicing
388 398 >>> list(_slicechunktosize(revlog, [0, 1], 3))
389 399 [[0], [1]]
390 400 >>> list(_slicechunktosize(revlog, [1, 3], 3))
391 401 [[1], [3]]
392 402 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
393 403 [[1, 2], [3]]
394 404 >>> list(_slicechunktosize(revlog, [3, 5], 3))
395 405 [[3], [5]]
396 406 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
397 407 [[3], [5]]
398 408 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
399 409 [[5], [6, 7, 8]]
400 410 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
401 411 [[0], [1, 2], [3], [5], [6, 7, 8]]
402 412
403 413 Case with too large individual chunk (must return valid chunk)
404 414 >>> list(_slicechunktosize(revlog, [0, 1], 2))
405 415 [[0], [1]]
406 416 >>> list(_slicechunktosize(revlog, [1, 3], 1))
407 417 [[1], [3]]
408 418 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
409 419 [[3], [5]]
410 420 """
411 421 assert targetsize is None or 0 <= targetsize
412 422 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
413 423 yield revs
414 424 return
415 425
416 426 startrevidx = 0
417 427 startdata = revlog.start(revs[0])
418 428 endrevidx = 0
419 429 iterrevs = enumerate(revs)
420 430 next(iterrevs) # skip first rev.
421 431 for idx, r in iterrevs:
422 432 span = revlog.end(r) - startdata
423 433 if span <= targetsize:
424 434 endrevidx = idx
425 435 else:
426 436 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
427 437 if chunk:
428 438 yield chunk
429 439 startrevidx = idx
430 440 startdata = revlog.start(r)
431 441 endrevidx = idx
432 442 yield _trimchunk(revlog, revs, startrevidx)
433 443
434 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
444 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
445 mingapsize=0):
435 446 """slice revs to reduce the amount of unrelated data to be read from disk.
436 447
437 448 ``revs`` is sliced into groups that should be read in one time.
438 449 Assume that revs are sorted.
439 450
451 ``deltainfo`` is a _deltainfo instance of a revision that we would append
452 to the top of the revlog.
453
440 454 The initial chunk is sliced until the overall density (payload/chunks-span
441 455 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
442 456 skipped.
443 457
444 458 >>> revlog = _testrevlog([
445 459 ... 5, #00 (5)
446 460 ... 10, #01 (5)
447 461 ... 12, #02 (2)
448 462 ... 12, #03 (empty)
449 463 ... 27, #04 (15)
450 464 ... 31, #05 (4)
451 465 ... 31, #06 (empty)
452 466 ... 42, #07 (11)
453 467 ... 47, #08 (5)
454 468 ... 47, #09 (empty)
455 469 ... 48, #10 (1)
456 470 ... 51, #11 (3)
457 471 ... 74, #12 (23)
458 472 ... 85, #13 (11)
459 473 ... 86, #14 (1)
460 474 ... 91, #15 (5)
461 475 ... ])
462 476
463 477 >>> list(_slicechunktodensity(revlog, list(range(16))))
464 478 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
465 479 >>> list(_slicechunktodensity(revlog, [0, 15]))
466 480 [[0], [15]]
467 481 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
468 482 [[0], [11], [15]]
469 483 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
470 484 [[0], [11, 13, 15]]
471 485 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
472 486 [[1, 2], [5, 8, 10, 11], [14]]
473 487 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
474 488 ... mingapsize=20))
475 489 [[1, 2, 3, 5, 8, 10, 11], [14]]
476 490 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
477 491 ... targetdensity=0.95))
478 492 [[1, 2], [5], [8, 10, 11], [14]]
479 493 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
480 494 ... targetdensity=0.95, mingapsize=12))
481 495 [[1, 2], [5, 8, 10, 11], [14]]
482 496 """
483 497 start = revlog.start
484 498 length = revlog.length
485 499
486 500 if len(revs) <= 1:
487 501 yield revs
488 502 return
489 503
490 readdata = deltachainspan = _segmentspan(revlog, revs)
504 nextrev = len(revlog)
505 nextoffset = revlog.end(nextrev - 1)
506
507 if deltainfo is None:
508 deltachainspan = _segmentspan(revlog, revs)
509 chainpayload = sum(length(r) for r in revs)
510 else:
511 deltachainspan = deltainfo.distance
512 chainpayload = deltainfo.compresseddeltalen
491 513
492 514 if deltachainspan < mingapsize:
493 515 yield revs
494 516 return
495 517
496 chainpayload = sum(length(r) for r in revs)
518 readdata = deltachainspan
497 519
498 520 if deltachainspan:
499 521 density = chainpayload / float(deltachainspan)
500 522 else:
501 523 density = 1.0
502 524
503 525 if density >= targetdensity:
504 526 yield revs
505 527 return
506 528
529 if deltainfo is not None:
530 revs = list(revs)
531 revs.append(nextrev)
532
507 533 # Store the gaps in a heap to have them sorted by decreasing size
508 534 gapsheap = []
509 535 heapq.heapify(gapsheap)
510 536 prevend = None
511 537 for i, rev in enumerate(revs):
512 revstart = start(rev)
513 revlen = length(rev)
538 if rev < nextrev:
539 revstart = start(rev)
540 revlen = length(rev)
541 else:
542 revstart = nextoffset
543 revlen = deltainfo.deltalen
514 544
515 545 # Skip empty revisions to form larger holes
516 546 if revlen == 0:
517 547 continue
518 548
519 549 if prevend is not None:
520 550 gapsize = revstart - prevend
521 551 # only consider holes that are large enough
522 552 if gapsize > mingapsize:
523 553 heapq.heappush(gapsheap, (-gapsize, i))
524 554
525 555 prevend = revstart + revlen
526 556
527 557 # Collect the indices of the largest holes until the density is acceptable
528 558 indicesheap = []
529 559 heapq.heapify(indicesheap)
530 560 while gapsheap and density < targetdensity:
531 561 oppgapsize, gapidx = heapq.heappop(gapsheap)
532 562
533 563 heapq.heappush(indicesheap, gapidx)
534 564
535 565 # the gap sizes are stored as negatives to be sorted decreasingly
536 566 # by the heap
537 567 readdata -= (-oppgapsize)
538 568 if readdata > 0:
539 569 density = chainpayload / float(readdata)
540 570 else:
541 571 density = 1.0
542 572
543 573 # Cut the revs at collected indices
544 574 previdx = 0
545 575 while indicesheap:
546 576 idx = heapq.heappop(indicesheap)
547 577
548 578 chunk = _trimchunk(revlog, revs, previdx, idx)
549 579 if chunk:
550 580 yield chunk
551 581
552 582 previdx = idx
553 583
554 584 chunk = _trimchunk(revlog, revs, previdx)
555 585 if chunk:
556 586 yield chunk
557 587
558 588 @attr.s(slots=True, frozen=True)
559 589 class _deltainfo(object):
560 590 distance = attr.ib()
561 591 deltalen = attr.ib()
562 592 data = attr.ib()
563 593 base = attr.ib()
564 594 chainbase = attr.ib()
565 595 chainlen = attr.ib()
566 596 compresseddeltalen = attr.ib()
567 597
568 598 class _deltacomputer(object):
569 599 def __init__(self, revlog):
570 600 self.revlog = revlog
571 601
572 602 def _getcandidaterevs(self, p1, p2, cachedelta):
573 603 """
574 604 Provides revisions that present an interest to be diffed against,
575 605 grouped by level of easiness.
576 606 """
577 607 revlog = self.revlog
578 608 gdelta = revlog._generaldelta
579 609 curr = len(revlog)
580 610 prev = curr - 1
581 611 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
582 612
583 613 # should we try to build a delta?
584 614 if prev != nullrev and revlog.storedeltachains:
585 615 tested = set()
586 616 # This condition is true most of the time when processing
587 617 # changegroup data into a generaldelta repo. The only time it
588 618 # isn't true is if this is the first revision in a delta chain
589 619 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
590 620 if cachedelta and gdelta and revlog._lazydeltabase:
591 621 # Assume what we received from the server is a good choice
592 622 # build delta will reuse the cache
593 623 yield (cachedelta[0],)
594 624 tested.add(cachedelta[0])
595 625
596 626 if gdelta:
597 627 # exclude already lazy tested base if any
598 628 parents = [p for p in (p1r, p2r)
599 629 if p != nullrev and p not in tested]
600 630
601 631 if not revlog._aggressivemergedeltas and len(parents) == 2:
602 632 parents.sort()
603 633 # To minimize the chance of having to build a fulltext,
604 634 # pick first whichever parent is closest to us (max rev)
605 635 yield (parents[1],)
606 636 # then the other one (min rev) if the first did not fit
607 637 yield (parents[0],)
608 638 tested.update(parents)
609 639 elif len(parents) > 0:
610 640 # Test all parents (1 or 2), and keep the best candidate
611 641 yield parents
612 642 tested.update(parents)
613 643
614 644 if prev not in tested:
615 645 # other approach failed try against prev to hopefully save us a
616 646 # fulltext.
617 647 yield (prev,)
618 648 tested.add(prev)
619 649
620 650 def buildtext(self, revinfo, fh):
621 651 """Builds a fulltext version of a revision
622 652
623 653 revinfo: _revisioninfo instance that contains all needed info
624 654 fh: file handle to either the .i or the .d revlog file,
625 655 depending on whether it is inlined or not
626 656 """
627 657 btext = revinfo.btext
628 658 if btext[0] is not None:
629 659 return btext[0]
630 660
631 661 revlog = self.revlog
632 662 cachedelta = revinfo.cachedelta
633 663 flags = revinfo.flags
634 664 node = revinfo.node
635 665
636 666 baserev = cachedelta[0]
637 667 delta = cachedelta[1]
638 668 # special case deltas which replace entire base; no need to decode
639 669 # base revision. this neatly avoids censored bases, which throw when
640 670 # they're decoded.
641 671 hlen = struct.calcsize(">lll")
642 672 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
643 673 len(delta) - hlen):
644 674 btext[0] = delta[hlen:]
645 675 else:
646 676 # deltabase is rawtext before changed by flag processors, which is
647 677 # equivalent to non-raw text
648 678 basetext = revlog.revision(baserev, _df=fh, raw=False)
649 679 btext[0] = mdiff.patch(basetext, delta)
650 680
651 681 try:
652 682 res = revlog._processflags(btext[0], flags, 'read', raw=True)
653 683 btext[0], validatehash = res
654 684 if validatehash:
655 685 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
656 686 if flags & REVIDX_ISCENSORED:
657 687 raise RevlogError(_('node %s is not censored') % node)
658 688 except CensoredNodeError:
659 689 # must pass the censored index flag to add censored revisions
660 690 if not flags & REVIDX_ISCENSORED:
661 691 raise
662 692 return btext[0]
663 693
664 694 def _builddeltadiff(self, base, revinfo, fh):
665 695 revlog = self.revlog
666 696 t = self.buildtext(revinfo, fh)
667 697 if revlog.iscensored(base):
668 698 # deltas based on a censored revision must replace the
669 699 # full content in one patch, so delta works everywhere
670 700 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
671 701 delta = header + t
672 702 else:
673 703 ptext = revlog.revision(base, _df=fh, raw=True)
674 704 delta = mdiff.textdiff(ptext, t)
675 705
676 706 return delta
677 707
678 708 def _builddeltainfo(self, revinfo, base, fh):
679 709 # can we use the cached delta?
680 710 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
681 711 delta = revinfo.cachedelta[1]
682 712 else:
683 713 delta = self._builddeltadiff(base, revinfo, fh)
684 714 revlog = self.revlog
685 715 header, data = revlog.compress(delta)
686 716 deltalen = len(header) + len(data)
687 717 chainbase = revlog.chainbase(base)
688 718 offset = revlog.end(len(revlog) - 1)
689 719 dist = deltalen + offset - revlog.start(chainbase)
690 720 if revlog._generaldelta:
691 721 deltabase = base
692 722 else:
693 723 deltabase = chainbase
694 724 chainlen, compresseddeltalen = revlog._chaininfo(base)
695 725 chainlen += 1
696 726 compresseddeltalen += deltalen
697 727 return _deltainfo(dist, deltalen, (header, data), deltabase,
698 728 chainbase, chainlen, compresseddeltalen)
699 729
700 730 def finddeltainfo(self, revinfo, fh):
701 731 """Find an acceptable delta against a candidate revision
702 732
703 733 revinfo: information about the revision (instance of _revisioninfo)
704 734 fh: file handle to either the .i or the .d revlog file,
705 735 depending on whether it is inlined or not
706 736
707 737 Returns the first acceptable candidate revision, as ordered by
708 738 _getcandidaterevs
709 739 """
710 740 cachedelta = revinfo.cachedelta
711 741 p1 = revinfo.p1
712 742 p2 = revinfo.p2
713 743 revlog = self.revlog
714 744
715 745 deltainfo = None
716 746 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
717 747 nominateddeltas = []
718 748 for candidaterev in candidaterevs:
719 749 # no delta for rawtext-changing revs (see "candelta" for why)
720 750 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
721 751 continue
722 752 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
723 753 if revlog._isgooddeltainfo(candidatedelta, revinfo):
724 754 nominateddeltas.append(candidatedelta)
725 755 if nominateddeltas:
726 756 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
727 757 break
728 758
729 759 return deltainfo
730 760
731 761 @attr.s(slots=True, frozen=True)
732 762 class _revisioninfo(object):
733 763 """Information about a revision that allows building its fulltext
734 764 node: expected hash of the revision
735 765 p1, p2: parent revs of the revision
736 766 btext: built text cache consisting of a one-element list
737 767 cachedelta: (baserev, uncompressed_delta) or None
738 768 flags: flags associated to the revision storage
739 769
740 770 One of btext[0] or cachedelta must be set.
741 771 """
742 772 node = attr.ib()
743 773 p1 = attr.ib()
744 774 p2 = attr.ib()
745 775 btext = attr.ib()
746 776 textlen = attr.ib()
747 777 cachedelta = attr.ib()
748 778 flags = attr.ib()
749 779
750 780 # index v0:
751 781 # 4 bytes: offset
752 782 # 4 bytes: compressed length
753 783 # 4 bytes: base rev
754 784 # 4 bytes: link rev
755 785 # 20 bytes: parent 1 nodeid
756 786 # 20 bytes: parent 2 nodeid
757 787 # 20 bytes: nodeid
758 788 indexformatv0 = struct.Struct(">4l20s20s20s")
759 789 indexformatv0_pack = indexformatv0.pack
760 790 indexformatv0_unpack = indexformatv0.unpack
761 791
762 792 class revlogoldio(object):
763 793 def __init__(self):
764 794 self.size = indexformatv0.size
765 795
766 796 def parseindex(self, data, inline):
767 797 s = self.size
768 798 index = []
769 799 nodemap = {nullid: nullrev}
770 800 n = off = 0
771 801 l = len(data)
772 802 while off + s <= l:
773 803 cur = data[off:off + s]
774 804 off += s
775 805 e = indexformatv0_unpack(cur)
776 806 # transform to revlogv1 format
777 807 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
778 808 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
779 809 index.append(e2)
780 810 nodemap[e[6]] = n
781 811 n += 1
782 812
783 813 # add the magic null revision at -1
784 814 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
785 815
786 816 return index, nodemap, None
787 817
788 818 def packentry(self, entry, node, version, rev):
789 819 if gettype(entry[0]):
790 820 raise RevlogError(_('index entry flags need revlog version 1'))
791 821 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
792 822 node(entry[5]), node(entry[6]), entry[7])
793 823 return indexformatv0_pack(*e2)
794 824
795 825 # index ng:
796 826 # 6 bytes: offset
797 827 # 2 bytes: flags
798 828 # 4 bytes: compressed length
799 829 # 4 bytes: uncompressed length
800 830 # 4 bytes: base rev
801 831 # 4 bytes: link rev
802 832 # 4 bytes: parent 1 rev
803 833 # 4 bytes: parent 2 rev
804 834 # 32 bytes: nodeid
805 835 indexformatng = struct.Struct(">Qiiiiii20s12x")
806 836 indexformatng_pack = indexformatng.pack
807 837 versionformat = struct.Struct(">I")
808 838 versionformat_pack = versionformat.pack
809 839 versionformat_unpack = versionformat.unpack
810 840
811 841 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
812 842 # signed integer)
813 843 _maxentrysize = 0x7fffffff
814 844
815 845 class revlogio(object):
816 846 def __init__(self):
817 847 self.size = indexformatng.size
818 848
819 849 def parseindex(self, data, inline):
820 850 # call the C implementation to parse the index data
821 851 index, cache = parsers.parse_index2(data, inline)
822 852 return index, getattr(index, 'nodemap', None), cache
823 853
824 854 def packentry(self, entry, node, version, rev):
825 855 p = indexformatng_pack(*entry)
826 856 if rev == 0:
827 857 p = versionformat_pack(version) + p[4:]
828 858 return p
829 859
830 860 class revlog(object):
831 861 """
832 862 the underlying revision storage object
833 863
834 864 A revlog consists of two parts, an index and the revision data.
835 865
836 866 The index is a file with a fixed record size containing
837 867 information on each revision, including its nodeid (hash), the
838 868 nodeids of its parents, the position and offset of its data within
839 869 the data file, and the revision it's based on. Finally, each entry
840 870 contains a linkrev entry that can serve as a pointer to external
841 871 data.
842 872
843 873 The revision data itself is a linear collection of data chunks.
844 874 Each chunk represents a revision and is usually represented as a
845 875 delta against the previous chunk. To bound lookup time, runs of
846 876 deltas are limited to about 2 times the length of the original
847 877 version data. This makes retrieval of a version proportional to
848 878 its size, or O(1) relative to the number of revisions.
849 879
850 880 Both pieces of the revlog are written to in an append-only
851 881 fashion, which means we never need to rewrite a file to insert or
852 882 remove data, and can use some simple techniques to avoid the need
853 883 for locking while reading.
854 884
855 885 If checkambig, indexfile is opened with checkambig=True at
856 886 writing, to avoid file stat ambiguity.
857 887
858 888 If mmaplargeindex is True, and an mmapindexthreshold is set, the
859 889 index will be mmapped rather than read if it is larger than the
860 890 configured threshold.
861 891
862 892 If censorable is True, the revlog can have censored revisions.
863 893 """
864 894 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
865 895 mmaplargeindex=False, censorable=False):
866 896 """
867 897 create a revlog object
868 898
869 899 opener is a function that abstracts the file opening operation
870 900 and can be used to implement COW semantics or the like.
871 901 """
872 902 self.indexfile = indexfile
873 903 self.datafile = datafile or (indexfile[:-2] + ".d")
874 904 self.opener = opener
875 905 # When True, indexfile is opened with checkambig=True at writing, to
876 906 # avoid file stat ambiguity.
877 907 self._checkambig = checkambig
878 908 self._censorable = censorable
879 909 # 3-tuple of (node, rev, text) for a raw revision.
880 910 self._cache = None
881 911 # Maps rev to chain base rev.
882 912 self._chainbasecache = util.lrucachedict(100)
883 913 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
884 914 self._chunkcache = (0, '')
885 915 # How much data to read and cache into the raw revlog data cache.
886 916 self._chunkcachesize = 65536
887 917 self._maxchainlen = None
888 918 self._aggressivemergedeltas = True
889 919 self.index = []
890 920 # Mapping of partial identifiers to full nodes.
891 921 self._pcache = {}
892 922 # Mapping of revision integer to full node.
893 923 self._nodecache = {nullid: nullrev}
894 924 self._nodepos = None
895 925 self._compengine = 'zlib'
896 926 self._maxdeltachainspan = -1
897 927 self._withsparseread = False
898 928 self._sparserevlog = False
899 929 self._srdensitythreshold = 0.50
900 930 self._srmingapsize = 262144
901 931
902 932 mmapindexthreshold = None
903 933 v = REVLOG_DEFAULT_VERSION
904 934 opts = getattr(opener, 'options', None)
905 935 if opts is not None:
906 936 if 'revlogv2' in opts:
907 937 # version 2 revlogs always use generaldelta.
908 938 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
909 939 elif 'revlogv1' in opts:
910 940 if 'generaldelta' in opts:
911 941 v |= FLAG_GENERALDELTA
912 942 else:
913 943 v = 0
914 944 if 'chunkcachesize' in opts:
915 945 self._chunkcachesize = opts['chunkcachesize']
916 946 if 'maxchainlen' in opts:
917 947 self._maxchainlen = opts['maxchainlen']
918 948 if 'aggressivemergedeltas' in opts:
919 949 self._aggressivemergedeltas = opts['aggressivemergedeltas']
920 950 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
921 951 if 'compengine' in opts:
922 952 self._compengine = opts['compengine']
923 953 if 'maxdeltachainspan' in opts:
924 954 self._maxdeltachainspan = opts['maxdeltachainspan']
925 955 if mmaplargeindex and 'mmapindexthreshold' in opts:
926 956 mmapindexthreshold = opts['mmapindexthreshold']
927 957 self._sparserevlog = bool(opts.get('sparse-revlog', False))
928 958 withsparseread = bool(opts.get('with-sparse-read', False))
929 959 # sparse-revlog forces sparse-read
930 960 self._withsparseread = self._sparserevlog or withsparseread
931 961 if 'sparse-read-density-threshold' in opts:
932 962 self._srdensitythreshold = opts['sparse-read-density-threshold']
933 963 if 'sparse-read-min-gap-size' in opts:
934 964 self._srmingapsize = opts['sparse-read-min-gap-size']
935 965
936 966 if self._chunkcachesize <= 0:
937 967 raise RevlogError(_('revlog chunk cache size %r is not greater '
938 968 'than 0') % self._chunkcachesize)
939 969 elif self._chunkcachesize & (self._chunkcachesize - 1):
940 970 raise RevlogError(_('revlog chunk cache size %r is not a power '
941 971 'of 2') % self._chunkcachesize)
942 972
943 973 indexdata = ''
944 974 self._initempty = True
945 975 try:
946 976 with self._indexfp() as f:
947 977 if (mmapindexthreshold is not None and
948 978 self.opener.fstat(f).st_size >= mmapindexthreshold):
949 979 indexdata = util.buffer(util.mmapread(f))
950 980 else:
951 981 indexdata = f.read()
952 982 if len(indexdata) > 0:
953 983 v = versionformat_unpack(indexdata[:4])[0]
954 984 self._initempty = False
955 985 except IOError as inst:
956 986 if inst.errno != errno.ENOENT:
957 987 raise
958 988
959 989 self.version = v
960 990 self._inline = v & FLAG_INLINE_DATA
961 991 self._generaldelta = v & FLAG_GENERALDELTA
962 992 flags = v & ~0xFFFF
963 993 fmt = v & 0xFFFF
964 994 if fmt == REVLOGV0:
965 995 if flags:
966 996 raise RevlogError(_('unknown flags (%#04x) in version %d '
967 997 'revlog %s') %
968 998 (flags >> 16, fmt, self.indexfile))
969 999 elif fmt == REVLOGV1:
970 1000 if flags & ~REVLOGV1_FLAGS:
971 1001 raise RevlogError(_('unknown flags (%#04x) in version %d '
972 1002 'revlog %s') %
973 1003 (flags >> 16, fmt, self.indexfile))
974 1004 elif fmt == REVLOGV2:
975 1005 if flags & ~REVLOGV2_FLAGS:
976 1006 raise RevlogError(_('unknown flags (%#04x) in version %d '
977 1007 'revlog %s') %
978 1008 (flags >> 16, fmt, self.indexfile))
979 1009 else:
980 1010 raise RevlogError(_('unknown version (%d) in revlog %s') %
981 1011 (fmt, self.indexfile))
982 1012
983 1013 self.storedeltachains = True
984 1014
985 1015 self._io = revlogio()
986 1016 if self.version == REVLOGV0:
987 1017 self._io = revlogoldio()
988 1018 try:
989 1019 d = self._io.parseindex(indexdata, self._inline)
990 1020 except (ValueError, IndexError):
991 1021 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
992 1022 self.index, nodemap, self._chunkcache = d
993 1023 if nodemap is not None:
994 1024 self.nodemap = self._nodecache = nodemap
995 1025 if not self._chunkcache:
996 1026 self._chunkclear()
997 1027 # revnum -> (chain-length, sum-delta-length)
998 1028 self._chaininfocache = {}
999 1029 # revlog header -> revlog compressor
1000 1030 self._decompressors = {}
1001 1031
1002 1032 @util.propertycache
1003 1033 def _compressor(self):
1004 1034 return util.compengines[self._compengine].revlogcompressor()
1005 1035
1006 1036 def _indexfp(self, mode='r'):
1007 1037 """file object for the revlog's index file"""
1008 1038 args = {r'mode': mode}
1009 1039 if mode != 'r':
1010 1040 args[r'checkambig'] = self._checkambig
1011 1041 if mode == 'w':
1012 1042 args[r'atomictemp'] = True
1013 1043 return self.opener(self.indexfile, **args)
1014 1044
1015 1045 def _datafp(self, mode='r'):
1016 1046 """file object for the revlog's data file"""
1017 1047 return self.opener(self.datafile, mode=mode)
1018 1048
1019 1049 @contextlib.contextmanager
1020 1050 def _datareadfp(self, existingfp=None):
1021 1051 """file object suitable to read data"""
1022 1052 if existingfp is not None:
1023 1053 yield existingfp
1024 1054 else:
1025 1055 if self._inline:
1026 1056 func = self._indexfp
1027 1057 else:
1028 1058 func = self._datafp
1029 1059 with func() as fp:
1030 1060 yield fp
1031 1061
1032 1062 def tip(self):
1033 1063 return self.node(len(self.index) - 2)
1034 1064 def __contains__(self, rev):
1035 1065 return 0 <= rev < len(self)
1036 1066 def __len__(self):
1037 1067 return len(self.index) - 1
1038 1068 def __iter__(self):
1039 1069 return iter(xrange(len(self)))
1040 1070 def revs(self, start=0, stop=None):
1041 1071 """iterate over all rev in this revlog (from start to stop)"""
1042 1072 step = 1
1043 1073 if stop is not None:
1044 1074 if start > stop:
1045 1075 step = -1
1046 1076 stop += step
1047 1077 else:
1048 1078 stop = len(self)
1049 1079 return xrange(start, stop, step)
1050 1080
1051 1081 @util.propertycache
1052 1082 def nodemap(self):
1053 1083 self.rev(self.node(0))
1054 1084 return self._nodecache
1055 1085
1056 1086 def hasnode(self, node):
1057 1087 try:
1058 1088 self.rev(node)
1059 1089 return True
1060 1090 except KeyError:
1061 1091 return False
1062 1092
1063 1093 def candelta(self, baserev, rev):
1064 1094 """whether two revisions (baserev, rev) can be delta-ed or not"""
1065 1095 # Disable delta if either rev requires a content-changing flag
1066 1096 # processor (ex. LFS). This is because such flag processor can alter
1067 1097 # the rawtext content that the delta will be based on, and two clients
1068 1098 # could have a same revlog node with different flags (i.e. different
1069 1099 # rawtext contents) and the delta could be incompatible.
1070 1100 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1071 1101 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1072 1102 return False
1073 1103 return True
1074 1104
1075 1105 def clearcaches(self):
1076 1106 self._cache = None
1077 1107 self._chainbasecache.clear()
1078 1108 self._chunkcache = (0, '')
1079 1109 self._pcache = {}
1080 1110
1081 1111 try:
1082 1112 self._nodecache.clearcaches()
1083 1113 except AttributeError:
1084 1114 self._nodecache = {nullid: nullrev}
1085 1115 self._nodepos = None
1086 1116
1087 1117 def rev(self, node):
1088 1118 try:
1089 1119 return self._nodecache[node]
1090 1120 except TypeError:
1091 1121 raise
1092 1122 except RevlogError:
1093 1123 # parsers.c radix tree lookup failed
1094 1124 if node == wdirid or node in wdirfilenodeids:
1095 1125 raise error.WdirUnsupported
1096 1126 raise LookupError(node, self.indexfile, _('no node'))
1097 1127 except KeyError:
1098 1128 # pure python cache lookup failed
1099 1129 n = self._nodecache
1100 1130 i = self.index
1101 1131 p = self._nodepos
1102 1132 if p is None:
1103 1133 p = len(i) - 2
1104 1134 else:
1105 1135 assert p < len(i)
1106 1136 for r in xrange(p, -1, -1):
1107 1137 v = i[r][7]
1108 1138 n[v] = r
1109 1139 if v == node:
1110 1140 self._nodepos = r - 1
1111 1141 return r
1112 1142 if node == wdirid or node in wdirfilenodeids:
1113 1143 raise error.WdirUnsupported
1114 1144 raise LookupError(node, self.indexfile, _('no node'))
1115 1145
1116 1146 # Accessors for index entries.
1117 1147
1118 1148 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1119 1149 # are flags.
1120 1150 def start(self, rev):
1121 1151 return int(self.index[rev][0] >> 16)
1122 1152
1123 1153 def flags(self, rev):
1124 1154 return self.index[rev][0] & 0xFFFF
1125 1155
1126 1156 def length(self, rev):
1127 1157 return self.index[rev][1]
1128 1158
1129 1159 def rawsize(self, rev):
1130 1160 """return the length of the uncompressed text for a given revision"""
1131 1161 l = self.index[rev][2]
1132 1162 if l >= 0:
1133 1163 return l
1134 1164
1135 1165 t = self.revision(rev, raw=True)
1136 1166 return len(t)
1137 1167
1138 1168 def size(self, rev):
1139 1169 """length of non-raw text (processed by a "read" flag processor)"""
1140 1170 # fast path: if no "read" flag processor could change the content,
1141 1171 # size is rawsize. note: ELLIPSIS is known to not change the content.
1142 1172 flags = self.flags(rev)
1143 1173 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1144 1174 return self.rawsize(rev)
1145 1175
1146 1176 return len(self.revision(rev, raw=False))
1147 1177
1148 1178 def chainbase(self, rev):
1149 1179 base = self._chainbasecache.get(rev)
1150 1180 if base is not None:
1151 1181 return base
1152 1182
1153 1183 index = self.index
1154 1184 iterrev = rev
1155 1185 base = index[iterrev][3]
1156 1186 while base != iterrev:
1157 1187 iterrev = base
1158 1188 base = index[iterrev][3]
1159 1189
1160 1190 self._chainbasecache[rev] = base
1161 1191 return base
1162 1192
1163 1193 def linkrev(self, rev):
1164 1194 return self.index[rev][4]
1165 1195
1166 1196 def parentrevs(self, rev):
1167 1197 try:
1168 1198 entry = self.index[rev]
1169 1199 except IndexError:
1170 1200 if rev == wdirrev:
1171 1201 raise error.WdirUnsupported
1172 1202 raise
1173 1203
1174 1204 return entry[5], entry[6]
1175 1205
1176 1206 def node(self, rev):
1177 1207 try:
1178 1208 return self.index[rev][7]
1179 1209 except IndexError:
1180 1210 if rev == wdirrev:
1181 1211 raise error.WdirUnsupported
1182 1212 raise
1183 1213
1184 1214 # Derived from index values.
1185 1215
1186 1216 def end(self, rev):
1187 1217 return self.start(rev) + self.length(rev)
1188 1218
1189 1219 def parents(self, node):
1190 1220 i = self.index
1191 1221 d = i[self.rev(node)]
1192 1222 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1193 1223
1194 1224 def chainlen(self, rev):
1195 1225 return self._chaininfo(rev)[0]
1196 1226
1197 1227 def _chaininfo(self, rev):
1198 1228 chaininfocache = self._chaininfocache
1199 1229 if rev in chaininfocache:
1200 1230 return chaininfocache[rev]
1201 1231 index = self.index
1202 1232 generaldelta = self._generaldelta
1203 1233 iterrev = rev
1204 1234 e = index[iterrev]
1205 1235 clen = 0
1206 1236 compresseddeltalen = 0
1207 1237 while iterrev != e[3]:
1208 1238 clen += 1
1209 1239 compresseddeltalen += e[1]
1210 1240 if generaldelta:
1211 1241 iterrev = e[3]
1212 1242 else:
1213 1243 iterrev -= 1
1214 1244 if iterrev in chaininfocache:
1215 1245 t = chaininfocache[iterrev]
1216 1246 clen += t[0]
1217 1247 compresseddeltalen += t[1]
1218 1248 break
1219 1249 e = index[iterrev]
1220 1250 else:
1221 1251 # Add text length of base since decompressing that also takes
1222 1252 # work. For cache hits the length is already included.
1223 1253 compresseddeltalen += e[1]
1224 1254 r = (clen, compresseddeltalen)
1225 1255 chaininfocache[rev] = r
1226 1256 return r
1227 1257
1228 1258 def _deltachain(self, rev, stoprev=None):
1229 1259 """Obtain the delta chain for a revision.
1230 1260
1231 1261 ``stoprev`` specifies a revision to stop at. If not specified, we
1232 1262 stop at the base of the chain.
1233 1263
1234 1264 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1235 1265 revs in ascending order and ``stopped`` is a bool indicating whether
1236 1266 ``stoprev`` was hit.
1237 1267 """
1238 1268 # Try C implementation.
1239 1269 try:
1240 1270 return self.index.deltachain(rev, stoprev, self._generaldelta)
1241 1271 except AttributeError:
1242 1272 pass
1243 1273
1244 1274 chain = []
1245 1275
1246 1276 # Alias to prevent attribute lookup in tight loop.
1247 1277 index = self.index
1248 1278 generaldelta = self._generaldelta
1249 1279
1250 1280 iterrev = rev
1251 1281 e = index[iterrev]
1252 1282 while iterrev != e[3] and iterrev != stoprev:
1253 1283 chain.append(iterrev)
1254 1284 if generaldelta:
1255 1285 iterrev = e[3]
1256 1286 else:
1257 1287 iterrev -= 1
1258 1288 e = index[iterrev]
1259 1289
1260 1290 if iterrev == stoprev:
1261 1291 stopped = True
1262 1292 else:
1263 1293 chain.append(iterrev)
1264 1294 stopped = False
1265 1295
1266 1296 chain.reverse()
1267 1297 return chain, stopped
1268 1298
1269 1299 def ancestors(self, revs, stoprev=0, inclusive=False):
1270 1300 """Generate the ancestors of 'revs' in reverse topological order.
1271 1301 Does not generate revs lower than stoprev.
1272 1302
1273 1303 See the documentation for ancestor.lazyancestors for more details."""
1274 1304
1275 1305 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1276 1306 inclusive=inclusive)
1277 1307
1278 1308 def descendants(self, revs):
1279 1309 """Generate the descendants of 'revs' in revision order.
1280 1310
1281 1311 Yield a sequence of revision numbers starting with a child of
1282 1312 some rev in revs, i.e., each revision is *not* considered a
1283 1313 descendant of itself. Results are ordered by revision number (a
1284 1314 topological sort)."""
1285 1315 first = min(revs)
1286 1316 if first == nullrev:
1287 1317 for i in self:
1288 1318 yield i
1289 1319 return
1290 1320
1291 1321 seen = set(revs)
1292 1322 for i in self.revs(start=first + 1):
1293 1323 for x in self.parentrevs(i):
1294 1324 if x != nullrev and x in seen:
1295 1325 seen.add(i)
1296 1326 yield i
1297 1327 break
1298 1328
1299 1329 def findcommonmissing(self, common=None, heads=None):
1300 1330 """Return a tuple of the ancestors of common and the ancestors of heads
1301 1331 that are not ancestors of common. In revset terminology, we return the
1302 1332 tuple:
1303 1333
1304 1334 ::common, (::heads) - (::common)
1305 1335
1306 1336 The list is sorted by revision number, meaning it is
1307 1337 topologically sorted.
1308 1338
1309 1339 'heads' and 'common' are both lists of node IDs. If heads is
1310 1340 not supplied, uses all of the revlog's heads. If common is not
1311 1341 supplied, uses nullid."""
1312 1342 if common is None:
1313 1343 common = [nullid]
1314 1344 if heads is None:
1315 1345 heads = self.heads()
1316 1346
1317 1347 common = [self.rev(n) for n in common]
1318 1348 heads = [self.rev(n) for n in heads]
1319 1349
1320 1350 # we want the ancestors, but inclusive
1321 1351 class lazyset(object):
1322 1352 def __init__(self, lazyvalues):
1323 1353 self.addedvalues = set()
1324 1354 self.lazyvalues = lazyvalues
1325 1355
1326 1356 def __contains__(self, value):
1327 1357 return value in self.addedvalues or value in self.lazyvalues
1328 1358
1329 1359 def __iter__(self):
1330 1360 added = self.addedvalues
1331 1361 for r in added:
1332 1362 yield r
1333 1363 for r in self.lazyvalues:
1334 1364 if not r in added:
1335 1365 yield r
1336 1366
1337 1367 def add(self, value):
1338 1368 self.addedvalues.add(value)
1339 1369
1340 1370 def update(self, values):
1341 1371 self.addedvalues.update(values)
1342 1372
1343 1373 has = lazyset(self.ancestors(common))
1344 1374 has.add(nullrev)
1345 1375 has.update(common)
1346 1376
1347 1377 # take all ancestors from heads that aren't in has
1348 1378 missing = set()
1349 1379 visit = collections.deque(r for r in heads if r not in has)
1350 1380 while visit:
1351 1381 r = visit.popleft()
1352 1382 if r in missing:
1353 1383 continue
1354 1384 else:
1355 1385 missing.add(r)
1356 1386 for p in self.parentrevs(r):
1357 1387 if p not in has:
1358 1388 visit.append(p)
1359 1389 missing = list(missing)
1360 1390 missing.sort()
1361 1391 return has, [self.node(miss) for miss in missing]
1362 1392
1363 1393 def incrementalmissingrevs(self, common=None):
1364 1394 """Return an object that can be used to incrementally compute the
1365 1395 revision numbers of the ancestors of arbitrary sets that are not
1366 1396 ancestors of common. This is an ancestor.incrementalmissingancestors
1367 1397 object.
1368 1398
1369 1399 'common' is a list of revision numbers. If common is not supplied, uses
1370 1400 nullrev.
1371 1401 """
1372 1402 if common is None:
1373 1403 common = [nullrev]
1374 1404
1375 1405 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1376 1406
1377 1407 def findmissingrevs(self, common=None, heads=None):
1378 1408 """Return the revision numbers of the ancestors of heads that
1379 1409 are not ancestors of common.
1380 1410
1381 1411 More specifically, return a list of revision numbers corresponding to
1382 1412 nodes N such that every N satisfies the following constraints:
1383 1413
1384 1414 1. N is an ancestor of some node in 'heads'
1385 1415 2. N is not an ancestor of any node in 'common'
1386 1416
1387 1417 The list is sorted by revision number, meaning it is
1388 1418 topologically sorted.
1389 1419
1390 1420 'heads' and 'common' are both lists of revision numbers. If heads is
1391 1421 not supplied, uses all of the revlog's heads. If common is not
1392 1422 supplied, uses nullid."""
1393 1423 if common is None:
1394 1424 common = [nullrev]
1395 1425 if heads is None:
1396 1426 heads = self.headrevs()
1397 1427
1398 1428 inc = self.incrementalmissingrevs(common=common)
1399 1429 return inc.missingancestors(heads)
1400 1430
1401 1431 def findmissing(self, common=None, heads=None):
1402 1432 """Return the ancestors of heads that are not ancestors of common.
1403 1433
1404 1434 More specifically, return a list of nodes N such that every N
1405 1435 satisfies the following constraints:
1406 1436
1407 1437 1. N is an ancestor of some node in 'heads'
1408 1438 2. N is not an ancestor of any node in 'common'
1409 1439
1410 1440 The list is sorted by revision number, meaning it is
1411 1441 topologically sorted.
1412 1442
1413 1443 'heads' and 'common' are both lists of node IDs. If heads is
1414 1444 not supplied, uses all of the revlog's heads. If common is not
1415 1445 supplied, uses nullid."""
1416 1446 if common is None:
1417 1447 common = [nullid]
1418 1448 if heads is None:
1419 1449 heads = self.heads()
1420 1450
1421 1451 common = [self.rev(n) for n in common]
1422 1452 heads = [self.rev(n) for n in heads]
1423 1453
1424 1454 inc = self.incrementalmissingrevs(common=common)
1425 1455 return [self.node(r) for r in inc.missingancestors(heads)]
1426 1456
1427 1457 def nodesbetween(self, roots=None, heads=None):
1428 1458 """Return a topological path from 'roots' to 'heads'.
1429 1459
1430 1460 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1431 1461 topologically sorted list of all nodes N that satisfy both of
1432 1462 these constraints:
1433 1463
1434 1464 1. N is a descendant of some node in 'roots'
1435 1465 2. N is an ancestor of some node in 'heads'
1436 1466
1437 1467 Every node is considered to be both a descendant and an ancestor
1438 1468 of itself, so every reachable node in 'roots' and 'heads' will be
1439 1469 included in 'nodes'.
1440 1470
1441 1471 'outroots' is the list of reachable nodes in 'roots', i.e., the
1442 1472 subset of 'roots' that is returned in 'nodes'. Likewise,
1443 1473 'outheads' is the subset of 'heads' that is also in 'nodes'.
1444 1474
1445 1475 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1446 1476 unspecified, uses nullid as the only root. If 'heads' is
1447 1477 unspecified, uses list of all of the revlog's heads."""
1448 1478 nonodes = ([], [], [])
1449 1479 if roots is not None:
1450 1480 roots = list(roots)
1451 1481 if not roots:
1452 1482 return nonodes
1453 1483 lowestrev = min([self.rev(n) for n in roots])
1454 1484 else:
1455 1485 roots = [nullid] # Everybody's a descendant of nullid
1456 1486 lowestrev = nullrev
1457 1487 if (lowestrev == nullrev) and (heads is None):
1458 1488 # We want _all_ the nodes!
1459 1489 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1460 1490 if heads is None:
1461 1491 # All nodes are ancestors, so the latest ancestor is the last
1462 1492 # node.
1463 1493 highestrev = len(self) - 1
1464 1494 # Set ancestors to None to signal that every node is an ancestor.
1465 1495 ancestors = None
1466 1496 # Set heads to an empty dictionary for later discovery of heads
1467 1497 heads = {}
1468 1498 else:
1469 1499 heads = list(heads)
1470 1500 if not heads:
1471 1501 return nonodes
1472 1502 ancestors = set()
1473 1503 # Turn heads into a dictionary so we can remove 'fake' heads.
1474 1504 # Also, later we will be using it to filter out the heads we can't
1475 1505 # find from roots.
1476 1506 heads = dict.fromkeys(heads, False)
1477 1507 # Start at the top and keep marking parents until we're done.
1478 1508 nodestotag = set(heads)
1479 1509 # Remember where the top was so we can use it as a limit later.
1480 1510 highestrev = max([self.rev(n) for n in nodestotag])
1481 1511 while nodestotag:
1482 1512 # grab a node to tag
1483 1513 n = nodestotag.pop()
1484 1514 # Never tag nullid
1485 1515 if n == nullid:
1486 1516 continue
1487 1517 # A node's revision number represents its place in a
1488 1518 # topologically sorted list of nodes.
1489 1519 r = self.rev(n)
1490 1520 if r >= lowestrev:
1491 1521 if n not in ancestors:
1492 1522 # If we are possibly a descendant of one of the roots
1493 1523 # and we haven't already been marked as an ancestor
1494 1524 ancestors.add(n) # Mark as ancestor
1495 1525 # Add non-nullid parents to list of nodes to tag.
1496 1526 nodestotag.update([p for p in self.parents(n) if
1497 1527 p != nullid])
1498 1528 elif n in heads: # We've seen it before, is it a fake head?
1499 1529 # So it is, real heads should not be the ancestors of
1500 1530 # any other heads.
1501 1531 heads.pop(n)
1502 1532 if not ancestors:
1503 1533 return nonodes
1504 1534 # Now that we have our set of ancestors, we want to remove any
1505 1535 # roots that are not ancestors.
1506 1536
1507 1537 # If one of the roots was nullid, everything is included anyway.
1508 1538 if lowestrev > nullrev:
1509 1539 # But, since we weren't, let's recompute the lowest rev to not
1510 1540 # include roots that aren't ancestors.
1511 1541
1512 1542 # Filter out roots that aren't ancestors of heads
1513 1543 roots = [root for root in roots if root in ancestors]
1514 1544 # Recompute the lowest revision
1515 1545 if roots:
1516 1546 lowestrev = min([self.rev(root) for root in roots])
1517 1547 else:
1518 1548 # No more roots? Return empty list
1519 1549 return nonodes
1520 1550 else:
1521 1551 # We are descending from nullid, and don't need to care about
1522 1552 # any other roots.
1523 1553 lowestrev = nullrev
1524 1554 roots = [nullid]
1525 1555 # Transform our roots list into a set.
1526 1556 descendants = set(roots)
1527 1557 # Also, keep the original roots so we can filter out roots that aren't
1528 1558 # 'real' roots (i.e. are descended from other roots).
1529 1559 roots = descendants.copy()
1530 1560 # Our topologically sorted list of output nodes.
1531 1561 orderedout = []
1532 1562 # Don't start at nullid since we don't want nullid in our output list,
1533 1563 # and if nullid shows up in descendants, empty parents will look like
1534 1564 # they're descendants.
1535 1565 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1536 1566 n = self.node(r)
1537 1567 isdescendant = False
1538 1568 if lowestrev == nullrev: # Everybody is a descendant of nullid
1539 1569 isdescendant = True
1540 1570 elif n in descendants:
1541 1571 # n is already a descendant
1542 1572 isdescendant = True
1543 1573 # This check only needs to be done here because all the roots
1544 1574 # will start being marked is descendants before the loop.
1545 1575 if n in roots:
1546 1576 # If n was a root, check if it's a 'real' root.
1547 1577 p = tuple(self.parents(n))
1548 1578 # If any of its parents are descendants, it's not a root.
1549 1579 if (p[0] in descendants) or (p[1] in descendants):
1550 1580 roots.remove(n)
1551 1581 else:
1552 1582 p = tuple(self.parents(n))
1553 1583 # A node is a descendant if either of its parents are
1554 1584 # descendants. (We seeded the dependents list with the roots
1555 1585 # up there, remember?)
1556 1586 if (p[0] in descendants) or (p[1] in descendants):
1557 1587 descendants.add(n)
1558 1588 isdescendant = True
1559 1589 if isdescendant and ((ancestors is None) or (n in ancestors)):
1560 1590 # Only include nodes that are both descendants and ancestors.
1561 1591 orderedout.append(n)
1562 1592 if (ancestors is not None) and (n in heads):
1563 1593 # We're trying to figure out which heads are reachable
1564 1594 # from roots.
1565 1595 # Mark this head as having been reached
1566 1596 heads[n] = True
1567 1597 elif ancestors is None:
1568 1598 # Otherwise, we're trying to discover the heads.
1569 1599 # Assume this is a head because if it isn't, the next step
1570 1600 # will eventually remove it.
1571 1601 heads[n] = True
1572 1602 # But, obviously its parents aren't.
1573 1603 for p in self.parents(n):
1574 1604 heads.pop(p, None)
1575 1605 heads = [head for head, flag in heads.iteritems() if flag]
1576 1606 roots = list(roots)
1577 1607 assert orderedout
1578 1608 assert roots
1579 1609 assert heads
1580 1610 return (orderedout, roots, heads)
1581 1611
1582 1612 def headrevs(self):
1583 1613 try:
1584 1614 return self.index.headrevs()
1585 1615 except AttributeError:
1586 1616 return self._headrevs()
1587 1617
1588 1618 def computephases(self, roots):
1589 1619 return self.index.computephasesmapsets(roots)
1590 1620
1591 1621 def _headrevs(self):
1592 1622 count = len(self)
1593 1623 if not count:
1594 1624 return [nullrev]
1595 1625 # we won't iter over filtered rev so nobody is a head at start
1596 1626 ishead = [0] * (count + 1)
1597 1627 index = self.index
1598 1628 for r in self:
1599 1629 ishead[r] = 1 # I may be an head
1600 1630 e = index[r]
1601 1631 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1602 1632 return [r for r, val in enumerate(ishead) if val]
1603 1633
1604 1634 def heads(self, start=None, stop=None):
1605 1635 """return the list of all nodes that have no children
1606 1636
1607 1637 if start is specified, only heads that are descendants of
1608 1638 start will be returned
1609 1639 if stop is specified, it will consider all the revs from stop
1610 1640 as if they had no children
1611 1641 """
1612 1642 if start is None and stop is None:
1613 1643 if not len(self):
1614 1644 return [nullid]
1615 1645 return [self.node(r) for r in self.headrevs()]
1616 1646
1617 1647 if start is None:
1618 1648 start = nullid
1619 1649 if stop is None:
1620 1650 stop = []
1621 1651 stoprevs = set([self.rev(n) for n in stop])
1622 1652 startrev = self.rev(start)
1623 1653 reachable = {startrev}
1624 1654 heads = {startrev}
1625 1655
1626 1656 parentrevs = self.parentrevs
1627 1657 for r in self.revs(start=startrev + 1):
1628 1658 for p in parentrevs(r):
1629 1659 if p in reachable:
1630 1660 if r not in stoprevs:
1631 1661 reachable.add(r)
1632 1662 heads.add(r)
1633 1663 if p in heads and p not in stoprevs:
1634 1664 heads.remove(p)
1635 1665
1636 1666 return [self.node(r) for r in heads]
1637 1667
1638 1668 def children(self, node):
1639 1669 """find the children of a given node"""
1640 1670 c = []
1641 1671 p = self.rev(node)
1642 1672 for r in self.revs(start=p + 1):
1643 1673 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1644 1674 if prevs:
1645 1675 for pr in prevs:
1646 1676 if pr == p:
1647 1677 c.append(self.node(r))
1648 1678 elif p == nullrev:
1649 1679 c.append(self.node(r))
1650 1680 return c
1651 1681
1652 1682 def commonancestorsheads(self, a, b):
1653 1683 """calculate all the heads of the common ancestors of nodes a and b"""
1654 1684 a, b = self.rev(a), self.rev(b)
1655 1685 ancs = self._commonancestorsheads(a, b)
1656 1686 return pycompat.maplist(self.node, ancs)
1657 1687
1658 1688 def _commonancestorsheads(self, *revs):
1659 1689 """calculate all the heads of the common ancestors of revs"""
1660 1690 try:
1661 1691 ancs = self.index.commonancestorsheads(*revs)
1662 1692 except (AttributeError, OverflowError): # C implementation failed
1663 1693 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1664 1694 return ancs
1665 1695
1666 1696 def isancestor(self, a, b):
1667 1697 """return True if node a is an ancestor of node b
1668 1698
1669 1699 A revision is considered an ancestor of itself."""
1670 1700 a, b = self.rev(a), self.rev(b)
1671 1701 return self.isancestorrev(a, b)
1672 1702
1673 1703 def descendant(self, a, b):
1674 1704 msg = (b'revlog.descendant is deprecated, use revlog.isancestorrev')
1675 1705 self._repo.ui.deprecwarn(msg, b'4.7')
1676 1706 return self.isancestorrev(a, b)
1677 1707
1678 1708 def isancestorrev(self, a, b):
1679 1709 """return True if revision a is an ancestor of revision b
1680 1710
1681 1711 A revision is considered an ancestor of itself.
1682 1712
1683 1713 The implementation of this is trivial but the use of
1684 1714 commonancestorsheads is not."""
1685 1715 if a == nullrev:
1686 1716 return True
1687 1717 elif a == b:
1688 1718 return True
1689 1719 elif a > b:
1690 1720 return False
1691 1721 return a in self._commonancestorsheads(a, b)
1692 1722
1693 1723 def ancestor(self, a, b):
1694 1724 """calculate the "best" common ancestor of nodes a and b"""
1695 1725
1696 1726 a, b = self.rev(a), self.rev(b)
1697 1727 try:
1698 1728 ancs = self.index.ancestors(a, b)
1699 1729 except (AttributeError, OverflowError):
1700 1730 ancs = ancestor.ancestors(self.parentrevs, a, b)
1701 1731 if ancs:
1702 1732 # choose a consistent winner when there's a tie
1703 1733 return min(map(self.node, ancs))
1704 1734 return nullid
1705 1735
1706 1736 def _match(self, id):
1707 1737 if isinstance(id, int):
1708 1738 # rev
1709 1739 return self.node(id)
1710 1740 if len(id) == 20:
1711 1741 # possibly a binary node
1712 1742 # odds of a binary node being all hex in ASCII are 1 in 10**25
1713 1743 try:
1714 1744 node = id
1715 1745 self.rev(node) # quick search the index
1716 1746 return node
1717 1747 except LookupError:
1718 1748 pass # may be partial hex id
1719 1749 try:
1720 1750 # str(rev)
1721 1751 rev = int(id)
1722 1752 if "%d" % rev != id:
1723 1753 raise ValueError
1724 1754 if rev < 0:
1725 1755 rev = len(self) + rev
1726 1756 if rev < 0 or rev >= len(self):
1727 1757 raise ValueError
1728 1758 return self.node(rev)
1729 1759 except (ValueError, OverflowError):
1730 1760 pass
1731 1761 if len(id) == 40:
1732 1762 try:
1733 1763 # a full hex nodeid?
1734 1764 node = bin(id)
1735 1765 self.rev(node)
1736 1766 return node
1737 1767 except (TypeError, LookupError):
1738 1768 pass
1739 1769
1740 1770 def _partialmatch(self, id):
1741 1771 # we don't care wdirfilenodeids as they should be always full hash
1742 1772 maybewdir = wdirhex.startswith(id)
1743 1773 try:
1744 1774 partial = self.index.partialmatch(id)
1745 1775 if partial and self.hasnode(partial):
1746 1776 if maybewdir:
1747 1777 # single 'ff...' match in radix tree, ambiguous with wdir
1748 1778 raise RevlogError
1749 1779 return partial
1750 1780 if maybewdir:
1751 1781 # no 'ff...' match in radix tree, wdir identified
1752 1782 raise error.WdirUnsupported
1753 1783 return None
1754 1784 except RevlogError:
1755 1785 # parsers.c radix tree lookup gave multiple matches
1756 1786 # fast path: for unfiltered changelog, radix tree is accurate
1757 1787 if not getattr(self, 'filteredrevs', None):
1758 1788 raise LookupError(id, self.indexfile,
1759 1789 _('ambiguous identifier'))
1760 1790 # fall through to slow path that filters hidden revisions
1761 1791 except (AttributeError, ValueError):
1762 1792 # we are pure python, or key was too short to search radix tree
1763 1793 pass
1764 1794
1765 1795 if id in self._pcache:
1766 1796 return self._pcache[id]
1767 1797
1768 1798 if len(id) <= 40:
1769 1799 try:
1770 1800 # hex(node)[:...]
1771 1801 l = len(id) // 2 # grab an even number of digits
1772 1802 prefix = bin(id[:l * 2])
1773 1803 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1774 1804 nl = [n for n in nl if hex(n).startswith(id) and
1775 1805 self.hasnode(n)]
1776 1806 if len(nl) > 0:
1777 1807 if len(nl) == 1 and not maybewdir:
1778 1808 self._pcache[id] = nl[0]
1779 1809 return nl[0]
1780 1810 raise LookupError(id, self.indexfile,
1781 1811 _('ambiguous identifier'))
1782 1812 if maybewdir:
1783 1813 raise error.WdirUnsupported
1784 1814 return None
1785 1815 except TypeError:
1786 1816 pass
1787 1817
1788 1818 def lookup(self, id):
1789 1819 """locate a node based on:
1790 1820 - revision number or str(revision number)
1791 1821 - nodeid or subset of hex nodeid
1792 1822 """
1793 1823 n = self._match(id)
1794 1824 if n is not None:
1795 1825 return n
1796 1826 n = self._partialmatch(id)
1797 1827 if n:
1798 1828 return n
1799 1829
1800 1830 raise LookupError(id, self.indexfile, _('no match found'))
1801 1831
1802 1832 def shortest(self, node, minlength=1):
1803 1833 """Find the shortest unambiguous prefix that matches node."""
1804 1834 def isvalid(prefix):
1805 1835 try:
1806 1836 node = self._partialmatch(prefix)
1807 1837 except error.RevlogError:
1808 1838 return False
1809 1839 except error.WdirUnsupported:
1810 1840 # single 'ff...' match
1811 1841 return True
1812 1842 if node is None:
1813 1843 raise LookupError(node, self.indexfile, _('no node'))
1814 1844 return True
1815 1845
1816 1846 def maybewdir(prefix):
1817 1847 return all(c == 'f' for c in prefix)
1818 1848
1819 1849 hexnode = hex(node)
1820 1850
1821 1851 def disambiguate(hexnode, minlength):
1822 1852 """Disambiguate against wdirid."""
1823 1853 for length in range(minlength, 41):
1824 1854 prefix = hexnode[:length]
1825 1855 if not maybewdir(prefix):
1826 1856 return prefix
1827 1857
1828 1858 if not getattr(self, 'filteredrevs', None):
1829 1859 try:
1830 1860 length = max(self.index.shortest(node), minlength)
1831 1861 return disambiguate(hexnode, length)
1832 1862 except RevlogError:
1833 1863 if node != wdirid:
1834 1864 raise LookupError(node, self.indexfile, _('no node'))
1835 1865 except AttributeError:
1836 1866 # Fall through to pure code
1837 1867 pass
1838 1868
1839 1869 if node == wdirid:
1840 1870 for length in range(minlength, 41):
1841 1871 prefix = hexnode[:length]
1842 1872 if isvalid(prefix):
1843 1873 return prefix
1844 1874
1845 1875 for length in range(minlength, 41):
1846 1876 prefix = hexnode[:length]
1847 1877 if isvalid(prefix):
1848 1878 return disambiguate(hexnode, length)
1849 1879
1850 1880 def cmp(self, node, text):
1851 1881 """compare text with a given file revision
1852 1882
1853 1883 returns True if text is different than what is stored.
1854 1884 """
1855 1885 p1, p2 = self.parents(node)
1856 1886 return hash(text, p1, p2) != node
1857 1887
1858 1888 def _cachesegment(self, offset, data):
1859 1889 """Add a segment to the revlog cache.
1860 1890
1861 1891 Accepts an absolute offset and the data that is at that location.
1862 1892 """
1863 1893 o, d = self._chunkcache
1864 1894 # try to add to existing cache
1865 1895 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1866 1896 self._chunkcache = o, d + data
1867 1897 else:
1868 1898 self._chunkcache = offset, data
1869 1899
1870 1900 def _readsegment(self, offset, length, df=None):
1871 1901 """Load a segment of raw data from the revlog.
1872 1902
1873 1903 Accepts an absolute offset, length to read, and an optional existing
1874 1904 file handle to read from.
1875 1905
1876 1906 If an existing file handle is passed, it will be seeked and the
1877 1907 original seek position will NOT be restored.
1878 1908
1879 1909 Returns a str or buffer of raw byte data.
1880 1910 """
1881 1911 # Cache data both forward and backward around the requested
1882 1912 # data, in a fixed size window. This helps speed up operations
1883 1913 # involving reading the revlog backwards.
1884 1914 cachesize = self._chunkcachesize
1885 1915 realoffset = offset & ~(cachesize - 1)
1886 1916 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1887 1917 - realoffset)
1888 1918 with self._datareadfp(df) as df:
1889 1919 df.seek(realoffset)
1890 1920 d = df.read(reallength)
1891 1921 self._cachesegment(realoffset, d)
1892 1922 if offset != realoffset or reallength != length:
1893 1923 return util.buffer(d, offset - realoffset, length)
1894 1924 return d
1895 1925
1896 1926 def _getsegment(self, offset, length, df=None):
1897 1927 """Obtain a segment of raw data from the revlog.
1898 1928
1899 1929 Accepts an absolute offset, length of bytes to obtain, and an
1900 1930 optional file handle to the already-opened revlog. If the file
1901 1931 handle is used, it's original seek position will not be preserved.
1902 1932
1903 1933 Requests for data may be returned from a cache.
1904 1934
1905 1935 Returns a str or a buffer instance of raw byte data.
1906 1936 """
1907 1937 o, d = self._chunkcache
1908 1938 l = len(d)
1909 1939
1910 1940 # is it in the cache?
1911 1941 cachestart = offset - o
1912 1942 cacheend = cachestart + length
1913 1943 if cachestart >= 0 and cacheend <= l:
1914 1944 if cachestart == 0 and cacheend == l:
1915 1945 return d # avoid a copy
1916 1946 return util.buffer(d, cachestart, cacheend - cachestart)
1917 1947
1918 1948 return self._readsegment(offset, length, df=df)
1919 1949
1920 1950 def _getsegmentforrevs(self, startrev, endrev, df=None):
1921 1951 """Obtain a segment of raw data corresponding to a range of revisions.
1922 1952
1923 1953 Accepts the start and end revisions and an optional already-open
1924 1954 file handle to be used for reading. If the file handle is read, its
1925 1955 seek position will not be preserved.
1926 1956
1927 1957 Requests for data may be satisfied by a cache.
1928 1958
1929 1959 Returns a 2-tuple of (offset, data) for the requested range of
1930 1960 revisions. Offset is the integer offset from the beginning of the
1931 1961 revlog and data is a str or buffer of the raw byte data.
1932 1962
1933 1963 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1934 1964 to determine where each revision's data begins and ends.
1935 1965 """
1936 1966 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1937 1967 # (functions are expensive).
1938 1968 index = self.index
1939 1969 istart = index[startrev]
1940 1970 start = int(istart[0] >> 16)
1941 1971 if startrev == endrev:
1942 1972 end = start + istart[1]
1943 1973 else:
1944 1974 iend = index[endrev]
1945 1975 end = int(iend[0] >> 16) + iend[1]
1946 1976
1947 1977 if self._inline:
1948 1978 start += (startrev + 1) * self._io.size
1949 1979 end += (endrev + 1) * self._io.size
1950 1980 length = end - start
1951 1981
1952 1982 return start, self._getsegment(start, length, df=df)
1953 1983
1954 1984 def _chunk(self, rev, df=None):
1955 1985 """Obtain a single decompressed chunk for a revision.
1956 1986
1957 1987 Accepts an integer revision and an optional already-open file handle
1958 1988 to be used for reading. If used, the seek position of the file will not
1959 1989 be preserved.
1960 1990
1961 1991 Returns a str holding uncompressed data for the requested revision.
1962 1992 """
1963 1993 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1964 1994
1965 1995 def _chunks(self, revs, df=None, targetsize=None):
1966 1996 """Obtain decompressed chunks for the specified revisions.
1967 1997
1968 1998 Accepts an iterable of numeric revisions that are assumed to be in
1969 1999 ascending order. Also accepts an optional already-open file handle
1970 2000 to be used for reading. If used, the seek position of the file will
1971 2001 not be preserved.
1972 2002
1973 2003 This function is similar to calling ``self._chunk()`` multiple times,
1974 2004 but is faster.
1975 2005
1976 2006 Returns a list with decompressed data for each requested revision.
1977 2007 """
1978 2008 if not revs:
1979 2009 return []
1980 2010 start = self.start
1981 2011 length = self.length
1982 2012 inline = self._inline
1983 2013 iosize = self._io.size
1984 2014 buffer = util.buffer
1985 2015
1986 2016 l = []
1987 2017 ladd = l.append
1988 2018
1989 2019 if not self._withsparseread:
1990 2020 slicedchunks = (revs,)
1991 2021 else:
1992 slicedchunks = _slicechunk(self, revs, targetsize)
2022 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
1993 2023
1994 2024 for revschunk in slicedchunks:
1995 2025 firstrev = revschunk[0]
1996 2026 # Skip trailing revisions with empty diff
1997 2027 for lastrev in revschunk[::-1]:
1998 2028 if length(lastrev) != 0:
1999 2029 break
2000 2030
2001 2031 try:
2002 2032 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2003 2033 except OverflowError:
2004 2034 # issue4215 - we can't cache a run of chunks greater than
2005 2035 # 2G on Windows
2006 2036 return [self._chunk(rev, df=df) for rev in revschunk]
2007 2037
2008 2038 decomp = self.decompress
2009 2039 for rev in revschunk:
2010 2040 chunkstart = start(rev)
2011 2041 if inline:
2012 2042 chunkstart += (rev + 1) * iosize
2013 2043 chunklength = length(rev)
2014 2044 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2015 2045
2016 2046 return l
2017 2047
2018 2048 def _chunkclear(self):
2019 2049 """Clear the raw chunk cache."""
2020 2050 self._chunkcache = (0, '')
2021 2051
2022 2052 def deltaparent(self, rev):
2023 2053 """return deltaparent of the given revision"""
2024 2054 base = self.index[rev][3]
2025 2055 if base == rev:
2026 2056 return nullrev
2027 2057 elif self._generaldelta:
2028 2058 return base
2029 2059 else:
2030 2060 return rev - 1
2031 2061
2032 2062 def revdiff(self, rev1, rev2):
2033 2063 """return or calculate a delta between two revisions
2034 2064
2035 2065 The delta calculated is in binary form and is intended to be written to
2036 2066 revlog data directly. So this function needs raw revision data.
2037 2067 """
2038 2068 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2039 2069 return bytes(self._chunk(rev2))
2040 2070
2041 2071 return mdiff.textdiff(self.revision(rev1, raw=True),
2042 2072 self.revision(rev2, raw=True))
2043 2073
2044 2074 def revision(self, nodeorrev, _df=None, raw=False):
2045 2075 """return an uncompressed revision of a given node or revision
2046 2076 number.
2047 2077
2048 2078 _df - an existing file handle to read from. (internal-only)
2049 2079 raw - an optional argument specifying if the revision data is to be
2050 2080 treated as raw data when applying flag transforms. 'raw' should be set
2051 2081 to True when generating changegroups or in debug commands.
2052 2082 """
2053 2083 if isinstance(nodeorrev, int):
2054 2084 rev = nodeorrev
2055 2085 node = self.node(rev)
2056 2086 else:
2057 2087 node = nodeorrev
2058 2088 rev = None
2059 2089
2060 2090 cachedrev = None
2061 2091 flags = None
2062 2092 rawtext = None
2063 2093 if node == nullid:
2064 2094 return ""
2065 2095 if self._cache:
2066 2096 if self._cache[0] == node:
2067 2097 # _cache only stores rawtext
2068 2098 if raw:
2069 2099 return self._cache[2]
2070 2100 # duplicated, but good for perf
2071 2101 if rev is None:
2072 2102 rev = self.rev(node)
2073 2103 if flags is None:
2074 2104 flags = self.flags(rev)
2075 2105 # no extra flags set, no flag processor runs, text = rawtext
2076 2106 if flags == REVIDX_DEFAULT_FLAGS:
2077 2107 return self._cache[2]
2078 2108 # rawtext is reusable. need to run flag processor
2079 2109 rawtext = self._cache[2]
2080 2110
2081 2111 cachedrev = self._cache[1]
2082 2112
2083 2113 # look up what we need to read
2084 2114 if rawtext is None:
2085 2115 if rev is None:
2086 2116 rev = self.rev(node)
2087 2117
2088 2118 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2089 2119 if stopped:
2090 2120 rawtext = self._cache[2]
2091 2121
2092 2122 # drop cache to save memory
2093 2123 self._cache = None
2094 2124
2095 2125 targetsize = None
2096 2126 rawsize = self.index[rev][2]
2097 2127 if 0 <= rawsize:
2098 2128 targetsize = 4 * rawsize
2099 2129
2100 2130 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2101 2131 if rawtext is None:
2102 2132 rawtext = bytes(bins[0])
2103 2133 bins = bins[1:]
2104 2134
2105 2135 rawtext = mdiff.patches(rawtext, bins)
2106 2136 self._cache = (node, rev, rawtext)
2107 2137
2108 2138 if flags is None:
2109 2139 if rev is None:
2110 2140 rev = self.rev(node)
2111 2141 flags = self.flags(rev)
2112 2142
2113 2143 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2114 2144 if validatehash:
2115 2145 self.checkhash(text, node, rev=rev)
2116 2146
2117 2147 return text
2118 2148
2119 2149 def hash(self, text, p1, p2):
2120 2150 """Compute a node hash.
2121 2151
2122 2152 Available as a function so that subclasses can replace the hash
2123 2153 as needed.
2124 2154 """
2125 2155 return hash(text, p1, p2)
2126 2156
2127 2157 def _processflags(self, text, flags, operation, raw=False):
2128 2158 """Inspect revision data flags and applies transforms defined by
2129 2159 registered flag processors.
2130 2160
2131 2161 ``text`` - the revision data to process
2132 2162 ``flags`` - the revision flags
2133 2163 ``operation`` - the operation being performed (read or write)
2134 2164 ``raw`` - an optional argument describing if the raw transform should be
2135 2165 applied.
2136 2166
2137 2167 This method processes the flags in the order (or reverse order if
2138 2168 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2139 2169 flag processors registered for present flags. The order of flags defined
2140 2170 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2141 2171
2142 2172 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2143 2173 processed text and ``validatehash`` is a bool indicating whether the
2144 2174 returned text should be checked for hash integrity.
2145 2175
2146 2176 Note: If the ``raw`` argument is set, it has precedence over the
2147 2177 operation and will only update the value of ``validatehash``.
2148 2178 """
2149 2179 # fast path: no flag processors will run
2150 2180 if flags == 0:
2151 2181 return text, True
2152 2182 if not operation in ('read', 'write'):
2153 2183 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
2154 2184 # Check all flags are known.
2155 2185 if flags & ~REVIDX_KNOWN_FLAGS:
2156 2186 raise RevlogError(_("incompatible revision flag '%#x'") %
2157 2187 (flags & ~REVIDX_KNOWN_FLAGS))
2158 2188 validatehash = True
2159 2189 # Depending on the operation (read or write), the order might be
2160 2190 # reversed due to non-commutative transforms.
2161 2191 orderedflags = REVIDX_FLAGS_ORDER
2162 2192 if operation == 'write':
2163 2193 orderedflags = reversed(orderedflags)
2164 2194
2165 2195 for flag in orderedflags:
2166 2196 # If a flagprocessor has been registered for a known flag, apply the
2167 2197 # related operation transform and update result tuple.
2168 2198 if flag & flags:
2169 2199 vhash = True
2170 2200
2171 2201 if flag not in _flagprocessors:
2172 2202 message = _("missing processor for flag '%#x'") % (flag)
2173 2203 raise RevlogError(message)
2174 2204
2175 2205 processor = _flagprocessors[flag]
2176 2206 if processor is not None:
2177 2207 readtransform, writetransform, rawtransform = processor
2178 2208
2179 2209 if raw:
2180 2210 vhash = rawtransform(self, text)
2181 2211 elif operation == 'read':
2182 2212 text, vhash = readtransform(self, text)
2183 2213 else: # write operation
2184 2214 text, vhash = writetransform(self, text)
2185 2215 validatehash = validatehash and vhash
2186 2216
2187 2217 return text, validatehash
2188 2218
2189 2219 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2190 2220 """Check node hash integrity.
2191 2221
2192 2222 Available as a function so that subclasses can extend hash mismatch
2193 2223 behaviors as needed.
2194 2224 """
2195 2225 try:
2196 2226 if p1 is None and p2 is None:
2197 2227 p1, p2 = self.parents(node)
2198 2228 if node != self.hash(text, p1, p2):
2199 2229 revornode = rev
2200 2230 if revornode is None:
2201 2231 revornode = templatefilters.short(hex(node))
2202 2232 raise RevlogError(_("integrity check failed on %s:%s")
2203 2233 % (self.indexfile, pycompat.bytestr(revornode)))
2204 2234 except RevlogError:
2205 2235 if self._censorable and _censoredtext(text):
2206 2236 raise error.CensoredNodeError(self.indexfile, node, text)
2207 2237 raise
2208 2238
2209 2239 def _enforceinlinesize(self, tr, fp=None):
2210 2240 """Check if the revlog is too big for inline and convert if so.
2211 2241
2212 2242 This should be called after revisions are added to the revlog. If the
2213 2243 revlog has grown too large to be an inline revlog, it will convert it
2214 2244 to use multiple index and data files.
2215 2245 """
2216 2246 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2217 2247 return
2218 2248
2219 2249 trinfo = tr.find(self.indexfile)
2220 2250 if trinfo is None:
2221 2251 raise RevlogError(_("%s not found in the transaction")
2222 2252 % self.indexfile)
2223 2253
2224 2254 trindex = trinfo[2]
2225 2255 if trindex is not None:
2226 2256 dataoff = self.start(trindex)
2227 2257 else:
2228 2258 # revlog was stripped at start of transaction, use all leftover data
2229 2259 trindex = len(self) - 1
2230 2260 dataoff = self.end(-2)
2231 2261
2232 2262 tr.add(self.datafile, dataoff)
2233 2263
2234 2264 if fp:
2235 2265 fp.flush()
2236 2266 fp.close()
2237 2267
2238 2268 with self._datafp('w') as df:
2239 2269 for r in self:
2240 2270 df.write(self._getsegmentforrevs(r, r)[1])
2241 2271
2242 2272 with self._indexfp('w') as fp:
2243 2273 self.version &= ~FLAG_INLINE_DATA
2244 2274 self._inline = False
2245 2275 io = self._io
2246 2276 for i in self:
2247 2277 e = io.packentry(self.index[i], self.node, self.version, i)
2248 2278 fp.write(e)
2249 2279
2250 2280 # the temp file replace the real index when we exit the context
2251 2281 # manager
2252 2282
2253 2283 tr.replace(self.indexfile, trindex * self._io.size)
2254 2284 self._chunkclear()
2255 2285
2256 2286 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2257 2287 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2258 2288 """add a revision to the log
2259 2289
2260 2290 text - the revision data to add
2261 2291 transaction - the transaction object used for rollback
2262 2292 link - the linkrev data to add
2263 2293 p1, p2 - the parent nodeids of the revision
2264 2294 cachedelta - an optional precomputed delta
2265 2295 node - nodeid of revision; typically node is not specified, and it is
2266 2296 computed by default as hash(text, p1, p2), however subclasses might
2267 2297 use different hashing method (and override checkhash() in such case)
2268 2298 flags - the known flags to set on the revision
2269 2299 deltacomputer - an optional _deltacomputer instance shared between
2270 2300 multiple calls
2271 2301 """
2272 2302 if link == nullrev:
2273 2303 raise RevlogError(_("attempted to add linkrev -1 to %s")
2274 2304 % self.indexfile)
2275 2305
2276 2306 if flags:
2277 2307 node = node or self.hash(text, p1, p2)
2278 2308
2279 2309 rawtext, validatehash = self._processflags(text, flags, 'write')
2280 2310
2281 2311 # If the flag processor modifies the revision data, ignore any provided
2282 2312 # cachedelta.
2283 2313 if rawtext != text:
2284 2314 cachedelta = None
2285 2315
2286 2316 if len(rawtext) > _maxentrysize:
2287 2317 raise RevlogError(
2288 2318 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2289 2319 % (self.indexfile, len(rawtext)))
2290 2320
2291 2321 node = node or self.hash(rawtext, p1, p2)
2292 2322 if node in self.nodemap:
2293 2323 return node
2294 2324
2295 2325 if validatehash:
2296 2326 self.checkhash(rawtext, node, p1=p1, p2=p2)
2297 2327
2298 2328 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2299 2329 flags, cachedelta=cachedelta,
2300 2330 deltacomputer=deltacomputer)
2301 2331
2302 2332 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2303 2333 cachedelta=None, deltacomputer=None):
2304 2334 """add a raw revision with known flags, node and parents
2305 2335 useful when reusing a revision not stored in this revlog (ex: received
2306 2336 over wire, or read from an external bundle).
2307 2337 """
2308 2338 dfh = None
2309 2339 if not self._inline:
2310 2340 dfh = self._datafp("a+")
2311 2341 ifh = self._indexfp("a+")
2312 2342 try:
2313 2343 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2314 2344 flags, cachedelta, ifh, dfh,
2315 2345 deltacomputer=deltacomputer)
2316 2346 finally:
2317 2347 if dfh:
2318 2348 dfh.close()
2319 2349 ifh.close()
2320 2350
2321 2351 def compress(self, data):
2322 2352 """Generate a possibly-compressed representation of data."""
2323 2353 if not data:
2324 2354 return '', data
2325 2355
2326 2356 compressed = self._compressor.compress(data)
2327 2357
2328 2358 if compressed:
2329 2359 # The revlog compressor added the header in the returned data.
2330 2360 return '', compressed
2331 2361
2332 2362 if data[0:1] == '\0':
2333 2363 return '', data
2334 2364 return 'u', data
2335 2365
2336 2366 def decompress(self, data):
2337 2367 """Decompress a revlog chunk.
2338 2368
2339 2369 The chunk is expected to begin with a header identifying the
2340 2370 format type so it can be routed to an appropriate decompressor.
2341 2371 """
2342 2372 if not data:
2343 2373 return data
2344 2374
2345 2375 # Revlogs are read much more frequently than they are written and many
2346 2376 # chunks only take microseconds to decompress, so performance is
2347 2377 # important here.
2348 2378 #
2349 2379 # We can make a few assumptions about revlogs:
2350 2380 #
2351 2381 # 1) the majority of chunks will be compressed (as opposed to inline
2352 2382 # raw data).
2353 2383 # 2) decompressing *any* data will likely by at least 10x slower than
2354 2384 # returning raw inline data.
2355 2385 # 3) we want to prioritize common and officially supported compression
2356 2386 # engines
2357 2387 #
2358 2388 # It follows that we want to optimize for "decompress compressed data
2359 2389 # when encoded with common and officially supported compression engines"
2360 2390 # case over "raw data" and "data encoded by less common or non-official
2361 2391 # compression engines." That is why we have the inline lookup first
2362 2392 # followed by the compengines lookup.
2363 2393 #
2364 2394 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2365 2395 # compressed chunks. And this matters for changelog and manifest reads.
2366 2396 t = data[0:1]
2367 2397
2368 2398 if t == 'x':
2369 2399 try:
2370 2400 return _zlibdecompress(data)
2371 2401 except zlib.error as e:
2372 2402 raise RevlogError(_('revlog decompress error: %s') %
2373 2403 stringutil.forcebytestr(e))
2374 2404 # '\0' is more common than 'u' so it goes first.
2375 2405 elif t == '\0':
2376 2406 return data
2377 2407 elif t == 'u':
2378 2408 return util.buffer(data, 1)
2379 2409
2380 2410 try:
2381 2411 compressor = self._decompressors[t]
2382 2412 except KeyError:
2383 2413 try:
2384 2414 engine = util.compengines.forrevlogheader(t)
2385 2415 compressor = engine.revlogcompressor()
2386 2416 self._decompressors[t] = compressor
2387 2417 except KeyError:
2388 2418 raise RevlogError(_('unknown compression type %r') % t)
2389 2419
2390 2420 return compressor.decompress(data)
2391 2421
2392 2422 def _isgooddeltainfo(self, deltainfo, revinfo):
2393 2423 """Returns True if the given delta is good. Good means that it is within
2394 2424 the disk span, disk size, and chain length bounds that we know to be
2395 2425 performant."""
2396 2426 if deltainfo is None:
2397 2427 return False
2398 2428
2399 2429 # - 'deltainfo.distance' is the distance from the base revision --
2400 2430 # bounding it limits the amount of I/O we need to do.
2401 2431 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2402 2432 # deltas we need to apply -- bounding it limits the amount of CPU
2403 2433 # we consume.
2404 2434
2405 distance = deltainfo.distance
2435 if self._sparserevlog:
2436 # As sparse-read will be used, we can consider that the distance,
2437 # instead of being the span of the whole chunk,
2438 # is the span of the largest read chunk
2439 base = deltainfo.base
2440
2441 if base != nullrev:
2442 deltachain = self._deltachain(base)[0]
2443 else:
2444 deltachain = []
2445
2446 chunks = _slicechunk(self, deltachain, deltainfo)
2447 distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
2448 else:
2449 distance = deltainfo.distance
2450
2406 2451 textlen = revinfo.textlen
2407 2452 defaultmax = textlen * 4
2408 2453 maxdist = self._maxdeltachainspan
2409 2454 if not maxdist:
2410 2455 maxdist = distance # ensure the conditional pass
2411 2456 maxdist = max(maxdist, defaultmax)
2457 if self._sparserevlog and maxdist < self._srmingapsize:
2458 # In multiple place, we are ignoring irrelevant data range below a
2459 # certain size. Be also apply this tradeoff here and relax span
2460 # constraint for small enought content.
2461 maxdist = self._srmingapsize
2412 2462 if (distance > maxdist or deltainfo.deltalen > textlen or
2413 2463 deltainfo.compresseddeltalen > textlen * 2 or
2414 2464 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2415 2465 return False
2416 2466
2417 2467 return True
2418 2468
2419 2469 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2420 2470 cachedelta, ifh, dfh, alwayscache=False,
2421 2471 deltacomputer=None):
2422 2472 """internal function to add revisions to the log
2423 2473
2424 2474 see addrevision for argument descriptions.
2425 2475
2426 2476 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2427 2477
2428 2478 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2429 2479 be used.
2430 2480
2431 2481 invariants:
2432 2482 - rawtext is optional (can be None); if not set, cachedelta must be set.
2433 2483 if both are set, they must correspond to each other.
2434 2484 """
2435 2485 if node == nullid:
2436 2486 raise RevlogError(_("%s: attempt to add null revision") %
2437 2487 (self.indexfile))
2438 2488 if node == wdirid or node in wdirfilenodeids:
2439 2489 raise RevlogError(_("%s: attempt to add wdir revision") %
2440 2490 (self.indexfile))
2441 2491
2442 2492 if self._inline:
2443 2493 fh = ifh
2444 2494 else:
2445 2495 fh = dfh
2446 2496
2447 2497 btext = [rawtext]
2448 2498
2449 2499 curr = len(self)
2450 2500 prev = curr - 1
2451 2501 offset = self.end(prev)
2452 2502 p1r, p2r = self.rev(p1), self.rev(p2)
2453 2503
2454 2504 # full versions are inserted when the needed deltas
2455 2505 # become comparable to the uncompressed text
2456 2506 if rawtext is None:
2457 2507 # need rawtext size, before changed by flag processors, which is
2458 2508 # the non-raw size. use revlog explicitly to avoid filelog's extra
2459 2509 # logic that might remove metadata size.
2460 2510 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2461 2511 cachedelta[1])
2462 2512 else:
2463 2513 textlen = len(rawtext)
2464 2514
2465 2515 if deltacomputer is None:
2466 2516 deltacomputer = _deltacomputer(self)
2467 2517
2468 2518 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2469 2519
2470 2520 # no delta for flag processor revision (see "candelta" for why)
2471 2521 # not calling candelta since only one revision needs test, also to
2472 2522 # avoid overhead fetching flags again.
2473 2523 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2474 2524 deltainfo = None
2475 2525 else:
2476 2526 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2477 2527
2478 2528 if deltainfo is not None:
2479 2529 base = deltainfo.base
2480 2530 chainbase = deltainfo.chainbase
2481 2531 data = deltainfo.data
2482 2532 l = deltainfo.deltalen
2483 2533 else:
2484 2534 rawtext = deltacomputer.buildtext(revinfo, fh)
2485 2535 data = self.compress(rawtext)
2486 2536 l = len(data[1]) + len(data[0])
2487 2537 base = chainbase = curr
2488 2538
2489 2539 e = (offset_type(offset, flags), l, textlen,
2490 2540 base, link, p1r, p2r, node)
2491 2541 self.index.insert(-1, e)
2492 2542 self.nodemap[node] = curr
2493 2543
2494 2544 entry = self._io.packentry(e, self.node, self.version, curr)
2495 2545 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2496 2546
2497 2547 if alwayscache and rawtext is None:
2498 2548 rawtext = deltacomputer._buildtext(revinfo, fh)
2499 2549
2500 2550 if type(rawtext) == bytes: # only accept immutable objects
2501 2551 self._cache = (node, curr, rawtext)
2502 2552 self._chainbasecache[curr] = chainbase
2503 2553 return node
2504 2554
2505 2555 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2506 2556 # Files opened in a+ mode have inconsistent behavior on various
2507 2557 # platforms. Windows requires that a file positioning call be made
2508 2558 # when the file handle transitions between reads and writes. See
2509 2559 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2510 2560 # platforms, Python or the platform itself can be buggy. Some versions
2511 2561 # of Solaris have been observed to not append at the end of the file
2512 2562 # if the file was seeked to before the end. See issue4943 for more.
2513 2563 #
2514 2564 # We work around this issue by inserting a seek() before writing.
2515 2565 # Note: This is likely not necessary on Python 3.
2516 2566 ifh.seek(0, os.SEEK_END)
2517 2567 if dfh:
2518 2568 dfh.seek(0, os.SEEK_END)
2519 2569
2520 2570 curr = len(self) - 1
2521 2571 if not self._inline:
2522 2572 transaction.add(self.datafile, offset)
2523 2573 transaction.add(self.indexfile, curr * len(entry))
2524 2574 if data[0]:
2525 2575 dfh.write(data[0])
2526 2576 dfh.write(data[1])
2527 2577 ifh.write(entry)
2528 2578 else:
2529 2579 offset += curr * self._io.size
2530 2580 transaction.add(self.indexfile, offset, curr)
2531 2581 ifh.write(entry)
2532 2582 ifh.write(data[0])
2533 2583 ifh.write(data[1])
2534 2584 self._enforceinlinesize(transaction, ifh)
2535 2585
2536 2586 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2537 2587 """
2538 2588 add a delta group
2539 2589
2540 2590 given a set of deltas, add them to the revision log. the
2541 2591 first delta is against its parent, which should be in our
2542 2592 log, the rest are against the previous delta.
2543 2593
2544 2594 If ``addrevisioncb`` is defined, it will be called with arguments of
2545 2595 this revlog and the node that was added.
2546 2596 """
2547 2597
2548 2598 nodes = []
2549 2599
2550 2600 r = len(self)
2551 2601 end = 0
2552 2602 if r:
2553 2603 end = self.end(r - 1)
2554 2604 ifh = self._indexfp("a+")
2555 2605 isize = r * self._io.size
2556 2606 if self._inline:
2557 2607 transaction.add(self.indexfile, end + isize, r)
2558 2608 dfh = None
2559 2609 else:
2560 2610 transaction.add(self.indexfile, isize, r)
2561 2611 transaction.add(self.datafile, end)
2562 2612 dfh = self._datafp("a+")
2563 2613 def flush():
2564 2614 if dfh:
2565 2615 dfh.flush()
2566 2616 ifh.flush()
2567 2617 try:
2568 2618 deltacomputer = _deltacomputer(self)
2569 2619 # loop through our set of deltas
2570 2620 for data in deltas:
2571 2621 node, p1, p2, linknode, deltabase, delta, flags = data
2572 2622 link = linkmapper(linknode)
2573 2623 flags = flags or REVIDX_DEFAULT_FLAGS
2574 2624
2575 2625 nodes.append(node)
2576 2626
2577 2627 if node in self.nodemap:
2578 2628 # this can happen if two branches make the same change
2579 2629 continue
2580 2630
2581 2631 for p in (p1, p2):
2582 2632 if p not in self.nodemap:
2583 2633 raise LookupError(p, self.indexfile,
2584 2634 _('unknown parent'))
2585 2635
2586 2636 if deltabase not in self.nodemap:
2587 2637 raise LookupError(deltabase, self.indexfile,
2588 2638 _('unknown delta base'))
2589 2639
2590 2640 baserev = self.rev(deltabase)
2591 2641
2592 2642 if baserev != nullrev and self.iscensored(baserev):
2593 2643 # if base is censored, delta must be full replacement in a
2594 2644 # single patch operation
2595 2645 hlen = struct.calcsize(">lll")
2596 2646 oldlen = self.rawsize(baserev)
2597 2647 newlen = len(delta) - hlen
2598 2648 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2599 2649 raise error.CensoredBaseError(self.indexfile,
2600 2650 self.node(baserev))
2601 2651
2602 2652 if not flags and self._peek_iscensored(baserev, delta, flush):
2603 2653 flags |= REVIDX_ISCENSORED
2604 2654
2605 2655 # We assume consumers of addrevisioncb will want to retrieve
2606 2656 # the added revision, which will require a call to
2607 2657 # revision(). revision() will fast path if there is a cache
2608 2658 # hit. So, we tell _addrevision() to always cache in this case.
2609 2659 # We're only using addgroup() in the context of changegroup
2610 2660 # generation so the revision data can always be handled as raw
2611 2661 # by the flagprocessor.
2612 2662 self._addrevision(node, None, transaction, link,
2613 2663 p1, p2, flags, (baserev, delta),
2614 2664 ifh, dfh,
2615 2665 alwayscache=bool(addrevisioncb),
2616 2666 deltacomputer=deltacomputer)
2617 2667
2618 2668 if addrevisioncb:
2619 2669 addrevisioncb(self, node)
2620 2670
2621 2671 if not dfh and not self._inline:
2622 2672 # addrevision switched from inline to conventional
2623 2673 # reopen the index
2624 2674 ifh.close()
2625 2675 dfh = self._datafp("a+")
2626 2676 ifh = self._indexfp("a+")
2627 2677 finally:
2628 2678 if dfh:
2629 2679 dfh.close()
2630 2680 ifh.close()
2631 2681
2632 2682 return nodes
2633 2683
2634 2684 def iscensored(self, rev):
2635 2685 """Check if a file revision is censored."""
2636 2686 if not self._censorable:
2637 2687 return False
2638 2688
2639 2689 return self.flags(rev) & REVIDX_ISCENSORED
2640 2690
2641 2691 def _peek_iscensored(self, baserev, delta, flush):
2642 2692 """Quickly check if a delta produces a censored revision."""
2643 2693 if not self._censorable:
2644 2694 return False
2645 2695
2646 2696 # Fragile heuristic: unless new file meta keys are added alphabetically
2647 2697 # preceding "censored", all censored revisions are prefixed by
2648 2698 # "\1\ncensored:". A delta producing such a censored revision must be a
2649 2699 # full-replacement delta, so we inspect the first and only patch in the
2650 2700 # delta for this prefix.
2651 2701 hlen = struct.calcsize(">lll")
2652 2702 if len(delta) <= hlen:
2653 2703 return False
2654 2704
2655 2705 oldlen = self.rawsize(baserev)
2656 2706 newlen = len(delta) - hlen
2657 2707 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2658 2708 return False
2659 2709
2660 2710 add = "\1\ncensored:"
2661 2711 addlen = len(add)
2662 2712 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2663 2713
2664 2714 def getstrippoint(self, minlink):
2665 2715 """find the minimum rev that must be stripped to strip the linkrev
2666 2716
2667 2717 Returns a tuple containing the minimum rev and a set of all revs that
2668 2718 have linkrevs that will be broken by this strip.
2669 2719 """
2670 2720 brokenrevs = set()
2671 2721 strippoint = len(self)
2672 2722
2673 2723 heads = {}
2674 2724 futurelargelinkrevs = set()
2675 2725 for head in self.headrevs():
2676 2726 headlinkrev = self.linkrev(head)
2677 2727 heads[head] = headlinkrev
2678 2728 if headlinkrev >= minlink:
2679 2729 futurelargelinkrevs.add(headlinkrev)
2680 2730
2681 2731 # This algorithm involves walking down the rev graph, starting at the
2682 2732 # heads. Since the revs are topologically sorted according to linkrev,
2683 2733 # once all head linkrevs are below the minlink, we know there are
2684 2734 # no more revs that could have a linkrev greater than minlink.
2685 2735 # So we can stop walking.
2686 2736 while futurelargelinkrevs:
2687 2737 strippoint -= 1
2688 2738 linkrev = heads.pop(strippoint)
2689 2739
2690 2740 if linkrev < minlink:
2691 2741 brokenrevs.add(strippoint)
2692 2742 else:
2693 2743 futurelargelinkrevs.remove(linkrev)
2694 2744
2695 2745 for p in self.parentrevs(strippoint):
2696 2746 if p != nullrev:
2697 2747 plinkrev = self.linkrev(p)
2698 2748 heads[p] = plinkrev
2699 2749 if plinkrev >= minlink:
2700 2750 futurelargelinkrevs.add(plinkrev)
2701 2751
2702 2752 return strippoint, brokenrevs
2703 2753
2704 2754 def strip(self, minlink, transaction):
2705 2755 """truncate the revlog on the first revision with a linkrev >= minlink
2706 2756
2707 2757 This function is called when we're stripping revision minlink and
2708 2758 its descendants from the repository.
2709 2759
2710 2760 We have to remove all revisions with linkrev >= minlink, because
2711 2761 the equivalent changelog revisions will be renumbered after the
2712 2762 strip.
2713 2763
2714 2764 So we truncate the revlog on the first of these revisions, and
2715 2765 trust that the caller has saved the revisions that shouldn't be
2716 2766 removed and that it'll re-add them after this truncation.
2717 2767 """
2718 2768 if len(self) == 0:
2719 2769 return
2720 2770
2721 2771 rev, _ = self.getstrippoint(minlink)
2722 2772 if rev == len(self):
2723 2773 return
2724 2774
2725 2775 # first truncate the files on disk
2726 2776 end = self.start(rev)
2727 2777 if not self._inline:
2728 2778 transaction.add(self.datafile, end)
2729 2779 end = rev * self._io.size
2730 2780 else:
2731 2781 end += rev * self._io.size
2732 2782
2733 2783 transaction.add(self.indexfile, end)
2734 2784
2735 2785 # then reset internal state in memory to forget those revisions
2736 2786 self._cache = None
2737 2787 self._chaininfocache = {}
2738 2788 self._chunkclear()
2739 2789 for x in xrange(rev, len(self)):
2740 2790 del self.nodemap[self.node(x)]
2741 2791
2742 2792 del self.index[rev:-1]
2743 2793 self._nodepos = None
2744 2794
2745 2795 def checksize(self):
2746 2796 expected = 0
2747 2797 if len(self):
2748 2798 expected = max(0, self.end(len(self) - 1))
2749 2799
2750 2800 try:
2751 2801 with self._datafp() as f:
2752 2802 f.seek(0, 2)
2753 2803 actual = f.tell()
2754 2804 dd = actual - expected
2755 2805 except IOError as inst:
2756 2806 if inst.errno != errno.ENOENT:
2757 2807 raise
2758 2808 dd = 0
2759 2809
2760 2810 try:
2761 2811 f = self.opener(self.indexfile)
2762 2812 f.seek(0, 2)
2763 2813 actual = f.tell()
2764 2814 f.close()
2765 2815 s = self._io.size
2766 2816 i = max(0, actual // s)
2767 2817 di = actual - (i * s)
2768 2818 if self._inline:
2769 2819 databytes = 0
2770 2820 for r in self:
2771 2821 databytes += max(0, self.length(r))
2772 2822 dd = 0
2773 2823 di = actual - len(self) * s - databytes
2774 2824 except IOError as inst:
2775 2825 if inst.errno != errno.ENOENT:
2776 2826 raise
2777 2827 di = 0
2778 2828
2779 2829 return (dd, di)
2780 2830
2781 2831 def files(self):
2782 2832 res = [self.indexfile]
2783 2833 if not self._inline:
2784 2834 res.append(self.datafile)
2785 2835 return res
2786 2836
2787 2837 DELTAREUSEALWAYS = 'always'
2788 2838 DELTAREUSESAMEREVS = 'samerevs'
2789 2839 DELTAREUSENEVER = 'never'
2790 2840
2791 2841 DELTAREUSEFULLADD = 'fulladd'
2792 2842
2793 2843 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2794 2844
2795 2845 def clone(self, tr, destrevlog, addrevisioncb=None,
2796 2846 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2797 2847 """Copy this revlog to another, possibly with format changes.
2798 2848
2799 2849 The destination revlog will contain the same revisions and nodes.
2800 2850 However, it may not be bit-for-bit identical due to e.g. delta encoding
2801 2851 differences.
2802 2852
2803 2853 The ``deltareuse`` argument control how deltas from the existing revlog
2804 2854 are preserved in the destination revlog. The argument can have the
2805 2855 following values:
2806 2856
2807 2857 DELTAREUSEALWAYS
2808 2858 Deltas will always be reused (if possible), even if the destination
2809 2859 revlog would not select the same revisions for the delta. This is the
2810 2860 fastest mode of operation.
2811 2861 DELTAREUSESAMEREVS
2812 2862 Deltas will be reused if the destination revlog would pick the same
2813 2863 revisions for the delta. This mode strikes a balance between speed
2814 2864 and optimization.
2815 2865 DELTAREUSENEVER
2816 2866 Deltas will never be reused. This is the slowest mode of execution.
2817 2867 This mode can be used to recompute deltas (e.g. if the diff/delta
2818 2868 algorithm changes).
2819 2869
2820 2870 Delta computation can be slow, so the choice of delta reuse policy can
2821 2871 significantly affect run time.
2822 2872
2823 2873 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2824 2874 two extremes. Deltas will be reused if they are appropriate. But if the
2825 2875 delta could choose a better revision, it will do so. This means if you
2826 2876 are converting a non-generaldelta revlog to a generaldelta revlog,
2827 2877 deltas will be recomputed if the delta's parent isn't a parent of the
2828 2878 revision.
2829 2879
2830 2880 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2831 2881 controls whether to compute deltas against both parents for merges.
2832 2882 By default, the current default is used.
2833 2883 """
2834 2884 if deltareuse not in self.DELTAREUSEALL:
2835 2885 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2836 2886
2837 2887 if len(destrevlog):
2838 2888 raise ValueError(_('destination revlog is not empty'))
2839 2889
2840 2890 if getattr(self, 'filteredrevs', None):
2841 2891 raise ValueError(_('source revlog has filtered revisions'))
2842 2892 if getattr(destrevlog, 'filteredrevs', None):
2843 2893 raise ValueError(_('destination revlog has filtered revisions'))
2844 2894
2845 2895 # lazydeltabase controls whether to reuse a cached delta, if possible.
2846 2896 oldlazydeltabase = destrevlog._lazydeltabase
2847 2897 oldamd = destrevlog._aggressivemergedeltas
2848 2898
2849 2899 try:
2850 2900 if deltareuse == self.DELTAREUSEALWAYS:
2851 2901 destrevlog._lazydeltabase = True
2852 2902 elif deltareuse == self.DELTAREUSESAMEREVS:
2853 2903 destrevlog._lazydeltabase = False
2854 2904
2855 2905 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2856 2906
2857 2907 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2858 2908 self.DELTAREUSESAMEREVS)
2859 2909
2860 2910 deltacomputer = _deltacomputer(destrevlog)
2861 2911 index = self.index
2862 2912 for rev in self:
2863 2913 entry = index[rev]
2864 2914
2865 2915 # Some classes override linkrev to take filtered revs into
2866 2916 # account. Use raw entry from index.
2867 2917 flags = entry[0] & 0xffff
2868 2918 linkrev = entry[4]
2869 2919 p1 = index[entry[5]][7]
2870 2920 p2 = index[entry[6]][7]
2871 2921 node = entry[7]
2872 2922
2873 2923 # (Possibly) reuse the delta from the revlog if allowed and
2874 2924 # the revlog chunk is a delta.
2875 2925 cachedelta = None
2876 2926 rawtext = None
2877 2927 if populatecachedelta:
2878 2928 dp = self.deltaparent(rev)
2879 2929 if dp != nullrev:
2880 2930 cachedelta = (dp, bytes(self._chunk(rev)))
2881 2931
2882 2932 if not cachedelta:
2883 2933 rawtext = self.revision(rev, raw=True)
2884 2934
2885 2935
2886 2936 if deltareuse == self.DELTAREUSEFULLADD:
2887 2937 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2888 2938 cachedelta=cachedelta,
2889 2939 node=node, flags=flags,
2890 2940 deltacomputer=deltacomputer)
2891 2941 else:
2892 2942 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2893 2943 checkambig=False)
2894 2944 dfh = None
2895 2945 if not destrevlog._inline:
2896 2946 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2897 2947 try:
2898 2948 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2899 2949 p2, flags, cachedelta, ifh, dfh,
2900 2950 deltacomputer=deltacomputer)
2901 2951 finally:
2902 2952 if dfh:
2903 2953 dfh.close()
2904 2954 ifh.close()
2905 2955
2906 2956 if addrevisioncb:
2907 2957 addrevisioncb(self, rev, node)
2908 2958 finally:
2909 2959 destrevlog._lazydeltabase = oldlazydeltabase
2910 2960 destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now