revlog: do not search for delta for empty content...
Boris Feld
r39120:dbb3e9e4 default
@@ -1,2995 +1,2998 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 95 CensoredNodeError = error.CensoredNodeError
96 96 ProgrammingError = error.ProgrammingError
97 97
98 98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 99 _flagprocessors = {
100 100 REVIDX_ISCENSORED: None,
101 101 }
102 102
103 103 _mdre = re.compile('\1\n')
104 104 def parsemeta(text):
105 105 """return (metadatadict, metadatasize)"""
106 106 # text can be buffer, so we can't use .startswith or .index
107 107 if text[:2] != '\1\n':
108 108 return None, None
109 109 s = _mdre.search(text, 2).start()
110 110 mtext = text[2:s]
111 111 meta = {}
112 112 for l in mtext.splitlines():
113 113 k, v = l.split(": ", 1)
114 114 meta[k] = v
115 115 return meta, (s + 2)
116 116
117 117 def packmeta(meta, text):
118 118 keys = sorted(meta)
119 119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 120 return "\1\n%s\1\n%s" % (metatext, text)
121 121
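# Illustrative sketch (not part of the original file): round-tripping
# metadata through packmeta()/parsemeta(). The keys here are the usual
# copy-tracking ones, but any "key: value" pairs behave the same way.
#
#   text = packmeta({'copy': 'a.txt', 'copyrev': '0' * 40}, 'file body\n')
#   meta, offset = parsemeta(text)
#   assert meta == {'copy': 'a.txt', 'copyrev': '0' * 40}
#   assert text[offset:] == 'file body\n'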
122 122 def _censoredtext(text):
123 123 m, offs = parsemeta(text)
124 124 return m and "censored" in m
125 125
126 126 def addflagprocessor(flag, processor):
127 127 """Register a flag processor on a revision data flag.
128 128
129 129 Invariant:
130 130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 132 - Only one flag processor can be registered on a specific flag.
133 133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 134 following signatures:
135 135 - (read) f(self, rawtext) -> text, bool
136 136 - (write) f(self, text) -> rawtext, bool
137 137 - (raw) f(self, rawtext) -> bool
138 138 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 139 directly visible to the user.
140 140 The boolean returned by these transforms is used to determine whether
141 141 the returned text can be used for hash integrity checking. For example,
142 142 if "write" returns False, then "text" is used to generate hash. If
143 143 "write" returns True, that basically means "rawtext" returned by "write"
144 144 should be used to generate hash. Usually, "write" and "read" return
145 145 different booleans. And "raw" returns a same boolean as "write".
146 146
147 147 Note: The 'raw' transform is used for changegroup generation and in some
148 148 debug commands. In this case the transform only indicates whether the
149 149 contents can be used for hash integrity checks.
150 150 """
151 151 if not flag & REVIDX_KNOWN_FLAGS:
152 152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 153 raise ProgrammingError(msg)
154 154 if flag not in REVIDX_FLAGS_ORDER:
155 155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 156 raise ProgrammingError(msg)
157 157 if flag in _flagprocessors:
158 158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 159 raise error.Abort(msg)
160 160 _flagprocessors[flag] = processor
161 161
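# Illustrative sketch (not part of the original file): wiring a processor
# onto an existing flag. The transforms are hypothetical; they keep the
# rawtext zlib-compressed while hashes stay based on the user-visible
# text, hence "write" and "raw" return False and "read" returns True.
#
#   def _zread(rl, rawtext):
#       return zlib.decompress(rawtext), True
#   def _zwrite(rl, text):
#       return zlib.compress(text), False
#   def _zraw(rl, rawtext):
#       return False
#   addflagprocessor(REVIDX_EXTSTORED, (_zread, _zwrite, _zraw))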
162 162 def getoffset(q):
163 163 return int(q >> 16)
164 164
165 165 def gettype(q):
166 166 return int(q & 0xFFFF)
167 167
168 168 def offset_type(offset, type):
169 169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 170 raise ValueError('unknown revlog index flags')
171 171 return int(int(offset) << 16 | type)
172 172
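# Illustrative sketch (not part of the original file): an index entry's
# first field packs a 48-bit offset above 16 bits of flags, and the two
# round-trip through offset_type()/getoffset()/gettype().
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   assert getoffset(packed) == 4096
#   assert gettype(packed) == REVIDX_ISCENSORED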
173 173 _nullhash = hashlib.sha1(nullid)
174 174
175 175 def hash(text, p1, p2):
176 176 """generate a hash from the given text and its parent hashes
177 177
178 178 This hash combines both the current file contents and its history
179 179 in a manner that makes it easy to distinguish nodes with the same
180 180 content in the revision graph.
181 181 """
182 182 # As of now, if one of the parent nodes is null, p2 is null
183 183 if p2 == nullid:
184 184 # deep copy of a hash is faster than creating one
185 185 s = _nullhash.copy()
186 186 s.update(p1)
187 187 else:
188 188 # none of the parent nodes are nullid
189 189 if p1 < p2:
190 190 a = p1
191 191 b = p2
192 192 else:
193 193 a = p2
194 194 b = p1
195 195 s = hashlib.sha1(a)
196 196 s.update(b)
197 197 s.update(text)
198 198 return s.digest()
199 199
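# Illustrative sketch (not part of the original file): because nullid
# sorts lowest, the fast path above is equivalent to the general formula
# sha1(min(p1, p2) + max(p1, p2) + text).
#
#   assert hash('data', nullid, nullid) == \
#       hashlib.sha1(nullid + nullid + 'data').digest()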
200 200 class _testrevlog(object):
201 201 """minimalist fake revlog to use in doctests"""
202 202
203 203 def __init__(self, data, density=0.5, mingap=0):
204 204 """data is an list of revision payload boundaries"""
205 205 self._data = data
206 206 self._srdensitythreshold = density
207 207 self._srmingapsize = mingap
208 208
209 209 def start(self, rev):
210 210 if rev == 0:
211 211 return 0
212 212 return self._data[rev - 1]
213 213
214 214 def end(self, rev):
215 215 return self._data[rev]
216 216
217 217 def length(self, rev):
218 218 return self.end(rev) - self.start(rev)
219 219
220 220 def __len__(self):
221 221 return len(self._data)
222 222
223 223 def _trimchunk(revlog, revs, startidx, endidx=None):
224 224 """returns revs[startidx:endidx] without empty trailing revs
225 225
226 226 Doctest Setup
227 227 >>> revlog = _testrevlog([
228 228 ... 5, #0
229 229 ... 10, #1
230 230 ... 12, #2
231 231 ... 12, #3 (empty)
232 232 ... 17, #4
233 233 ... 21, #5
234 234 ... 21, #6 (empty)
235 235 ... ])
236 236
237 237 Contiguous cases:
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 239 [0, 1, 2, 3, 4, 5]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 241 [0, 1, 2, 3, 4]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 243 [0, 1, 2]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 245 [2]
246 246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 247 [3, 4, 5]
248 248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 249 [3, 4]
250 250
251 251 Discontiguous cases:
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 253 [1, 3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 255 [1]
256 256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 257 [3, 5]
258 258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 259 [3, 5]
260 260 """
261 261 length = revlog.length
262 262
263 263 if endidx is None:
264 264 endidx = len(revs)
265 265
266 266 # Trim empty revs at the end, but never the very first revision of a chain
267 267 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
268 268 endidx -= 1
269 269
270 270 return revs[startidx:endidx]
271 271
272 272 def _segmentspan(revlog, revs):
273 273 """Get the byte span of a segment of revisions
274 274
275 275 revs is a sorted array of revision numbers
276 276
277 277 >>> revlog = _testrevlog([
278 278 ... 5, #0
279 279 ... 10, #1
280 280 ... 12, #2
281 281 ... 12, #3 (empty)
282 282 ... 17, #4
283 283 ... ])
284 284
285 285 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
286 286 17
287 287 >>> _segmentspan(revlog, [0, 4])
288 288 17
289 289 >>> _segmentspan(revlog, [3, 4])
290 290 5
291 291 >>> _segmentspan(revlog, [1, 2, 3,])
292 292 7
293 293 >>> _segmentspan(revlog, [1, 3])
294 294 7
295 295 """
296 296 if not revs:
297 297 return 0
298 298 return revlog.end(revs[-1]) - revlog.start(revs[0])
299 299
300 300 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
301 301 """slice revs to reduce the amount of unrelated data to be read from disk.
302 302
303 303 ``revs`` is sliced into groups that should be read in one go.
304 304 Assume that revs are sorted.
305 305
306 306 The initial chunk is sliced until the overall density (payload/chunks-span
307 307 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
308 308 `revlog._srmingapsize` is skipped.
309 309
310 310 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
311 311 For consistency with other slicing choices, this limit won't go lower than
312 312 `revlog._srmingapsize`.
313 313
314 314 If an individual revision's chunk is larger than this limit, it will still
315 315 be yielded individually.
316 316
317 317 >>> revlog = _testrevlog([
318 318 ... 5, #00 (5)
319 319 ... 10, #01 (5)
320 320 ... 12, #02 (2)
321 321 ... 12, #03 (empty)
322 322 ... 27, #04 (15)
323 323 ... 31, #05 (4)
324 324 ... 31, #06 (empty)
325 325 ... 42, #07 (11)
326 326 ... 47, #08 (5)
327 327 ... 47, #09 (empty)
328 328 ... 48, #10 (1)
329 329 ... 51, #11 (3)
330 330 ... 74, #12 (23)
331 331 ... 85, #13 (11)
332 332 ... 86, #14 (1)
333 333 ... 91, #15 (5)
334 334 ... ])
335 335
336 336 >>> list(_slicechunk(revlog, list(range(16))))
337 337 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
338 338 >>> list(_slicechunk(revlog, [0, 15]))
339 339 [[0], [15]]
340 340 >>> list(_slicechunk(revlog, [0, 11, 15]))
341 341 [[0], [11], [15]]
342 342 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
343 343 [[0], [11, 13, 15]]
344 344 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
345 345 [[1, 2], [5, 8, 10, 11], [14]]
346 346
347 347 Slicing with a maximum chunk size
348 348 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
349 349 [[0], [11], [13], [15]]
350 350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
351 351 [[0], [11], [13, 15]]
352 352 """
353 353 if targetsize is not None:
354 354 targetsize = max(targetsize, revlog._srmingapsize)
355 355 # targetsize should not be specified when evaluating delta candidates:
356 356 # * targetsize is used to ensure we stay within specification when reading,
357 357 # * deltainfo is used to pick a good delta chain when writing.
358 358 if not (deltainfo is None or targetsize is None):
359 359 msg = 'cannot use `targetsize` with a `deltainfo`'
360 360 raise error.ProgrammingError(msg)
361 361 for chunk in _slicechunktodensity(revlog, revs,
362 362 deltainfo,
363 363 revlog._srdensitythreshold,
364 364 revlog._srmingapsize):
365 365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
366 366 yield subchunk
367 367
368 368 def _slicechunktosize(revlog, revs, targetsize=None):
369 369 """slice revs to match the target size
370 370
371 371 This is intended to be used on chunks that density slicing selected, but
372 372 that are still too large compared to the read guarantee of revlog. This
373 373 might happen when the "minimal gap size" interrupted the slicing or when
374 374 chains are built in a way that creates large blocks next to each other.
375 375
376 376 >>> revlog = _testrevlog([
377 377 ... 3, #0 (3)
378 378 ... 5, #1 (2)
379 379 ... 6, #2 (1)
380 380 ... 8, #3 (2)
381 381 ... 8, #4 (empty)
382 382 ... 11, #5 (3)
383 383 ... 12, #6 (1)
384 384 ... 13, #7 (1)
385 385 ... 14, #8 (1)
386 386 ... ])
387 387
388 388 Cases where chunk is already small enough
389 389 >>> list(_slicechunktosize(revlog, [0], 3))
390 390 [[0]]
391 391 >>> list(_slicechunktosize(revlog, [6, 7], 3))
392 392 [[6, 7]]
393 393 >>> list(_slicechunktosize(revlog, [0], None))
394 394 [[0]]
395 395 >>> list(_slicechunktosize(revlog, [6, 7], None))
396 396 [[6, 7]]
397 397
398 398 Cases where we need actual slicing
399 399 >>> list(_slicechunktosize(revlog, [0, 1], 3))
400 400 [[0], [1]]
401 401 >>> list(_slicechunktosize(revlog, [1, 3], 3))
402 402 [[1], [3]]
403 403 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
404 404 [[1, 2], [3]]
405 405 >>> list(_slicechunktosize(revlog, [3, 5], 3))
406 406 [[3], [5]]
407 407 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
408 408 [[3], [5]]
409 409 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
410 410 [[5], [6, 7, 8]]
411 411 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
412 412 [[0], [1, 2], [3], [5], [6, 7, 8]]
413 413
414 414 Case with too large individual chunk (must return valid chunk)
415 415 >>> list(_slicechunktosize(revlog, [0, 1], 2))
416 416 [[0], [1]]
417 417 >>> list(_slicechunktosize(revlog, [1, 3], 1))
418 418 [[1], [3]]
419 419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
420 420 [[3], [5]]
421 421 """
422 422 assert targetsize is None or 0 <= targetsize
423 423 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
424 424 yield revs
425 425 return
426 426
427 427 startrevidx = 0
428 428 startdata = revlog.start(revs[0])
429 429 endrevidx = 0
430 430 iterrevs = enumerate(revs)
431 431 next(iterrevs) # skip first rev.
432 432 for idx, r in iterrevs:
433 433 span = revlog.end(r) - startdata
434 434 if span <= targetsize:
435 435 endrevidx = idx
436 436 else:
437 437 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
438 438 if chunk:
439 439 yield chunk
440 440 startrevidx = idx
441 441 startdata = revlog.start(r)
442 442 endrevidx = idx
443 443 yield _trimchunk(revlog, revs, startrevidx)
444 444
445 445 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
446 446 mingapsize=0):
447 447 """slice revs to reduce the amount of unrelated data to be read from disk.
448 448
449 449 ``revs`` is sliced into groups that should be read in one go.
450 450 Assume that revs are sorted.
451 451
452 452 ``deltainfo`` is a _deltainfo instance of a revision that we would append
453 453 to the top of the revlog.
454 454
455 455 The initial chunk is sliced until the overall density (payload/chunks-span
456 456 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
457 457 skipped.
458 458
459 459 >>> revlog = _testrevlog([
460 460 ... 5, #00 (5)
461 461 ... 10, #01 (5)
462 462 ... 12, #02 (2)
463 463 ... 12, #03 (empty)
464 464 ... 27, #04 (15)
465 465 ... 31, #05 (4)
466 466 ... 31, #06 (empty)
467 467 ... 42, #07 (11)
468 468 ... 47, #08 (5)
469 469 ... 47, #09 (empty)
470 470 ... 48, #10 (1)
471 471 ... 51, #11 (3)
472 472 ... 74, #12 (23)
473 473 ... 85, #13 (11)
474 474 ... 86, #14 (1)
475 475 ... 91, #15 (5)
476 476 ... ])
477 477
478 478 >>> list(_slicechunktodensity(revlog, list(range(16))))
479 479 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
480 480 >>> list(_slicechunktodensity(revlog, [0, 15]))
481 481 [[0], [15]]
482 482 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
483 483 [[0], [11], [15]]
484 484 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
485 485 [[0], [11, 13, 15]]
486 486 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
487 487 [[1, 2], [5, 8, 10, 11], [14]]
488 488 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
489 489 ... mingapsize=20))
490 490 [[1, 2, 3, 5, 8, 10, 11], [14]]
491 491 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
492 492 ... targetdensity=0.95))
493 493 [[1, 2], [5], [8, 10, 11], [14]]
494 494 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
495 495 ... targetdensity=0.95, mingapsize=12))
496 496 [[1, 2], [5, 8, 10, 11], [14]]
497 497 """
498 498 start = revlog.start
499 499 length = revlog.length
500 500
501 501 if len(revs) <= 1:
502 502 yield revs
503 503 return
504 504
505 505 nextrev = len(revlog)
506 506 nextoffset = revlog.end(nextrev - 1)
507 507
508 508 if deltainfo is None:
509 509 deltachainspan = _segmentspan(revlog, revs)
510 510 chainpayload = sum(length(r) for r in revs)
511 511 else:
512 512 deltachainspan = deltainfo.distance
513 513 chainpayload = deltainfo.compresseddeltalen
514 514
515 515 if deltachainspan < mingapsize:
516 516 yield revs
517 517 return
518 518
519 519 readdata = deltachainspan
520 520
521 521 if deltachainspan:
522 522 density = chainpayload / float(deltachainspan)
523 523 else:
524 524 density = 1.0
525 525
526 526 if density >= targetdensity:
527 527 yield revs
528 528 return
529 529
530 530 if deltainfo is not None:
531 531 revs = list(revs)
532 532 revs.append(nextrev)
533 533
534 534 # Store the gaps in a heap to have them sorted by decreasing size
535 535 gapsheap = []
536 536 heapq.heapify(gapsheap)
537 537 prevend = None
538 538 for i, rev in enumerate(revs):
539 539 if rev < nextrev:
540 540 revstart = start(rev)
541 541 revlen = length(rev)
542 542 else:
543 543 revstart = nextoffset
544 544 revlen = deltainfo.deltalen
545 545
546 546 # Skip empty revisions to form larger holes
547 547 if revlen == 0:
548 548 continue
549 549
550 550 if prevend is not None:
551 551 gapsize = revstart - prevend
552 552 # only consider holes that are large enough
553 553 if gapsize > mingapsize:
554 554 heapq.heappush(gapsheap, (-gapsize, i))
555 555
556 556 prevend = revstart + revlen
557 557
558 558 # Collect the indices of the largest holes until the density is acceptable
559 559 indicesheap = []
560 560 heapq.heapify(indicesheap)
561 561 while gapsheap and density < targetdensity:
562 562 oppgapsize, gapidx = heapq.heappop(gapsheap)
563 563
564 564 heapq.heappush(indicesheap, gapidx)
565 565
566 566 # the gap sizes are stored as negatives to be sorted decreasingly
567 567 # by the heap
568 568 readdata -= (-oppgapsize)
569 569 if readdata > 0:
570 570 density = chainpayload / float(readdata)
571 571 else:
572 572 density = 1.0
573 573
574 574 # Cut the revs at collected indices
575 575 previdx = 0
576 576 while indicesheap:
577 577 idx = heapq.heappop(indicesheap)
578 578
579 579 chunk = _trimchunk(revlog, revs, previdx, idx)
580 580 if chunk:
581 581 yield chunk
582 582
583 583 previdx = idx
584 584
585 585 chunk = _trimchunk(revlog, revs, previdx)
586 586 if chunk:
587 587 yield chunk
588 588
589 589 @attr.s(slots=True, frozen=True)
590 590 class _deltainfo(object):
591 591 distance = attr.ib()
592 592 deltalen = attr.ib()
593 593 data = attr.ib()
594 594 base = attr.ib()
595 595 chainbase = attr.ib()
596 596 chainlen = attr.ib()
597 597 compresseddeltalen = attr.ib()
598 598
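# Editorial note (not part of the original file), summarizing how
# _builddeltainfo() below fills these fields: "deltalen" is the compressed
# size of this delta alone, "distance" is the byte span from the start of
# the chain base to the end of the new delta, and "chainlen" /
# "compresseddeltalen" extend the base revision's chain statistics by one
# link and by "deltalen" bytes respectively.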
599 599 class _deltacomputer(object):
600 600 def __init__(self, revlog):
601 601 self.revlog = revlog
602 602
603 603 def _getcandidaterevs(self, p1, p2, cachedelta):
604 604 """
605 605 Provides candidate revisions that are worth diffing against,
606 606 grouped by level of ease.
607 607 """
608 608 revlog = self.revlog
609 609 gdelta = revlog._generaldelta
610 610 curr = len(revlog)
611 611 prev = curr - 1
612 612 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
613 613
614 614 # should we try to build a delta?
615 615 if prev != nullrev and revlog.storedeltachains:
616 616 tested = set()
617 617 # This condition is true most of the time when processing
618 618 # changegroup data into a generaldelta repo. The only time it
619 619 # isn't true is if this is the first revision in a delta chain
620 620 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
621 621 if cachedelta and gdelta and revlog._lazydeltabase:
622 622 # Assume what we received from the server is a good choice;
623 623 # building the delta will reuse the cache
624 624 yield (cachedelta[0],)
625 625 tested.add(cachedelta[0])
626 626
627 627 if gdelta:
628 628 # exclude the lazy-delta base already tested above, if any
629 629 parents = [p for p in (p1r, p2r)
630 630 if p != nullrev and p not in tested]
631 631
632 632 if not revlog._deltabothparents and len(parents) == 2:
633 633 parents.sort()
634 634 # To minimize the chance of having to build a fulltext,
635 635 # pick first whichever parent is closest to us (max rev)
636 636 yield (parents[1],)
637 637 # then the other one (min rev) if the first did not fit
638 638 yield (parents[0],)
639 639 tested.update(parents)
640 640 elif len(parents) > 0:
641 641 # Test all parents (1 or 2), and keep the best candidate
642 642 yield parents
643 643 tested.update(parents)
644 644
645 645 if prev not in tested:
646 646 # other approaches failed; try against prev to hopefully save us a
647 647 # fulltext.
648 648 yield (prev,)
649 649 tested.add(prev)
650 650
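    # Illustrative sketch (not part of the original file): on a
    # generaldelta revlog with lazydeltabase enabled and deltabothparents
    # disabled, the candidate groups above are yielded roughly as:
    #
    #   (cachedelta[0],)     # the base the server already used
    #   (max(p1r, p2r),)     # the closest parent
    #   (min(p1r, p2r),)     # the other parent
    #   (prev,)              # the previous revision, as a last resort
    #
    # finddeltainfo() stops at the first group that produces an
    # acceptable delta.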
651 651 def buildtext(self, revinfo, fh):
652 652 """Builds a fulltext version of a revision
653 653
654 654 revinfo: _revisioninfo instance that contains all needed info
655 655 fh: file handle to either the .i or the .d revlog file,
656 656 depending on whether it is inlined or not
657 657 """
658 658 btext = revinfo.btext
659 659 if btext[0] is not None:
660 660 return btext[0]
661 661
662 662 revlog = self.revlog
663 663 cachedelta = revinfo.cachedelta
664 664 flags = revinfo.flags
665 665 node = revinfo.node
666 666
667 667 baserev = cachedelta[0]
668 668 delta = cachedelta[1]
669 669 # special case deltas which replace entire base; no need to decode
670 670 # base revision. this neatly avoids censored bases, which throw when
671 671 # they're decoded.
672 672 hlen = struct.calcsize(">lll")
673 673 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
674 674 len(delta) - hlen):
675 675 btext[0] = delta[hlen:]
676 676 else:
677 677 # deltabase is the rawtext before it was changed by flag
678 678 # processors, which is equivalent to non-raw text
679 679 basetext = revlog.revision(baserev, _df=fh, raw=False)
680 680 btext[0] = mdiff.patch(basetext, delta)
681 681
682 682 try:
683 683 res = revlog._processflags(btext[0], flags, 'read', raw=True)
684 684 btext[0], validatehash = res
685 685 if validatehash:
686 686 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
687 687 if flags & REVIDX_ISCENSORED:
688 688 raise RevlogError(_('node %s is not censored') % node)
689 689 except CensoredNodeError:
690 690 # must pass the censored index flag to add censored revisions
691 691 if not flags & REVIDX_ISCENSORED:
692 692 raise
693 693 return btext[0]
694 694
695 695 def _builddeltadiff(self, base, revinfo, fh):
696 696 revlog = self.revlog
697 697 t = self.buildtext(revinfo, fh)
698 698 if revlog.iscensored(base):
699 699 # deltas based on a censored revision must replace the
700 700 # full content in one patch, so delta works everywhere
701 701 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
702 702 delta = header + t
703 703 else:
704 704 ptext = revlog.revision(base, _df=fh, raw=True)
705 705 delta = mdiff.textdiff(ptext, t)
706 706
707 707 return delta
708 708
709 709 def _builddeltainfo(self, revinfo, base, fh):
710 710 # can we use the cached delta?
711 711 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
712 712 delta = revinfo.cachedelta[1]
713 713 else:
714 714 delta = self._builddeltadiff(base, revinfo, fh)
715 715 revlog = self.revlog
716 716 header, data = revlog.compress(delta)
717 717 deltalen = len(header) + len(data)
718 718 chainbase = revlog.chainbase(base)
719 719 offset = revlog.end(len(revlog) - 1)
720 720 dist = deltalen + offset - revlog.start(chainbase)
721 721 if revlog._generaldelta:
722 722 deltabase = base
723 723 else:
724 724 deltabase = chainbase
725 725 chainlen, compresseddeltalen = revlog._chaininfo(base)
726 726 chainlen += 1
727 727 compresseddeltalen += deltalen
728 728 return _deltainfo(dist, deltalen, (header, data), deltabase,
729 729 chainbase, chainlen, compresseddeltalen)
730 730
731 731 def finddeltainfo(self, revinfo, fh):
732 732 """Find an acceptable delta against a candidate revision
733 733
734 734 revinfo: information about the revision (instance of _revisioninfo)
735 735 fh: file handle to either the .i or the .d revlog file,
736 736 depending on whether it is inlined or not
737 737
738 738 Returns the first acceptable candidate revision, as ordered by
739 739 _getcandidaterevs
740 740 """
741 if not revinfo.textlen:
742 return None # empty files do not need a delta
743
741 744 cachedelta = revinfo.cachedelta
742 745 p1 = revinfo.p1
743 746 p2 = revinfo.p2
744 747 revlog = self.revlog
745 748
746 749 deltalength = self.revlog.length
747 750 deltaparent = self.revlog.deltaparent
748 751
749 752 deltainfo = None
750 753 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
751 754 nominateddeltas = []
752 755 for candidaterev in candidaterevs:
753 756 # skip over empty deltas (no need to include them in a chain)
754 757 while candidaterev != nullrev and not deltalength(candidaterev):
755 758 candidaterev = deltaparent(candidaterev)
756 759 # no need to try a delta against nullid, this will be handled
757 760 # by fulltext later.
758 761 if candidaterev == nullrev:
759 762 continue
760 763 # no delta for rawtext-changing revs (see "candelta" for why)
761 764 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
762 765 continue
763 766 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
764 767 if revlog._isgooddeltainfo(candidatedelta, revinfo):
765 768 nominateddeltas.append(candidatedelta)
766 769 if nominateddeltas:
767 770 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
768 771 break
769 772
770 773 return deltainfo
771 774
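# Editorial note (not part of the original file): the early return at the
# top of finddeltainfo() is the change this commit introduces. An empty
# revision (revinfo.textlen == 0) cannot benefit from a delta, so the
# method answers None immediately and the revision is stored as a
# (trivially empty) fulltext, skipping the candidate search entirely.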
772 775 @attr.s(slots=True, frozen=True)
773 776 class _revisioninfo(object):
774 777 """Information about a revision that allows building its fulltext
775 778 node: expected hash of the revision
776 779 p1, p2: parent revs of the revision
777 780 btext: built text cache consisting of a one-element list
778 781 cachedelta: (baserev, uncompressed_delta) or None
779 782 flags: flags associated to the revision storage
780 783
781 784 One of btext[0] or cachedelta must be set.
782 785 """
783 786 node = attr.ib()
784 787 p1 = attr.ib()
785 788 p2 = attr.ib()
786 789 btext = attr.ib()
787 790 textlen = attr.ib()
788 791 cachedelta = attr.ib()
789 792 flags = attr.ib()
790 793
791 794 # index v0:
792 795 # 4 bytes: offset
793 796 # 4 bytes: compressed length
794 797 # 4 bytes: base rev
795 798 # 4 bytes: link rev
796 799 # 20 bytes: parent 1 nodeid
797 800 # 20 bytes: parent 2 nodeid
798 801 # 20 bytes: nodeid
799 802 indexformatv0 = struct.Struct(">4l20s20s20s")
800 803 indexformatv0_pack = indexformatv0.pack
801 804 indexformatv0_unpack = indexformatv0.unpack
802 805
803 806 class revlogoldindex(list):
804 807 def __getitem__(self, i):
805 808 if i == -1:
806 809 return (0, 0, 0, -1, -1, -1, -1, nullid)
807 810 return list.__getitem__(self, i)
808 811
809 812 class revlogoldio(object):
810 813 def __init__(self):
811 814 self.size = indexformatv0.size
812 815
813 816 def parseindex(self, data, inline):
814 817 s = self.size
815 818 index = []
816 819 nodemap = {nullid: nullrev}
817 820 n = off = 0
818 821 l = len(data)
819 822 while off + s <= l:
820 823 cur = data[off:off + s]
821 824 off += s
822 825 e = indexformatv0_unpack(cur)
823 826 # transform to revlogv1 format
824 827 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
825 828 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
826 829 index.append(e2)
827 830 nodemap[e[6]] = n
828 831 n += 1
829 832
830 833 return revlogoldindex(index), nodemap, None
831 834
832 835 def packentry(self, entry, node, version, rev):
833 836 if gettype(entry[0]):
834 837 raise RevlogError(_('index entry flags need revlog version 1'))
835 838 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
836 839 node(entry[5]), node(entry[6]), entry[7])
837 840 return indexformatv0_pack(*e2)
838 841
839 842 # index ng:
840 843 # 6 bytes: offset
841 844 # 2 bytes: flags
842 845 # 4 bytes: compressed length
843 846 # 4 bytes: uncompressed length
844 847 # 4 bytes: base rev
845 848 # 4 bytes: link rev
846 849 # 4 bytes: parent 1 rev
847 850 # 4 bytes: parent 2 rev
848 851 # 32 bytes: nodeid
849 852 indexformatng = struct.Struct(">Qiiiiii20s12x")
850 853 indexformatng_pack = indexformatng.pack
851 854 versionformat = struct.Struct(">I")
852 855 versionformat_pack = versionformat.pack
853 856 versionformat_unpack = versionformat.unpack
854 857
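# Illustrative sketch (not part of the original file): decoding one v1
# index entry with the struct above, assuming a non-inline revlog and
# rev > 0 (entry 0's first four bytes hold the version header instead).
#
#   s = indexformatng.size
#   fields = indexformatng.unpack(data[rev * s:(rev + 1) * s])
#   offset_flags, clen, ulen, baserev, linkrev, p1r, p2r, node = fields
#   offset = getoffset(offset_flags)   # upper 48 bits
#   flags = gettype(offset_flags)      # lower 16 bits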
855 858 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
856 859 # signed integer)
857 860 _maxentrysize = 0x7fffffff
858 861
859 862 class revlogio(object):
860 863 def __init__(self):
861 864 self.size = indexformatng.size
862 865
863 866 def parseindex(self, data, inline):
864 867 # call the C implementation to parse the index data
865 868 index, cache = parsers.parse_index2(data, inline)
866 869 return index, getattr(index, 'nodemap', None), cache
867 870
868 871 def packentry(self, entry, node, version, rev):
869 872 p = indexformatng_pack(*entry)
870 873 if rev == 0:
871 874 p = versionformat_pack(version) + p[4:]
872 875 return p
873 876
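# Editorial note (not part of the original file): packentry() overwrites
# the first four bytes of entry 0 with the version header (safe because
# rev 0 always has offset 0), which is why revlog.__init__ can read the
# format with versionformat_unpack(indexdata[:4]) before parsing entries.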
874 877 class revlog(object):
875 878 """
876 879 the underlying revision storage object
877 880
878 881 A revlog consists of two parts, an index and the revision data.
879 882
880 883 The index is a file with a fixed record size containing
881 884 information on each revision, including its nodeid (hash), the
882 885 nodeids of its parents, the position and offset of its data within
883 886 the data file, and the revision it's based on. Finally, each entry
884 887 contains a linkrev entry that can serve as a pointer to external
885 888 data.
886 889
887 890 The revision data itself is a linear collection of data chunks.
888 891 Each chunk represents a revision and is usually represented as a
889 892 delta against the previous chunk. To bound lookup time, runs of
890 893 deltas are limited to about 2 times the length of the original
891 894 version data. This makes retrieval of a version proportional to
892 895 its size, or O(1) relative to the number of revisions.
893 896
894 897 Both pieces of the revlog are written to in an append-only
895 898 fashion, which means we never need to rewrite a file to insert or
896 899 remove data, and can use some simple techniques to avoid the need
897 900 for locking while reading.
898 901
899 902 If checkambig, indexfile is opened with checkambig=True at
900 903 writing, to avoid file stat ambiguity.
901 904
902 905 If mmaplargeindex is True, and an mmapindexthreshold is set, the
903 906 index will be mmapped rather than read if it is larger than the
904 907 configured threshold.
905 908
906 909 If censorable is True, the revlog can have censored revisions.
907 910 """
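    # Illustrative sketch (not part of the original file): minimal read
    # path, assuming "vfs" is a valid opener and the changelog exists.
    #
    #   rl = revlog(vfs, "00changelog.i")
    #   node = rl.node(0)          # nodeid of the first revision
    #   text = rl.revision(node)   # fulltext, rebuilt from its delta chain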
908 911 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
909 912 mmaplargeindex=False, censorable=False):
910 913 """
911 914 create a revlog object
912 915
913 916 opener is a function that abstracts the file opening operation
914 917 and can be used to implement COW semantics or the like.
915 918 """
916 919 self.indexfile = indexfile
917 920 self.datafile = datafile or (indexfile[:-2] + ".d")
918 921 self.opener = opener
919 922 # When True, indexfile is opened with checkambig=True at writing, to
920 923 # avoid file stat ambiguity.
921 924 self._checkambig = checkambig
922 925 self._censorable = censorable
923 926 # 3-tuple of (node, rev, text) for a raw revision.
924 927 self._cache = None
925 928 # Maps rev to chain base rev.
926 929 self._chainbasecache = util.lrucachedict(100)
927 930 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
928 931 self._chunkcache = (0, '')
929 932 # How much data to read and cache into the raw revlog data cache.
930 933 self._chunkcachesize = 65536
931 934 self._maxchainlen = None
932 935 self._deltabothparents = True
933 936 self.index = []
934 937 # Mapping of partial identifiers to full nodes.
935 938 self._pcache = {}
936 939 # Mapping of revision integer to full node.
937 940 self._nodecache = {nullid: nullrev}
938 941 self._nodepos = None
939 942 self._compengine = 'zlib'
940 943 self._maxdeltachainspan = -1
941 944 self._withsparseread = False
942 945 self._sparserevlog = False
943 946 self._srdensitythreshold = 0.50
944 947 self._srmingapsize = 262144
945 948
946 949 mmapindexthreshold = None
947 950 v = REVLOG_DEFAULT_VERSION
948 951 opts = getattr(opener, 'options', None)
949 952 if opts is not None:
950 953 if 'revlogv2' in opts:
951 954 # version 2 revlogs always use generaldelta.
952 955 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
953 956 elif 'revlogv1' in opts:
954 957 if 'generaldelta' in opts:
955 958 v |= FLAG_GENERALDELTA
956 959 else:
957 960 v = 0
958 961 if 'chunkcachesize' in opts:
959 962 self._chunkcachesize = opts['chunkcachesize']
960 963 if 'maxchainlen' in opts:
961 964 self._maxchainlen = opts['maxchainlen']
962 965 if 'deltabothparents' in opts:
963 966 self._deltabothparents = opts['deltabothparents']
964 967 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
965 968 if 'compengine' in opts:
966 969 self._compengine = opts['compengine']
967 970 if 'maxdeltachainspan' in opts:
968 971 self._maxdeltachainspan = opts['maxdeltachainspan']
969 972 if mmaplargeindex and 'mmapindexthreshold' in opts:
970 973 mmapindexthreshold = opts['mmapindexthreshold']
971 974 self._sparserevlog = bool(opts.get('sparse-revlog', False))
972 975 withsparseread = bool(opts.get('with-sparse-read', False))
973 976 # sparse-revlog forces sparse-read
974 977 self._withsparseread = self._sparserevlog or withsparseread
975 978 if 'sparse-read-density-threshold' in opts:
976 979 self._srdensitythreshold = opts['sparse-read-density-threshold']
977 980 if 'sparse-read-min-gap-size' in opts:
978 981 self._srmingapsize = opts['sparse-read-min-gap-size']
979 982
980 983 if self._chunkcachesize <= 0:
981 984 raise RevlogError(_('revlog chunk cache size %r is not greater '
982 985 'than 0') % self._chunkcachesize)
983 986 elif self._chunkcachesize & (self._chunkcachesize - 1):
984 987 raise RevlogError(_('revlog chunk cache size %r is not a power '
985 988 'of 2') % self._chunkcachesize)
986 989
987 990 indexdata = ''
988 991 self._initempty = True
989 992 try:
990 993 with self._indexfp() as f:
991 994 if (mmapindexthreshold is not None and
992 995 self.opener.fstat(f).st_size >= mmapindexthreshold):
993 996 indexdata = util.buffer(util.mmapread(f))
994 997 else:
995 998 indexdata = f.read()
996 999 if len(indexdata) > 0:
997 1000 v = versionformat_unpack(indexdata[:4])[0]
998 1001 self._initempty = False
999 1002 except IOError as inst:
1000 1003 if inst.errno != errno.ENOENT:
1001 1004 raise
1002 1005
1003 1006 self.version = v
1004 1007 self._inline = v & FLAG_INLINE_DATA
1005 1008 self._generaldelta = v & FLAG_GENERALDELTA
1006 1009 flags = v & ~0xFFFF
1007 1010 fmt = v & 0xFFFF
1008 1011 if fmt == REVLOGV0:
1009 1012 if flags:
1010 1013 raise RevlogError(_('unknown flags (%#04x) in version %d '
1011 1014 'revlog %s') %
1012 1015 (flags >> 16, fmt, self.indexfile))
1013 1016 elif fmt == REVLOGV1:
1014 1017 if flags & ~REVLOGV1_FLAGS:
1015 1018 raise RevlogError(_('unknown flags (%#04x) in version %d '
1016 1019 'revlog %s') %
1017 1020 (flags >> 16, fmt, self.indexfile))
1018 1021 elif fmt == REVLOGV2:
1019 1022 if flags & ~REVLOGV2_FLAGS:
1020 1023 raise RevlogError(_('unknown flags (%#04x) in version %d '
1021 1024 'revlog %s') %
1022 1025 (flags >> 16, fmt, self.indexfile))
1023 1026 else:
1024 1027 raise RevlogError(_('unknown version (%d) in revlog %s') %
1025 1028 (fmt, self.indexfile))
1026 1029
1027 1030 self.storedeltachains = True
1028 1031
1029 1032 self._io = revlogio()
1030 1033 if self.version == REVLOGV0:
1031 1034 self._io = revlogoldio()
1032 1035 try:
1033 1036 d = self._io.parseindex(indexdata, self._inline)
1034 1037 except (ValueError, IndexError):
1035 1038 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1036 1039 self.index, nodemap, self._chunkcache = d
1037 1040 if nodemap is not None:
1038 1041 self.nodemap = self._nodecache = nodemap
1039 1042 if not self._chunkcache:
1040 1043 self._chunkclear()
1041 1044 # revnum -> (chain-length, sum-delta-length)
1042 1045 self._chaininfocache = {}
1043 1046 # revlog header -> revlog compressor
1044 1047 self._decompressors = {}
1045 1048
1046 1049 @util.propertycache
1047 1050 def _compressor(self):
1048 1051 return util.compengines[self._compengine].revlogcompressor()
1049 1052
1050 1053 def _indexfp(self, mode='r'):
1051 1054 """file object for the revlog's index file"""
1052 1055 args = {r'mode': mode}
1053 1056 if mode != 'r':
1054 1057 args[r'checkambig'] = self._checkambig
1055 1058 if mode == 'w':
1056 1059 args[r'atomictemp'] = True
1057 1060 return self.opener(self.indexfile, **args)
1058 1061
1059 1062 def _datafp(self, mode='r'):
1060 1063 """file object for the revlog's data file"""
1061 1064 return self.opener(self.datafile, mode=mode)
1062 1065
1063 1066 @contextlib.contextmanager
1064 1067 def _datareadfp(self, existingfp=None):
1065 1068 """file object suitable to read data"""
1066 1069 if existingfp is not None:
1067 1070 yield existingfp
1068 1071 else:
1069 1072 if self._inline:
1070 1073 func = self._indexfp
1071 1074 else:
1072 1075 func = self._datafp
1073 1076 with func() as fp:
1074 1077 yield fp
1075 1078
1076 1079 def tip(self):
1077 1080 return self.node(len(self.index) - 1)
1078 1081 def __contains__(self, rev):
1079 1082 return 0 <= rev < len(self)
1080 1083 def __len__(self):
1081 1084 return len(self.index)
1082 1085 def __iter__(self):
1083 1086 return iter(pycompat.xrange(len(self)))
1084 1087 def revs(self, start=0, stop=None):
1085 1088 """iterate over all rev in this revlog (from start to stop)"""
1086 1089 step = 1
1087 1090 length = len(self)
1088 1091 if stop is not None:
1089 1092 if start > stop:
1090 1093 step = -1
1091 1094 stop += step
1092 1095 if stop > length:
1093 1096 stop = length
1094 1097 else:
1095 1098 stop = length
1096 1099 return pycompat.xrange(start, stop, step)
1097 1100
1098 1101 @util.propertycache
1099 1102 def nodemap(self):
1100 1103 self.rev(self.node(0))
1101 1104 return self._nodecache
1102 1105
1103 1106 def hasnode(self, node):
1104 1107 try:
1105 1108 self.rev(node)
1106 1109 return True
1107 1110 except KeyError:
1108 1111 return False
1109 1112
1110 1113 def candelta(self, baserev, rev):
1111 1114 """whether two revisions (baserev, rev) can be delta-ed or not"""
1112 1115 # Disable delta if either rev requires a content-changing flag
1113 1116 # processor (ex. LFS). This is because such flag processor can alter
1114 1117 # the rawtext content that the delta will be based on, and two clients
1115 1118 # could have a same revlog node with different flags (i.e. different
1116 1119 # rawtext contents) and the delta could be incompatible.
1117 1120 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1118 1121 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1119 1122 return False
1120 1123 return True
1121 1124
1122 1125 def clearcaches(self):
1123 1126 self._cache = None
1124 1127 self._chainbasecache.clear()
1125 1128 self._chunkcache = (0, '')
1126 1129 self._pcache = {}
1127 1130
1128 1131 try:
1129 1132 self._nodecache.clearcaches()
1130 1133 except AttributeError:
1131 1134 self._nodecache = {nullid: nullrev}
1132 1135 self._nodepos = None
1133 1136
1134 1137 def rev(self, node):
1135 1138 try:
1136 1139 return self._nodecache[node]
1137 1140 except TypeError:
1138 1141 raise
1139 1142 except RevlogError:
1140 1143 # parsers.c radix tree lookup failed
1141 1144 if node == wdirid or node in wdirfilenodeids:
1142 1145 raise error.WdirUnsupported
1143 1146 raise LookupError(node, self.indexfile, _('no node'))
1144 1147 except KeyError:
1145 1148 # pure python cache lookup failed
1146 1149 n = self._nodecache
1147 1150 i = self.index
1148 1151 p = self._nodepos
1149 1152 if p is None:
1150 1153 p = len(i) - 1
1151 1154 else:
1152 1155 assert p < len(i)
1153 1156 for r in pycompat.xrange(p, -1, -1):
1154 1157 v = i[r][7]
1155 1158 n[v] = r
1156 1159 if v == node:
1157 1160 self._nodepos = r - 1
1158 1161 return r
1159 1162 if node == wdirid or node in wdirfilenodeids:
1160 1163 raise error.WdirUnsupported
1161 1164 raise LookupError(node, self.indexfile, _('no node'))
1162 1165
1163 1166 # Accessors for index entries.
1164 1167
1165 1168 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1166 1169 # are flags.
1167 1170 def start(self, rev):
1168 1171 return int(self.index[rev][0] >> 16)
1169 1172
1170 1173 def flags(self, rev):
1171 1174 return self.index[rev][0] & 0xFFFF
1172 1175
1173 1176 def length(self, rev):
1174 1177 return self.index[rev][1]
1175 1178
1176 1179 def rawsize(self, rev):
1177 1180 """return the length of the uncompressed text for a given revision"""
1178 1181 l = self.index[rev][2]
1179 1182 if l >= 0:
1180 1183 return l
1181 1184
1182 1185 t = self.revision(rev, raw=True)
1183 1186 return len(t)
1184 1187
1185 1188 def size(self, rev):
1186 1189 """length of non-raw text (processed by a "read" flag processor)"""
1187 1190 # fast path: if no "read" flag processor could change the content,
1188 1191 # size is rawsize. note: ELLIPSIS is known to not change the content.
1189 1192 flags = self.flags(rev)
1190 1193 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1191 1194 return self.rawsize(rev)
1192 1195
1193 1196 return len(self.revision(rev, raw=False))
1194 1197
1195 1198 def chainbase(self, rev):
1196 1199 base = self._chainbasecache.get(rev)
1197 1200 if base is not None:
1198 1201 return base
1199 1202
1200 1203 index = self.index
1201 1204 iterrev = rev
1202 1205 base = index[iterrev][3]
1203 1206 while base != iterrev:
1204 1207 iterrev = base
1205 1208 base = index[iterrev][3]
1206 1209
1207 1210 self._chainbasecache[rev] = base
1208 1211 return base
1209 1212
1210 1213 def linkrev(self, rev):
1211 1214 return self.index[rev][4]
1212 1215
1213 1216 def parentrevs(self, rev):
1214 1217 try:
1215 1218 entry = self.index[rev]
1216 1219 except IndexError:
1217 1220 if rev == wdirrev:
1218 1221 raise error.WdirUnsupported
1219 1222 raise
1220 1223
1221 1224 return entry[5], entry[6]
1222 1225
1223 1226 def node(self, rev):
1224 1227 try:
1225 1228 return self.index[rev][7]
1226 1229 except IndexError:
1227 1230 if rev == wdirrev:
1228 1231 raise error.WdirUnsupported
1229 1232 raise
1230 1233
1231 1234 # Derived from index values.
1232 1235
1233 1236 def end(self, rev):
1234 1237 return self.start(rev) + self.length(rev)
1235 1238
1236 1239 def parents(self, node):
1237 1240 i = self.index
1238 1241 d = i[self.rev(node)]
1239 1242 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1240 1243
1241 1244 def chainlen(self, rev):
1242 1245 return self._chaininfo(rev)[0]
1243 1246
1244 1247 def _chaininfo(self, rev):
1245 1248 chaininfocache = self._chaininfocache
1246 1249 if rev in chaininfocache:
1247 1250 return chaininfocache[rev]
1248 1251 index = self.index
1249 1252 generaldelta = self._generaldelta
1250 1253 iterrev = rev
1251 1254 e = index[iterrev]
1252 1255 clen = 0
1253 1256 compresseddeltalen = 0
1254 1257 while iterrev != e[3]:
1255 1258 clen += 1
1256 1259 compresseddeltalen += e[1]
1257 1260 if generaldelta:
1258 1261 iterrev = e[3]
1259 1262 else:
1260 1263 iterrev -= 1
1261 1264 if iterrev in chaininfocache:
1262 1265 t = chaininfocache[iterrev]
1263 1266 clen += t[0]
1264 1267 compresseddeltalen += t[1]
1265 1268 break
1266 1269 e = index[iterrev]
1267 1270 else:
1268 1271 # Add text length of base since decompressing that also takes
1269 1272 # work. For cache hits the length is already included.
1270 1273 compresseddeltalen += e[1]
1271 1274 r = (clen, compresseddeltalen)
1272 1275 chaininfocache[rev] = r
1273 1276 return r
1274 1277
1275 1278 def _deltachain(self, rev, stoprev=None):
1276 1279 """Obtain the delta chain for a revision.
1277 1280
1278 1281 ``stoprev`` specifies a revision to stop at. If not specified, we
1279 1282 stop at the base of the chain.
1280 1283
1281 1284 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1282 1285 revs in ascending order and ``stopped`` is a bool indicating whether
1283 1286 ``stoprev`` was hit.
1284 1287 """
1285 1288 # Try C implementation.
1286 1289 try:
1287 1290 return self.index.deltachain(rev, stoprev, self._generaldelta)
1288 1291 except AttributeError:
1289 1292 pass
1290 1293
1291 1294 chain = []
1292 1295
1293 1296 # Alias to prevent attribute lookup in tight loop.
1294 1297 index = self.index
1295 1298 generaldelta = self._generaldelta
1296 1299
1297 1300 iterrev = rev
1298 1301 e = index[iterrev]
1299 1302 while iterrev != e[3] and iterrev != stoprev:
1300 1303 chain.append(iterrev)
1301 1304 if generaldelta:
1302 1305 iterrev = e[3]
1303 1306 else:
1304 1307 iterrev -= 1
1305 1308 e = index[iterrev]
1306 1309
1307 1310 if iterrev == stoprev:
1308 1311 stopped = True
1309 1312 else:
1310 1313 chain.append(iterrev)
1311 1314 stopped = False
1312 1315
1313 1316 chain.reverse()
1314 1317 return chain, stopped
1315 1318
1316 1319 def ancestors(self, revs, stoprev=0, inclusive=False):
1317 1320 """Generate the ancestors of 'revs' in reverse topological order.
1318 1321 Does not generate revs lower than stoprev.
1319 1322
1320 1323 See the documentation for ancestor.lazyancestors for more details."""
1321 1324
1322 1325 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1323 1326 inclusive=inclusive)
1324 1327
1325 1328 def descendants(self, revs):
1326 1329 """Generate the descendants of 'revs' in revision order.
1327 1330
1328 1331 Yield a sequence of revision numbers starting with a child of
1329 1332 some rev in revs, i.e., each revision is *not* considered a
1330 1333 descendant of itself. Results are ordered by revision number (a
1331 1334 topological sort)."""
1332 1335 first = min(revs)
1333 1336 if first == nullrev:
1334 1337 for i in self:
1335 1338 yield i
1336 1339 return
1337 1340
1338 1341 seen = set(revs)
1339 1342 for i in self.revs(start=first + 1):
1340 1343 for x in self.parentrevs(i):
1341 1344 if x != nullrev and x in seen:
1342 1345 seen.add(i)
1343 1346 yield i
1344 1347 break
1345 1348
1346 1349 def findcommonmissing(self, common=None, heads=None):
1347 1350 """Return a tuple of the ancestors of common and the ancestors of heads
1348 1351 that are not ancestors of common. In revset terminology, we return the
1349 1352 tuple:
1350 1353
1351 1354 ::common, (::heads) - (::common)
1352 1355
1353 1356 The list is sorted by revision number, meaning it is
1354 1357 topologically sorted.
1355 1358
1356 1359 'heads' and 'common' are both lists of node IDs. If heads is
1357 1360 not supplied, uses all of the revlog's heads. If common is not
1358 1361 supplied, uses nullid."""
1359 1362 if common is None:
1360 1363 common = [nullid]
1361 1364 if heads is None:
1362 1365 heads = self.heads()
1363 1366
1364 1367 common = [self.rev(n) for n in common]
1365 1368 heads = [self.rev(n) for n in heads]
1366 1369
1367 1370 # we want the ancestors, but inclusive
1368 1371 class lazyset(object):
1369 1372 def __init__(self, lazyvalues):
1370 1373 self.addedvalues = set()
1371 1374 self.lazyvalues = lazyvalues
1372 1375
1373 1376 def __contains__(self, value):
1374 1377 return value in self.addedvalues or value in self.lazyvalues
1375 1378
1376 1379 def __iter__(self):
1377 1380 added = self.addedvalues
1378 1381 for r in added:
1379 1382 yield r
1380 1383 for r in self.lazyvalues:
1381 1384 if not r in added:
1382 1385 yield r
1383 1386
1384 1387 def add(self, value):
1385 1388 self.addedvalues.add(value)
1386 1389
1387 1390 def update(self, values):
1388 1391 self.addedvalues.update(values)
1389 1392
1390 1393 has = lazyset(self.ancestors(common))
1391 1394 has.add(nullrev)
1392 1395 has.update(common)
1393 1396
1394 1397 # take all ancestors from heads that aren't in has
1395 1398 missing = set()
1396 1399 visit = collections.deque(r for r in heads if r not in has)
1397 1400 while visit:
1398 1401 r = visit.popleft()
1399 1402 if r in missing:
1400 1403 continue
1401 1404 else:
1402 1405 missing.add(r)
1403 1406 for p in self.parentrevs(r):
1404 1407 if p not in has:
1405 1408 visit.append(p)
1406 1409 missing = list(missing)
1407 1410 missing.sort()
1408 1411 return has, [self.node(miss) for miss in missing]
1409 1412
1410 1413 def incrementalmissingrevs(self, common=None):
1411 1414 """Return an object that can be used to incrementally compute the
1412 1415 revision numbers of the ancestors of arbitrary sets that are not
1413 1416 ancestors of common. This is an ancestor.incrementalmissingancestors
1414 1417 object.
1415 1418
1416 1419 'common' is a list of revision numbers. If common is not supplied, uses
1417 1420 nullrev.
1418 1421 """
1419 1422 if common is None:
1420 1423 common = [nullrev]
1421 1424
1422 1425 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1423 1426
1424 1427 def findmissingrevs(self, common=None, heads=None):
1425 1428 """Return the revision numbers of the ancestors of heads that
1426 1429 are not ancestors of common.
1427 1430
1428 1431 More specifically, return a list of revision numbers corresponding to
1429 1432 nodes N such that every N satisfies the following constraints:
1430 1433
1431 1434 1. N is an ancestor of some node in 'heads'
1432 1435 2. N is not an ancestor of any node in 'common'
1433 1436
1434 1437 The list is sorted by revision number, meaning it is
1435 1438 topologically sorted.
1436 1439
1437 1440 'heads' and 'common' are both lists of revision numbers. If heads is
1438 1441 not supplied, uses all of the revlog's heads. If common is not
1439 1442 supplied, uses nullid."""
1440 1443 if common is None:
1441 1444 common = [nullrev]
1442 1445 if heads is None:
1443 1446 heads = self.headrevs()
1444 1447
1445 1448 inc = self.incrementalmissingrevs(common=common)
1446 1449 return inc.missingancestors(heads)
1447 1450
1448 1451 def findmissing(self, common=None, heads=None):
1449 1452 """Return the ancestors of heads that are not ancestors of common.
1450 1453
1451 1454 More specifically, return a list of nodes N such that every N
1452 1455 satisfies the following constraints:
1453 1456
1454 1457 1. N is an ancestor of some node in 'heads'
1455 1458 2. N is not an ancestor of any node in 'common'
1456 1459
1457 1460 The list is sorted by revision number, meaning it is
1458 1461 topologically sorted.
1459 1462
1460 1463 'heads' and 'common' are both lists of node IDs. If heads is
1461 1464 not supplied, uses all of the revlog's heads. If common is not
1462 1465 supplied, uses nullid."""
1463 1466 if common is None:
1464 1467 common = [nullid]
1465 1468 if heads is None:
1466 1469 heads = self.heads()
1467 1470
1468 1471 common = [self.rev(n) for n in common]
1469 1472 heads = [self.rev(n) for n in heads]
1470 1473
1471 1474 inc = self.incrementalmissingrevs(common=common)
1472 1475 return [self.node(r) for r in inc.missingancestors(heads)]
1473 1476
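    # Editorial note (not part of the original file): a concrete reading
    # of the constraints above. On a linear history 0..4 with
    # heads=[node(4)] and common=[node(1)], findmissing() returns
    # [node(2), node(3), node(4)]: everything reachable from the head
    # that is not already reachable from common.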
1474 1477 def nodesbetween(self, roots=None, heads=None):
1475 1478 """Return a topological path from 'roots' to 'heads'.
1476 1479
1477 1480 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1478 1481 topologically sorted list of all nodes N that satisfy both of
1479 1482 these constraints:
1480 1483
1481 1484 1. N is a descendant of some node in 'roots'
1482 1485 2. N is an ancestor of some node in 'heads'
1483 1486
1484 1487 Every node is considered to be both a descendant and an ancestor
1485 1488 of itself, so every reachable node in 'roots' and 'heads' will be
1486 1489 included in 'nodes'.
1487 1490
1488 1491 'outroots' is the list of reachable nodes in 'roots', i.e., the
1489 1492 subset of 'roots' that is returned in 'nodes'. Likewise,
1490 1493 'outheads' is the subset of 'heads' that is also in 'nodes'.
1491 1494
1492 1495 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1493 1496 unspecified, uses nullid as the only root. If 'heads' is
1494 1497 unspecified, uses list of all of the revlog's heads."""
1495 1498 nonodes = ([], [], [])
1496 1499 if roots is not None:
1497 1500 roots = list(roots)
1498 1501 if not roots:
1499 1502 return nonodes
1500 1503 lowestrev = min([self.rev(n) for n in roots])
1501 1504 else:
1502 1505 roots = [nullid] # Everybody's a descendant of nullid
1503 1506 lowestrev = nullrev
1504 1507 if (lowestrev == nullrev) and (heads is None):
1505 1508 # We want _all_ the nodes!
1506 1509 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1507 1510 if heads is None:
1508 1511 # All nodes are ancestors, so the latest ancestor is the last
1509 1512 # node.
1510 1513 highestrev = len(self) - 1
1511 1514 # Set ancestors to None to signal that every node is an ancestor.
1512 1515 ancestors = None
1513 1516 # Set heads to an empty dictionary for later discovery of heads
1514 1517 heads = {}
1515 1518 else:
1516 1519 heads = list(heads)
1517 1520 if not heads:
1518 1521 return nonodes
1519 1522 ancestors = set()
1520 1523 # Turn heads into a dictionary so we can remove 'fake' heads.
1521 1524 # Also, later we will be using it to filter out the heads we can't
1522 1525 # find from roots.
1523 1526 heads = dict.fromkeys(heads, False)
1524 1527 # Start at the top and keep marking parents until we're done.
1525 1528 nodestotag = set(heads)
1526 1529 # Remember where the top was so we can use it as a limit later.
1527 1530 highestrev = max([self.rev(n) for n in nodestotag])
1528 1531 while nodestotag:
1529 1532 # grab a node to tag
1530 1533 n = nodestotag.pop()
1531 1534 # Never tag nullid
1532 1535 if n == nullid:
1533 1536 continue
1534 1537 # A node's revision number represents its place in a
1535 1538 # topologically sorted list of nodes.
1536 1539 r = self.rev(n)
1537 1540 if r >= lowestrev:
1538 1541 if n not in ancestors:
1539 1542 # If we are possibly a descendant of one of the roots
1540 1543 # and we haven't already been marked as an ancestor
1541 1544 ancestors.add(n) # Mark as ancestor
1542 1545 # Add non-nullid parents to list of nodes to tag.
1543 1546 nodestotag.update([p for p in self.parents(n) if
1544 1547 p != nullid])
1545 1548 elif n in heads: # We've seen it before, is it a fake head?
1546 1549 # So it is, real heads should not be the ancestors of
1547 1550 # any other heads.
1548 1551 heads.pop(n)
1549 1552 if not ancestors:
1550 1553 return nonodes
1551 1554 # Now that we have our set of ancestors, we want to remove any
1552 1555 # roots that are not ancestors.
1553 1556
1554 1557 # If one of the roots was nullid, everything is included anyway.
1555 1558 if lowestrev > nullrev:
1556 1559 # But, since we weren't, let's recompute the lowest rev to not
1557 1560 # include roots that aren't ancestors.
1558 1561
1559 1562 # Filter out roots that aren't ancestors of heads
1560 1563 roots = [root for root in roots if root in ancestors]
1561 1564 # Recompute the lowest revision
1562 1565 if roots:
1563 1566 lowestrev = min([self.rev(root) for root in roots])
1564 1567 else:
1565 1568 # No more roots? Return empty list
1566 1569 return nonodes
1567 1570 else:
1568 1571 # We are descending from nullid, and don't need to care about
1569 1572 # any other roots.
1570 1573 lowestrev = nullrev
1571 1574 roots = [nullid]
1572 1575 # Transform our roots list into a set.
1573 1576 descendants = set(roots)
1574 1577 # Also, keep the original roots so we can filter out roots that aren't
1575 1578 # 'real' roots (i.e. are descended from other roots).
1576 1579 roots = descendants.copy()
1577 1580 # Our topologically sorted list of output nodes.
1578 1581 orderedout = []
1579 1582 # Don't start at nullid since we don't want nullid in our output list,
1580 1583 # and if nullid shows up in descendants, empty parents will look like
1581 1584 # they're descendants.
1582 1585 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1583 1586 n = self.node(r)
1584 1587 isdescendant = False
1585 1588 if lowestrev == nullrev: # Everybody is a descendant of nullid
1586 1589 isdescendant = True
1587 1590 elif n in descendants:
1588 1591 # n is already a descendant
1589 1592 isdescendant = True
1590 1593 # This check only needs to be done here because all the roots
1591 1594 # will start being marked as descendants before the loop.
1592 1595 if n in roots:
1593 1596 # If n was a root, check if it's a 'real' root.
1594 1597 p = tuple(self.parents(n))
1595 1598 # If any of its parents are descendants, it's not a root.
1596 1599 if (p[0] in descendants) or (p[1] in descendants):
1597 1600 roots.remove(n)
1598 1601 else:
1599 1602 p = tuple(self.parents(n))
1600 1603 # A node is a descendant if either of its parents is a
1601 1604 # descendant. (We seeded the descendants set with the roots
1602 1605 # up there, remember?)
1603 1606 if (p[0] in descendants) or (p[1] in descendants):
1604 1607 descendants.add(n)
1605 1608 isdescendant = True
1606 1609 if isdescendant and ((ancestors is None) or (n in ancestors)):
1607 1610 # Only include nodes that are both descendants and ancestors.
1608 1611 orderedout.append(n)
1609 1612 if (ancestors is not None) and (n in heads):
1610 1613 # We're trying to figure out which heads are reachable
1611 1614 # from roots.
1612 1615 # Mark this head as having been reached
1613 1616 heads[n] = True
1614 1617 elif ancestors is None:
1615 1618 # Otherwise, we're trying to discover the heads.
1616 1619 # Assume this is a head because if it isn't, the next step
1617 1620 # will eventually remove it.
1618 1621 heads[n] = True
1619 1622 # But, obviously its parents aren't.
1620 1623 for p in self.parents(n):
1621 1624 heads.pop(p, None)
1622 1625 heads = [head for head, flag in heads.iteritems() if flag]
1623 1626 roots = list(roots)
1624 1627 assert orderedout
1625 1628 assert roots
1626 1629 assert heads
1627 1630 return (orderedout, roots, heads)
1628 1631
1629 1632 def headrevs(self):
1630 1633 try:
1631 1634 return self.index.headrevs()
1632 1635 except AttributeError:
1633 1636 return self._headrevs()
1634 1637
1635 1638 def computephases(self, roots):
1636 1639 return self.index.computephasesmapsets(roots)
1637 1640
1638 1641 def _headrevs(self):
1639 1642 count = len(self)
1640 1643 if not count:
1641 1644 return [nullrev]
1642 1645 # we won't iter over filtered rev so nobody is a head at start
1643 1646 ishead = [0] * (count + 1)
1644 1647 index = self.index
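# Illustrative walk-through (sketch): for a linear history 0 <- 1 <- 2,
# each rev is first marked as a potential head, then its parents are
# unmarked, so only the tip survives:
#   r=0: ishead = [1,0,0,0] (parents are nullrev, slot -1 stays 0)
#   r=1: ishead = [1,1,0,0] -> parent 0 cleared -> [0,1,0,0]
#   r=2: ishead = [0,1,1,0] -> parent 1 cleared -> [0,0,1,0]
# giving headrevs() == [2].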
1645 1648 for r in self:
1646 1649 ishead[r] = 1 # I may be a head
1647 1650 e = index[r]
1648 1651 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1649 1652 return [r for r, val in enumerate(ishead) if val]
1650 1653
1651 1654 def heads(self, start=None, stop=None):
1652 1655 """return the list of all nodes that have no children
1653 1656
1654 1657 if start is specified, only heads that are descendants of
1655 1658 start will be returned
1656 1659 if stop is specified, all the revs in stop are treated as if
1657 1660 they had no children
1658 1661 """
1659 1662 if start is None and stop is None:
1660 1663 if not len(self):
1661 1664 return [nullid]
1662 1665 return [self.node(r) for r in self.headrevs()]
1663 1666
1664 1667 if start is None:
1665 1668 start = nullid
1666 1669 if stop is None:
1667 1670 stop = []
1668 1671 stoprevs = set([self.rev(n) for n in stop])
1669 1672 startrev = self.rev(start)
1670 1673 reachable = {startrev}
1671 1674 heads = {startrev}
1672 1675
1673 1676 parentrevs = self.parentrevs
1674 1677 for r in self.revs(start=startrev + 1):
1675 1678 for p in parentrevs(r):
1676 1679 if p in reachable:
1677 1680 if r not in stoprevs:
1678 1681 reachable.add(r)
1679 1682 heads.add(r)
1680 1683 if p in heads and p not in stoprevs:
1681 1684 heads.remove(p)
1682 1685
1683 1686 return [self.node(r) for r in heads]
1684 1687
1685 1688 def children(self, node):
1686 1689 """find the children of a given node"""
1687 1690 c = []
1688 1691 p = self.rev(node)
1689 1692 for r in self.revs(start=p + 1):
1690 1693 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1691 1694 if prevs:
1692 1695 for pr in prevs:
1693 1696 if pr == p:
1694 1697 c.append(self.node(r))
1695 1698 elif p == nullrev:
1696 1699 c.append(self.node(r))
1697 1700 return c
1698 1701
1699 1702 def commonancestorsheads(self, a, b):
1700 1703 """calculate all the heads of the common ancestors of nodes a and b"""
1701 1704 a, b = self.rev(a), self.rev(b)
1702 1705 ancs = self._commonancestorsheads(a, b)
1703 1706 return pycompat.maplist(self.node, ancs)
1704 1707
1705 1708 def _commonancestorsheads(self, *revs):
1706 1709 """calculate all the heads of the common ancestors of revs"""
1707 1710 try:
1708 1711 ancs = self.index.commonancestorsheads(*revs)
1709 1712 except (AttributeError, OverflowError): # C implementation failed
1710 1713 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1711 1714 return ancs
1712 1715
1713 1716 def isancestor(self, a, b):
1714 1717 """return True if node a is an ancestor of node b
1715 1718
1716 1719 A revision is considered an ancestor of itself."""
1717 1720 a, b = self.rev(a), self.rev(b)
1718 1721 return self.isancestorrev(a, b)
1719 1722
1720 1723 def isancestorrev(self, a, b):
1721 1724 """return True if revision a is an ancestor of revision b
1722 1725
1723 1726 A revision is considered an ancestor of itself.
1724 1727
1725 1728 The implementation of this is trivial but the use of
1726 1729 commonancestorsheads is not."""
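# Sketch of why the membership test below is enough: when a is an
# ancestor of b, the common ancestors of {a, b} are exactly the
# ancestors of a, whose single head is a itself. So, after the cheap
# guards, ``a in commonancestorsheads(a, b)`` settles the question.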
1727 1730 if a == nullrev:
1728 1731 return True
1729 1732 elif a == b:
1730 1733 return True
1731 1734 elif a > b:
1732 1735 return False
1733 1736 return a in self._commonancestorsheads(a, b)
1734 1737
1735 1738 def ancestor(self, a, b):
1736 1739 """calculate the "best" common ancestor of nodes a and b"""
1737 1740
1738 1741 a, b = self.rev(a), self.rev(b)
1739 1742 try:
1740 1743 ancs = self.index.ancestors(a, b)
1741 1744 except (AttributeError, OverflowError):
1742 1745 ancs = ancestor.ancestors(self.parentrevs, a, b)
1743 1746 if ancs:
1744 1747 # choose a consistent winner when there's a tie
1745 1748 return min(map(self.node, ancs))
1746 1749 return nullid
1747 1750
1748 1751 def _match(self, id):
1749 1752 if isinstance(id, int):
1750 1753 # rev
1751 1754 return self.node(id)
1752 1755 if len(id) == 20:
1753 1756 # possibly a binary node
1754 1757 # odds of a binary node being all hex in ASCII are 1 in 10**25
1755 1758 try:
1756 1759 node = id
1757 1760 self.rev(node) # quick search the index
1758 1761 return node
1759 1762 except LookupError:
1760 1763 pass # may be partial hex id
1761 1764 try:
1762 1765 # str(rev)
1763 1766 rev = int(id)
1764 1767 if "%d" % rev != id:
1765 1768 raise ValueError
1766 1769 if rev < 0:
1767 1770 rev = len(self) + rev
1768 1771 if rev < 0 or rev >= len(self):
1769 1772 raise ValueError
1770 1773 return self.node(rev)
1771 1774 except (ValueError, OverflowError):
1772 1775 pass
1773 1776 if len(id) == 40:
1774 1777 try:
1775 1778 # a full hex nodeid?
1776 1779 node = bin(id)
1777 1780 self.rev(node)
1778 1781 return node
1779 1782 except (TypeError, LookupError):
1780 1783 pass
1781 1784
1782 1785 def _partialmatch(self, id):
1783 1786 # we don't care about wdirfilenodeids as they should always be full hashes
1784 1787 maybewdir = wdirhex.startswith(id)
1785 1788 try:
1786 1789 partial = self.index.partialmatch(id)
1787 1790 if partial and self.hasnode(partial):
1788 1791 if maybewdir:
1789 1792 # single 'ff...' match in radix tree, ambiguous with wdir
1790 1793 raise RevlogError
1791 1794 return partial
1792 1795 if maybewdir:
1793 1796 # no 'ff...' match in radix tree, wdir identified
1794 1797 raise error.WdirUnsupported
1795 1798 return None
1796 1799 except RevlogError:
1797 1800 # parsers.c radix tree lookup gave multiple matches
1798 1801 # fast path: for unfiltered changelog, radix tree is accurate
1799 1802 if not getattr(self, 'filteredrevs', None):
1800 1803 raise AmbiguousPrefixLookupError(id, self.indexfile,
1801 1804 _('ambiguous identifier'))
1802 1805 # fall through to slow path that filters hidden revisions
1803 1806 except (AttributeError, ValueError):
1804 1807 # we are pure python, or key was too short to search radix tree
1805 1808 pass
1806 1809
1807 1810 if id in self._pcache:
1808 1811 return self._pcache[id]
1809 1812
1810 1813 if len(id) <= 40:
1811 1814 try:
1812 1815 # hex(node)[:...]
1813 1816 l = len(id) // 2 # grab an even number of digits
1814 1817 prefix = bin(id[:l * 2])
1815 1818 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1816 1819 nl = [n for n in nl if hex(n).startswith(id) and
1817 1820 self.hasnode(n)]
1818 1821 if len(nl) > 0:
1819 1822 if len(nl) == 1 and not maybewdir:
1820 1823 self._pcache[id] = nl[0]
1821 1824 return nl[0]
1822 1825 raise AmbiguousPrefixLookupError(id, self.indexfile,
1823 1826 _('ambiguous identifier'))
1824 1827 if maybewdir:
1825 1828 raise error.WdirUnsupported
1826 1829 return None
1827 1830 except TypeError:
1828 1831 pass
1829 1832
1830 1833 def lookup(self, id):
1831 1834 """locate a node based on:
1832 1835 - revision number or str(revision number)
1833 1836 - nodeid or subset of hex nodeid
1834 1837 """
1835 1838 n = self._match(id)
1836 1839 if n is not None:
1837 1840 return n
1838 1841 n = self._partialmatch(id)
1839 1842 if n:
1840 1843 return n
1841 1844
1842 1845 raise LookupError(id, self.indexfile, _('no match found'))
1843 1846
1844 1847 def shortest(self, node, minlength=1):
1845 1848 """Find the shortest unambiguous prefix that matches node."""
1846 1849 def isvalid(prefix):
1847 1850 try:
1848 1851 node = self._partialmatch(prefix)
1849 1852 except error.RevlogError:
1850 1853 return False
1851 1854 except error.WdirUnsupported:
1852 1855 # single 'ff...' match
1853 1856 return True
1854 1857 if node is None:
1855 1858 raise LookupError(node, self.indexfile, _('no node'))
1856 1859 return True
1857 1860
1858 1861 def maybewdir(prefix):
1859 1862 return all(c == 'f' for c in prefix)
1860 1863
1861 1864 hexnode = hex(node)
1862 1865
1863 1866 def disambiguate(hexnode, minlength):
1864 1867 """Disambiguate against wdirid."""
1865 1868 for length in range(minlength, 41):
1866 1869 prefix = hexnode[:length]
1867 1870 if not maybewdir(prefix):
1868 1871 return prefix
1869 1872
1870 1873 if not getattr(self, 'filteredrevs', None):
1871 1874 try:
1872 1875 length = max(self.index.shortest(node), minlength)
1873 1876 return disambiguate(hexnode, length)
1874 1877 except RevlogError:
1875 1878 if node != wdirid:
1876 1879 raise LookupError(node, self.indexfile, _('no node'))
1877 1880 except AttributeError:
1878 1881 # Fall through to pure code
1879 1882 pass
1880 1883
1881 1884 if node == wdirid:
1882 1885 for length in range(minlength, 41):
1883 1886 prefix = hexnode[:length]
1884 1887 if isvalid(prefix):
1885 1888 return prefix
1886 1889
1887 1890 for length in range(minlength, 41):
1888 1891 prefix = hexnode[:length]
1889 1892 if isvalid(prefix):
1890 1893 return disambiguate(hexnode, length)
1891 1894
1892 1895 def cmp(self, node, text):
1893 1896 """compare text with a given file revision
1894 1897
1895 1898 returns True if text is different than what is stored.
1896 1899 """
1897 1900 p1, p2 = self.parents(node)
1898 1901 return hash(text, p1, p2) != node
1899 1902
1900 1903 def _cachesegment(self, offset, data):
1901 1904 """Add a segment to the revlog cache.
1902 1905
1903 1906 Accepts an absolute offset and the data that is at that location.
1904 1907 """
1905 1908 o, d = self._chunkcache
1906 1909 # try to add to existing cache
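# (e.g. with _chunkcache == (0, d) and len(d) == 64, caching data at
# offset 64 extends the window to (0, d + data); a non-contiguous
# offset, or a window hitting _chunksize, simply replaces the cache)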
1907 1910 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1908 1911 self._chunkcache = o, d + data
1909 1912 else:
1910 1913 self._chunkcache = offset, data
1911 1914
1912 1915 def _readsegment(self, offset, length, df=None):
1913 1916 """Load a segment of raw data from the revlog.
1914 1917
1915 1918 Accepts an absolute offset, length to read, and an optional existing
1916 1919 file handle to read from.
1917 1920
1918 1921 If an existing file handle is passed, it will be seeked and the
1919 1922 original seek position will NOT be restored.
1920 1923
1921 1924 Returns a str or buffer of raw byte data.
1922 1925 """
1923 1926 # Cache data both forward and backward around the requested
1924 1927 # data, in a fixed size window. This helps speed up operations
1925 1928 # involving reading the revlog backwards.
1926 1929 cachesize = self._chunkcachesize
1927 1930 realoffset = offset & ~(cachesize - 1)
1928 1931 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1929 1932 - realoffset)
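# Worked example (sketch, assuming the default 64KiB cache size):
# offset=70000, length=100, cachesize=65536 gives
#   realoffset = 70000 & ~65535 = 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536
# so one aligned 64KiB window is read and the caller receives a
# 100-byte buffer starting at 70000 - 65536 = 4464 within it.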
1930 1933 with self._datareadfp(df) as df:
1931 1934 df.seek(realoffset)
1932 1935 d = df.read(reallength)
1933 1936 self._cachesegment(realoffset, d)
1934 1937 if offset != realoffset or reallength != length:
1935 1938 return util.buffer(d, offset - realoffset, length)
1936 1939 return d
1937 1940
1938 1941 def _getsegment(self, offset, length, df=None):
1939 1942 """Obtain a segment of raw data from the revlog.
1940 1943
1941 1944 Accepts an absolute offset, length of bytes to obtain, and an
1942 1945 optional file handle to the already-opened revlog. If the file
1943 1946 handle is used, its original seek position will not be preserved.
1944 1947
1945 1948 Requests for data may be returned from a cache.
1946 1949
1947 1950 Returns a str or a buffer instance of raw byte data.
1948 1951 """
1949 1952 o, d = self._chunkcache
1950 1953 l = len(d)
1951 1954
1952 1955 # is it in the cache?
1953 1956 cachestart = offset - o
1954 1957 cacheend = cachestart + length
1955 1958 if cachestart >= 0 and cacheend <= l:
1956 1959 if cachestart == 0 and cacheend == l:
1957 1960 return d # avoid a copy
1958 1961 return util.buffer(d, cachestart, cacheend - cachestart)
1959 1962
1960 1963 return self._readsegment(offset, length, df=df)
1961 1964
1962 1965 def _getsegmentforrevs(self, startrev, endrev, df=None):
1963 1966 """Obtain a segment of raw data corresponding to a range of revisions.
1964 1967
1965 1968 Accepts the start and end revisions and an optional already-open
1966 1969 file handle to be used for reading. If the file handle is read, its
1967 1970 seek position will not be preserved.
1968 1971
1969 1972 Requests for data may be satisfied by a cache.
1970 1973
1971 1974 Returns a 2-tuple of (offset, data) for the requested range of
1972 1975 revisions. Offset is the integer offset from the beginning of the
1973 1976 revlog and data is a str or buffer of the raw byte data.
1974 1977
1975 1978 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1976 1979 to determine where each revision's data begins and ends.
1977 1980 """
1978 1981 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1979 1982 # (functions are expensive).
1980 1983 index = self.index
1981 1984 istart = index[startrev]
1982 1985 start = int(istart[0] >> 16)
1983 1986 if startrev == endrev:
1984 1987 end = start + istart[1]
1985 1988 else:
1986 1989 iend = index[endrev]
1987 1990 end = int(iend[0] >> 16) + iend[1]
1988 1991
1989 1992 if self._inline:
1990 1993 start += (startrev + 1) * self._io.size
1991 1994 end += (endrev + 1) * self._io.size
1992 1995 length = end - start
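# (in an inline revlog each revision's data is preceded by its index
# entry, so rev r's data is shifted forward by (r + 1) * iosize
# bytes; e.g. with a 64-byte entry size, rev 0's data begins at
# start(0) + 64)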
1993 1996
1994 1997 return start, self._getsegment(start, length, df=df)
1995 1998
1996 1999 def _chunk(self, rev, df=None):
1997 2000 """Obtain a single decompressed chunk for a revision.
1998 2001
1999 2002 Accepts an integer revision and an optional already-open file handle
2000 2003 to be used for reading. If used, the seek position of the file will not
2001 2004 be preserved.
2002 2005
2003 2006 Returns a str holding uncompressed data for the requested revision.
2004 2007 """
2005 2008 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2006 2009
2007 2010 def _chunks(self, revs, df=None, targetsize=None):
2008 2011 """Obtain decompressed chunks for the specified revisions.
2009 2012
2010 2013 Accepts an iterable of numeric revisions that are assumed to be in
2011 2014 ascending order. Also accepts an optional already-open file handle
2012 2015 to be used for reading. If used, the seek position of the file will
2013 2016 not be preserved.
2014 2017
2015 2018 This function is similar to calling ``self._chunk()`` multiple times,
2016 2019 but is faster.
2017 2020
2018 2021 Returns a list with decompressed data for each requested revision.
2019 2022 """
2020 2023 if not revs:
2021 2024 return []
2022 2025 start = self.start
2023 2026 length = self.length
2024 2027 inline = self._inline
2025 2028 iosize = self._io.size
2026 2029 buffer = util.buffer
2027 2030
2028 2031 l = []
2029 2032 ladd = l.append
2030 2033
2031 2034 if not self._withsparseread:
2032 2035 slicedchunks = (revs,)
2033 2036 else:
2034 2037 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2035 2038
2036 2039 for revschunk in slicedchunks:
2037 2040 firstrev = revschunk[0]
2038 2041 # Skip trailing revisions with empty diff
2039 2042 for lastrev in revschunk[::-1]:
2040 2043 if length(lastrev) != 0:
2041 2044 break
2042 2045
2043 2046 try:
2044 2047 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2045 2048 except OverflowError:
2046 2049 # issue4215 - we can't cache a run of chunks greater than
2047 2050 # 2G on Windows
2048 2051 return [self._chunk(rev, df=df) for rev in revschunk]
2049 2052
2050 2053 decomp = self.decompress
2051 2054 for rev in revschunk:
2052 2055 chunkstart = start(rev)
2053 2056 if inline:
2054 2057 chunkstart += (rev + 1) * iosize
2055 2058 chunklength = length(rev)
2056 2059 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2057 2060
2058 2061 return l
2059 2062
2060 2063 def _chunkclear(self):
2061 2064 """Clear the raw chunk cache."""
2062 2065 self._chunkcache = (0, '')
2063 2066
2064 2067 def deltaparent(self, rev):
2065 2068 """return deltaparent of the given revision"""
2066 2069 base = self.index[rev][3]
2067 2070 if base == rev:
2068 2071 return nullrev
2069 2072 elif self._generaldelta:
2070 2073 return base
2071 2074 else:
2072 2075 return rev - 1
2073 2076
2074 2077 def revdiff(self, rev1, rev2):
2075 2078 """return or calculate a delta between two revisions
2076 2079
2077 2080 The delta calculated is in binary form and is intended to be written to
2078 2081 revlog data directly. So this function needs raw revision data.
2079 2082 """
2080 2083 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2081 2084 return bytes(self._chunk(rev2))
2082 2085
2083 2086 return mdiff.textdiff(self.revision(rev1, raw=True),
2084 2087 self.revision(rev2, raw=True))
2085 2088
2086 2089 def revision(self, nodeorrev, _df=None, raw=False):
2087 2090 """return an uncompressed revision of a given node or revision
2088 2091 number.
2089 2092
2090 2093 _df - an existing file handle to read from. (internal-only)
2091 2094 raw - an optional argument specifying if the revision data is to be
2092 2095 treated as raw data when applying flag transforms. 'raw' should be set
2093 2096 to True when generating changegroups or in debug commands.
2094 2097 """
2095 2098 if isinstance(nodeorrev, int):
2096 2099 rev = nodeorrev
2097 2100 node = self.node(rev)
2098 2101 else:
2099 2102 node = nodeorrev
2100 2103 rev = None
2101 2104
2102 2105 cachedrev = None
2103 2106 flags = None
2104 2107 rawtext = None
2105 2108 if node == nullid:
2106 2109 return ""
2107 2110 if self._cache:
2108 2111 if self._cache[0] == node:
2109 2112 # _cache only stores rawtext
2110 2113 if raw:
2111 2114 return self._cache[2]
2112 2115 # duplicated, but good for perf
2113 2116 if rev is None:
2114 2117 rev = self.rev(node)
2115 2118 if flags is None:
2116 2119 flags = self.flags(rev)
2117 2120 # no extra flags set, no flag processor runs, text = rawtext
2118 2121 if flags == REVIDX_DEFAULT_FLAGS:
2119 2122 return self._cache[2]
2120 2123 # rawtext is reusable. need to run flag processor
2121 2124 rawtext = self._cache[2]
2122 2125
2123 2126 cachedrev = self._cache[1]
2124 2127
2125 2128 # look up what we need to read
2126 2129 if rawtext is None:
2127 2130 if rev is None:
2128 2131 rev = self.rev(node)
2129 2132
2130 2133 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2131 2134 if stopped:
2132 2135 rawtext = self._cache[2]
2133 2136
2134 2137 # drop cache to save memory
2135 2138 self._cache = None
2136 2139
2137 2140 targetsize = None
2138 2141 rawsize = self.index[rev][2]
2139 2142 if 0 <= rawsize:
2140 2143 targetsize = 4 * rawsize
2141 2144
2142 2145 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2143 2146 if rawtext is None:
2144 2147 rawtext = bytes(bins[0])
2145 2148 bins = bins[1:]
2146 2149
2147 2150 rawtext = mdiff.patches(rawtext, bins)
2148 2151 self._cache = (node, rev, rawtext)
2149 2152
2150 2153 if flags is None:
2151 2154 if rev is None:
2152 2155 rev = self.rev(node)
2153 2156 flags = self.flags(rev)
2154 2157
2155 2158 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2156 2159 if validatehash:
2157 2160 self.checkhash(text, node, rev=rev)
2158 2161
2159 2162 return text
2160 2163
2161 2164 def hash(self, text, p1, p2):
2162 2165 """Compute a node hash.
2163 2166
2164 2167 Available as a function so that subclasses can replace the hash
2165 2168 as needed.
2166 2169 """
2167 2170 return hash(text, p1, p2)
2168 2171
2169 2172 def _processflags(self, text, flags, operation, raw=False):
2170 2173 """Inspect revision data flags and applies transforms defined by
2171 2174 registered flag processors.
2172 2175
2173 2176 ``text`` - the revision data to process
2174 2177 ``flags`` - the revision flags
2175 2178 ``operation`` - the operation being performed (read or write)
2176 2179 ``raw`` - an optional argument describing if the raw transform should be
2177 2180 applied.
2178 2181
2179 2182 This method processes the flags in the order (or reverse order if
2180 2183 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2181 2184 flag processors registered for present flags. The order of flags defined
2182 2185 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
2183 2186
2184 2187 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2185 2188 processed text and ``validatehash`` is a bool indicating whether the
2186 2189 returned text should be checked for hash integrity.
2187 2190
2188 2191 Note: If the ``raw`` argument is set, it has precedence over the
2189 2192 operation and will only update the value of ``validatehash``.
2190 2193 """
2191 2194 # fast path: no flag processors will run
2192 2195 if flags == 0:
2193 2196 return text, True
2194 2197 if operation not in ('read', 'write'):
2195 2198 raise ProgrammingError(_("invalid '%s' operation") % (operation))
2196 2199 # Check all flags are known.
2197 2200 if flags & ~REVIDX_KNOWN_FLAGS:
2198 2201 raise RevlogError(_("incompatible revision flag '%#x'") %
2199 2202 (flags & ~REVIDX_KNOWN_FLAGS))
2200 2203 validatehash = True
2201 2204 # Depending on the operation (read or write), the order might be
2202 2205 # reversed due to non-commutative transforms.
2203 2206 orderedflags = REVIDX_FLAGS_ORDER
2204 2207 if operation == 'write':
2205 2208 orderedflags = reversed(orderedflags)
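# e.g. for a revision flagged ISCENSORED|EXTSTORED, the processors
# run in REVIDX_FLAGS_ORDER on 'read' and in the reverse order on
# 'write', so a read undoes the write transforms step by step.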
2206 2209
2207 2210 for flag in orderedflags:
2208 2211 # If a flagprocessor has been registered for a known flag, apply the
2209 2212 # related operation transform and update result tuple.
2210 2213 if flag & flags:
2211 2214 vhash = True
2212 2215
2213 2216 if flag not in _flagprocessors:
2214 2217 message = _("missing processor for flag '%#x'") % (flag)
2215 2218 raise RevlogError(message)
2216 2219
2217 2220 processor = _flagprocessors[flag]
2218 2221 if processor is not None:
2219 2222 readtransform, writetransform, rawtransform = processor
2220 2223
2221 2224 if raw:
2222 2225 vhash = rawtransform(self, text)
2223 2226 elif operation == 'read':
2224 2227 text, vhash = readtransform(self, text)
2225 2228 else: # write operation
2226 2229 text, vhash = writetransform(self, text)
2227 2230 validatehash = validatehash and vhash
2228 2231
2229 2232 return text, validatehash
2230 2233
2231 2234 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2232 2235 """Check node hash integrity.
2233 2236
2234 2237 Available as a function so that subclasses can extend hash mismatch
2235 2238 behaviors as needed.
2236 2239 """
2237 2240 try:
2238 2241 if p1 is None and p2 is None:
2239 2242 p1, p2 = self.parents(node)
2240 2243 if node != self.hash(text, p1, p2):
2241 2244 revornode = rev
2242 2245 if revornode is None:
2243 2246 revornode = templatefilters.short(hex(node))
2244 2247 raise RevlogError(_("integrity check failed on %s:%s")
2245 2248 % (self.indexfile, pycompat.bytestr(revornode)))
2246 2249 except RevlogError:
2247 2250 if self._censorable and _censoredtext(text):
2248 2251 raise error.CensoredNodeError(self.indexfile, node, text)
2249 2252 raise
2250 2253
2251 2254 def _enforceinlinesize(self, tr, fp=None):
2252 2255 """Check if the revlog is too big for inline and convert if so.
2253 2256
2254 2257 This should be called after revisions are added to the revlog. If the
2255 2258 revlog has grown too large to be an inline revlog, it will convert it
2256 2259 to use multiple index and data files.
2257 2260 """
2258 2261 tiprev = len(self) - 1
2259 2262 if (not self._inline or
2260 2263 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2261 2264 return
2262 2265
2263 2266 trinfo = tr.find(self.indexfile)
2264 2267 if trinfo is None:
2265 2268 raise RevlogError(_("%s not found in the transaction")
2266 2269 % self.indexfile)
2267 2270
2268 2271 trindex = trinfo[2]
2269 2272 if trindex is not None:
2270 2273 dataoff = self.start(trindex)
2271 2274 else:
2272 2275 # revlog was stripped at start of transaction, use all leftover data
2273 2276 trindex = len(self) - 1
2274 2277 dataoff = self.end(tiprev)
2275 2278
2276 2279 tr.add(self.datafile, dataoff)
2277 2280
2278 2281 if fp:
2279 2282 fp.flush()
2280 2283 fp.close()
2281 2284
2282 2285 with self._datafp('w') as df:
2283 2286 for r in self:
2284 2287 df.write(self._getsegmentforrevs(r, r)[1])
2285 2288
2286 2289 with self._indexfp('w') as fp:
2287 2290 self.version &= ~FLAG_INLINE_DATA
2288 2291 self._inline = False
2289 2292 io = self._io
2290 2293 for i in self:
2291 2294 e = io.packentry(self.index[i], self.node, self.version, i)
2292 2295 fp.write(e)
2293 2296
2294 2297 # the temp file replaces the real index when we exit the context
2295 2298 # manager
2296 2299
2297 2300 tr.replace(self.indexfile, trindex * self._io.size)
2298 2301 self._chunkclear()
2299 2302
2300 2303 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2301 2304 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2302 2305 """add a revision to the log
2303 2306
2304 2307 text - the revision data to add
2305 2308 transaction - the transaction object used for rollback
2306 2309 link - the linkrev data to add
2307 2310 p1, p2 - the parent nodeids of the revision
2308 2311 cachedelta - an optional precomputed delta
2309 2312 node - nodeid of revision; typically node is not specified, and it is
2310 2313 computed by default as hash(text, p1, p2), however subclasses might
2311 2314 use different hashing method (and override checkhash() in such case)
2312 2315 flags - the known flags to set on the revision
2313 2316 deltacomputer - an optional _deltacomputer instance shared between
2314 2317 multiple calls
2315 2318 """
2316 2319 if link == nullrev:
2317 2320 raise RevlogError(_("attempted to add linkrev -1 to %s")
2318 2321 % self.indexfile)
2319 2322
2320 2323 if flags:
2321 2324 node = node or self.hash(text, p1, p2)
2322 2325
2323 2326 rawtext, validatehash = self._processflags(text, flags, 'write')
2324 2327
2325 2328 # If the flag processor modifies the revision data, ignore any provided
2326 2329 # cachedelta.
2327 2330 if rawtext != text:
2328 2331 cachedelta = None
2329 2332
2330 2333 if len(rawtext) > _maxentrysize:
2331 2334 raise RevlogError(
2332 2335 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2333 2336 % (self.indexfile, len(rawtext)))
2334 2337
2335 2338 node = node or self.hash(rawtext, p1, p2)
2336 2339 if node in self.nodemap:
2337 2340 return node
2338 2341
2339 2342 if validatehash:
2340 2343 self.checkhash(rawtext, node, p1=p1, p2=p2)
2341 2344
2342 2345 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2343 2346 flags, cachedelta=cachedelta,
2344 2347 deltacomputer=deltacomputer)
2345 2348
2346 2349 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2347 2350 cachedelta=None, deltacomputer=None):
2348 2351 """add a raw revision with known flags, node and parents
2349 2352 useful when reusing a revision not stored in this revlog (ex: received
2350 2353 over wire, or read from an external bundle).
2351 2354 """
2352 2355 dfh = None
2353 2356 if not self._inline:
2354 2357 dfh = self._datafp("a+")
2355 2358 ifh = self._indexfp("a+")
2356 2359 try:
2357 2360 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2358 2361 flags, cachedelta, ifh, dfh,
2359 2362 deltacomputer=deltacomputer)
2360 2363 finally:
2361 2364 if dfh:
2362 2365 dfh.close()
2363 2366 ifh.close()
2364 2367
2365 2368 def compress(self, data):
2366 2369 """Generate a possibly-compressed representation of data."""
2367 2370 if not data:
2368 2371 return '', data
2369 2372
2370 2373 compressed = self._compressor.compress(data)
2371 2374
2372 2375 if compressed:
2373 2376 # The revlog compressor added the header in the returned data.
2374 2377 return '', compressed
2375 2378
2376 2379 if data[0:1] == '\0':
2377 2380 return '', data
2378 2381 return 'u', data
2379 2382
2380 2383 def decompress(self, data):
2381 2384 """Decompress a revlog chunk.
2382 2385
2383 2386 The chunk is expected to begin with a header identifying the
2384 2387 format type so it can be routed to an appropriate decompressor.
2385 2388 """
2386 2389 if not data:
2387 2390 return data
2388 2391
2389 2392 # Revlogs are read much more frequently than they are written and many
2390 2393 # chunks only take microseconds to decompress, so performance is
2391 2394 # important here.
2392 2395 #
2393 2396 # We can make a few assumptions about revlogs:
2394 2397 #
2395 2398 # 1) the majority of chunks will be compressed (as opposed to inline
2396 2399 # raw data).
2397 2400 # 2) decompressing *any* data will likely be at least 10x slower than
2398 2401 # returning raw inline data.
2399 2402 # 3) we want to prioritize common and officially supported compression
2400 2403 # engines
2401 2404 #
2402 2405 # It follows that we want to optimize for "decompress compressed data
2403 2406 # when encoded with common and officially supported compression engines"
2404 2407 # case over "raw data" and "data encoded by less common or non-official
2405 2408 # compression engines." That is why we have the inline lookup first
2406 2409 # followed by the compengines lookup.
2407 2410 #
2408 2411 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2409 2412 # compressed chunks. And this matters for changelog and manifest reads.
2410 2413 t = data[0:1]
2411 2414
2412 2415 if t == 'x':
2413 2416 try:
2414 2417 return _zlibdecompress(data)
2415 2418 except zlib.error as e:
2416 2419 raise RevlogError(_('revlog decompress error: %s') %
2417 2420 stringutil.forcebytestr(e))
2418 2421 # '\0' is more common than 'u' so it goes first.
2419 2422 elif t == '\0':
2420 2423 return data
2421 2424 elif t == 'u':
2422 2425 return util.buffer(data, 1)
2423 2426
2424 2427 try:
2425 2428 compressor = self._decompressors[t]
2426 2429 except KeyError:
2427 2430 try:
2428 2431 engine = util.compengines.forrevlogheader(t)
2429 2432 compressor = engine.revlogcompressor()
2430 2433 self._decompressors[t] = compressor
2431 2434 except KeyError:
2432 2435 raise RevlogError(_('unknown compression type %r') % t)
2433 2436
2434 2437 return compressor.decompress(data)
2435 2438
2436 2439 def _isgooddeltainfo(self, deltainfo, revinfo):
2437 2440 """Returns True if the given delta is good. Good means that it is within
2438 2441 the disk span, disk size, and chain length bounds that we know to be
2439 2442 performant."""
2440 2443 if deltainfo is None:
2441 2444 return False
2442 2445
2443 2446 # - 'deltainfo.distance' is the distance from the base revision --
2444 2447 # bounding it limits the amount of I/O we need to do.
2445 2448 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2446 2449 # deltas we need to apply -- bounding it limits the amount of CPU
2447 2450 # we consume.
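# Worked example (sketch): with textlen = 1000 and a configured
# chain-span limit, a candidate delta is rejected when the read span
# exceeds max(_maxdeltachainspan, 4 * 1000), when deltalen > 1000,
# when the cumulated chain payload exceeds 2 * 1000, or when the
# chain grows past _maxchainlen.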
2448 2451
2449 2452 if self._sparserevlog:
2450 2453 # As sparse-read will be used, we can consider that the distance
2451 2454 # is not the span of the whole delta chain, but the span of the
2452 2455 # largest chunk actually read.
2453 2456 base = deltainfo.base
2454 2457
2455 2458 if base != nullrev:
2456 2459 deltachain = self._deltachain(base)[0]
2457 2460 else:
2458 2461 deltachain = []
2459 2462
2460 2463 chunks = _slicechunk(self, deltachain, deltainfo)
2461 2464 distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
2462 2465 else:
2463 2466 distance = deltainfo.distance
2464 2467
2465 2468 textlen = revinfo.textlen
2466 2469 defaultmax = textlen * 4
2467 2470 maxdist = self._maxdeltachainspan
2468 2471 if not maxdist:
2469 2472 maxdist = distance # ensure the conditional pass
2470 2473 maxdist = max(maxdist, defaultmax)
2471 2474 if self._sparserevlog and maxdist < self._srmingapsize:
2472 2475 # In multiple places, we ignore irrelevant data ranges below a
2473 2476 # certain size. We apply the same tradeoff here and relax the span
2474 2477 # constraint for small enough content.
2475 2478 maxdist = self._srmingapsize
2476 2479
2477 2480 # Bad delta from read span:
2478 2481 #
2479 2482 # If the span of data read is larger than the maximum allowed.
2480 2483 if maxdist < distance:
2481 2484 return False
2482 2485
2483 2486 # Bad delta from new delta size:
2484 2487 #
2485 2488 # If the delta size is larger than the target text, storing the
2486 2489 # delta will be inefficient.
2487 2490 if textlen < deltainfo.deltalen:
2488 2491 return False
2489 2492
2490 2493 # Bad delta from cumulated payload size:
2491 2494 #
2492 2495 # If the sum of the deltas gets larger than K * the target text length.
2493 2496 if textlen * 2 < deltainfo.compresseddeltalen:
2494 2497 return False
2495 2498
2496 2499 # Bad delta from chain length:
2497 2500 #
2498 2501 # If the number of deltas in the chain gets too high.
2499 2502 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2500 2503 return False
2501 2504
2502 2505 return True
2503 2506
2504 2507 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2505 2508 cachedelta, ifh, dfh, alwayscache=False,
2506 2509 deltacomputer=None):
2507 2510 """internal function to add revisions to the log
2508 2511
2509 2512 see addrevision for argument descriptions.
2510 2513
2511 2514 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2512 2515
2513 2516 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2514 2517 be used.
2515 2518
2516 2519 invariants:
2517 2520 - rawtext is optional (can be None); if not set, cachedelta must be set.
2518 2521 if both are set, they must correspond to each other.
2519 2522 """
2520 2523 if node == nullid:
2521 2524 raise RevlogError(_("%s: attempt to add null revision") %
2522 2525 (self.indexfile))
2523 2526 if node == wdirid or node in wdirfilenodeids:
2524 2527 raise RevlogError(_("%s: attempt to add wdir revision") %
2525 2528 (self.indexfile))
2526 2529
2527 2530 if self._inline:
2528 2531 fh = ifh
2529 2532 else:
2530 2533 fh = dfh
2531 2534
2532 2535 btext = [rawtext]
2533 2536
2534 2537 curr = len(self)
2535 2538 prev = curr - 1
2536 2539 offset = self.end(prev)
2537 2540 p1r, p2r = self.rev(p1), self.rev(p2)
2538 2541
2539 2542 # full versions are inserted when the needed deltas
2540 2543 # become comparable to the uncompressed text
2541 2544 if rawtext is None:
2542 2545 # need rawtext size, before changed by flag processors, which is
2543 2546 # the non-raw size. use revlog explicitly to avoid filelog's extra
2544 2547 # logic that might remove metadata size.
2545 2548 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2546 2549 cachedelta[1])
2547 2550 else:
2548 2551 textlen = len(rawtext)
2549 2552
2550 2553 if deltacomputer is None:
2551 2554 deltacomputer = _deltacomputer(self)
2552 2555
2553 2556 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2554 2557
2555 2558 # no delta for flag processor revision (see "candelta" for why)
2556 2559 # not calling candelta since only one revision needs test, also to
2557 2560 # avoid overhead fetching flags again.
2558 2561 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2559 2562 deltainfo = None
2560 2563 else:
2561 2564 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2562 2565
2563 2566 if deltainfo is not None:
2564 2567 base = deltainfo.base
2565 2568 chainbase = deltainfo.chainbase
2566 2569 data = deltainfo.data
2567 2570 l = deltainfo.deltalen
2568 2571 else:
2569 2572 rawtext = deltacomputer.buildtext(revinfo, fh)
2570 2573 data = self.compress(rawtext)
2571 2574 l = len(data[1]) + len(data[0])
2572 2575 base = chainbase = curr
2573 2576
2574 2577 e = (offset_type(offset, flags), l, textlen,
2575 2578 base, link, p1r, p2r, node)
2576 2579 self.index.append(e)
2577 2580 self.nodemap[node] = curr
2578 2581
2579 2582 entry = self._io.packentry(e, self.node, self.version, curr)
2580 2583 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2581 2584
2582 2585 if alwayscache and rawtext is None:
2583 2586 rawtext = deltacomputer._buildtext(revinfo, fh)
2584 2587
2585 2588 if type(rawtext) == bytes: # only accept immutable objects
2586 2589 self._cache = (node, curr, rawtext)
2587 2590 self._chainbasecache[curr] = chainbase
2588 2591 return node
2589 2592
2590 2593 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2591 2594 # Files opened in a+ mode have inconsistent behavior on various
2592 2595 # platforms. Windows requires that a file positioning call be made
2593 2596 # when the file handle transitions between reads and writes. See
2594 2597 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2595 2598 # platforms, Python or the platform itself can be buggy. Some versions
2596 2599 # of Solaris have been observed to not append at the end of the file
2597 2600 # if the file was seeked to before the end. See issue4943 for more.
2598 2601 #
2599 2602 # We work around this issue by inserting a seek() before writing.
2600 2603 # Note: This is likely not necessary on Python 3.
2601 2604 ifh.seek(0, os.SEEK_END)
2602 2605 if dfh:
2603 2606 dfh.seek(0, os.SEEK_END)
2604 2607
2605 2608 curr = len(self) - 1
2606 2609 if not self._inline:
2607 2610 transaction.add(self.datafile, offset)
2608 2611 transaction.add(self.indexfile, curr * len(entry))
2609 2612 if data[0]:
2610 2613 dfh.write(data[0])
2611 2614 dfh.write(data[1])
2612 2615 ifh.write(entry)
2613 2616 else:
2614 2617 offset += curr * self._io.size
2615 2618 transaction.add(self.indexfile, offset, curr)
2616 2619 ifh.write(entry)
2617 2620 ifh.write(data[0])
2618 2621 ifh.write(data[1])
2619 2622 self._enforceinlinesize(transaction, ifh)
2620 2623
2621 2624 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2622 2625 """
2623 2626 add a delta group
2624 2627
2625 2628 given a set of deltas, add them to the revision log. the
2626 2629 first delta is against its parent, which should be in our
2627 2630 log, the rest are against the previous delta.
2628 2631
2629 2632 If ``addrevisioncb`` is defined, it will be called with arguments of
2630 2633 this revlog and the node that was added.
2631 2634 """
2632 2635
2633 2636 nodes = []
2634 2637
2635 2638 r = len(self)
2636 2639 end = 0
2637 2640 if r:
2638 2641 end = self.end(r - 1)
2639 2642 ifh = self._indexfp("a+")
2640 2643 isize = r * self._io.size
2641 2644 if self._inline:
2642 2645 transaction.add(self.indexfile, end + isize, r)
2643 2646 dfh = None
2644 2647 else:
2645 2648 transaction.add(self.indexfile, isize, r)
2646 2649 transaction.add(self.datafile, end)
2647 2650 dfh = self._datafp("a+")
2648 2651 def flush():
2649 2652 if dfh:
2650 2653 dfh.flush()
2651 2654 ifh.flush()
2652 2655 try:
2653 2656 deltacomputer = _deltacomputer(self)
2654 2657 # loop through our set of deltas
2655 2658 for data in deltas:
2656 2659 node, p1, p2, linknode, deltabase, delta, flags = data
2657 2660 link = linkmapper(linknode)
2658 2661 flags = flags or REVIDX_DEFAULT_FLAGS
2659 2662
2660 2663 nodes.append(node)
2661 2664
2662 2665 if node in self.nodemap:
2663 2666 # this can happen if two branches make the same change
2664 2667 continue
2665 2668
2666 2669 for p in (p1, p2):
2667 2670 if p not in self.nodemap:
2668 2671 raise LookupError(p, self.indexfile,
2669 2672 _('unknown parent'))
2670 2673
2671 2674 if deltabase not in self.nodemap:
2672 2675 raise LookupError(deltabase, self.indexfile,
2673 2676 _('unknown delta base'))
2674 2677
2675 2678 baserev = self.rev(deltabase)
2676 2679
2677 2680 if baserev != nullrev and self.iscensored(baserev):
2678 2681 # if base is censored, delta must be full replacement in a
2679 2682 # single patch operation
2680 2683 hlen = struct.calcsize(">lll")
2681 2684 oldlen = self.rawsize(baserev)
2682 2685 newlen = len(delta) - hlen
2683 2686 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2684 2687 raise error.CensoredBaseError(self.indexfile,
2685 2688 self.node(baserev))
2686 2689
2687 2690 if not flags and self._peek_iscensored(baserev, delta, flush):
2688 2691 flags |= REVIDX_ISCENSORED
2689 2692
2690 2693 # We assume consumers of addrevisioncb will want to retrieve
2691 2694 # the added revision, which will require a call to
2692 2695 # revision(). revision() will fast path if there is a cache
2693 2696 # hit. So, we tell _addrevision() to always cache in this case.
2694 2697 # We're only using addgroup() in the context of changegroup
2695 2698 # generation so the revision data can always be handled as raw
2696 2699 # by the flagprocessor.
2697 2700 self._addrevision(node, None, transaction, link,
2698 2701 p1, p2, flags, (baserev, delta),
2699 2702 ifh, dfh,
2700 2703 alwayscache=bool(addrevisioncb),
2701 2704 deltacomputer=deltacomputer)
2702 2705
2703 2706 if addrevisioncb:
2704 2707 addrevisioncb(self, node)
2705 2708
2706 2709 if not dfh and not self._inline:
2707 2710 # addrevision switched from inline to conventional
2708 2711 # reopen the index
2709 2712 ifh.close()
2710 2713 dfh = self._datafp("a+")
2711 2714 ifh = self._indexfp("a+")
2712 2715 finally:
2713 2716 if dfh:
2714 2717 dfh.close()
2715 2718 ifh.close()
2716 2719
2717 2720 return nodes
2718 2721
2719 2722 def iscensored(self, rev):
2720 2723 """Check if a file revision is censored."""
2721 2724 if not self._censorable:
2722 2725 return False
2723 2726
2724 2727 return self.flags(rev) & REVIDX_ISCENSORED
2725 2728
2726 2729 def _peek_iscensored(self, baserev, delta, flush):
2727 2730 """Quickly check if a delta produces a censored revision."""
2728 2731 if not self._censorable:
2729 2732 return False
2730 2733
2731 2734 # Fragile heuristic: unless new file meta keys are added alphabetically
2732 2735 # preceding "censored", all censored revisions are prefixed by
2733 2736 # "\1\ncensored:". A delta producing such a censored revision must be a
2734 2737 # full-replacement delta, so we inspect the first and only patch in the
2735 2738 # delta for this prefix.
2736 2739 hlen = struct.calcsize(">lll")
2737 2740 if len(delta) <= hlen:
2738 2741 return False
2739 2742
2740 2743 oldlen = self.rawsize(baserev)
2741 2744 newlen = len(delta) - hlen
2742 2745 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2743 2746 return False
2744 2747
2745 2748 add = "\1\ncensored:"
2746 2749 addlen = len(add)
2747 2750 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2748 2751
2749 2752 def getstrippoint(self, minlink):
2750 2753 """find the minimum rev that must be stripped to strip the linkrev
2751 2754
2752 2755 Returns a tuple containing the minimum rev and a set of all revs that
2753 2756 have linkrevs that will be broken by this strip.
2754 2757 """
2755 2758 brokenrevs = set()
2756 2759 strippoint = len(self)
2757 2760
2758 2761 heads = {}
2759 2762 futurelargelinkrevs = set()
2760 2763 for head in self.headrevs():
2761 2764 headlinkrev = self.linkrev(head)
2762 2765 heads[head] = headlinkrev
2763 2766 if headlinkrev >= minlink:
2764 2767 futurelargelinkrevs.add(headlinkrev)
2765 2768
2766 2769 # This algorithm involves walking down the rev graph, starting at the
2767 2770 # heads. Since the revs are topologically sorted according to linkrev,
2768 2771 # once all head linkrevs are below the minlink, we know there are
2769 2772 # no more revs that could have a linkrev greater than minlink.
2770 2773 # So we can stop walking.
2771 2774 while futurelargelinkrevs:
2772 2775 strippoint -= 1
2773 2776 linkrev = heads.pop(strippoint)
2774 2777
2775 2778 if linkrev < minlink:
2776 2779 brokenrevs.add(strippoint)
2777 2780 else:
2778 2781 futurelargelinkrevs.remove(linkrev)
2779 2782
2780 2783 for p in self.parentrevs(strippoint):
2781 2784 if p != nullrev:
2782 2785 plinkrev = self.linkrev(p)
2783 2786 heads[p] = plinkrev
2784 2787 if plinkrev >= minlink:
2785 2788 futurelargelinkrevs.add(plinkrev)
2786 2789
2787 2790 return strippoint, brokenrevs
2788 2791
2789 2792 def strip(self, minlink, transaction):
2790 2793 """truncate the revlog on the first revision with a linkrev >= minlink
2791 2794
2792 2795 This function is called when we're stripping revision minlink and
2793 2796 its descendants from the repository.
2794 2797
2795 2798 We have to remove all revisions with linkrev >= minlink, because
2796 2799 the equivalent changelog revisions will be renumbered after the
2797 2800 strip.
2798 2801
2799 2802 So we truncate the revlog on the first of these revisions, and
2800 2803 trust that the caller has saved the revisions that shouldn't be
2801 2804 removed and that it'll re-add them after this truncation.
2802 2805 """
2803 2806 if len(self) == 0:
2804 2807 return
2805 2808
2806 2809 rev, _ = self.getstrippoint(minlink)
2807 2810 if rev == len(self):
2808 2811 return
2809 2812
2810 2813 # first truncate the files on disk
2811 2814 end = self.start(rev)
2812 2815 if not self._inline:
2813 2816 transaction.add(self.datafile, end)
2814 2817 end = rev * self._io.size
2815 2818 else:
2816 2819 end += rev * self._io.size
2817 2820
2818 2821 transaction.add(self.indexfile, end)
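# e.g. in a non-inline revlog the data file is truncated at
# start(rev) and the index at rev * entry-size; in an inline revlog
# entries and data share one file, so the two terms are summed.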
2819 2822
2820 2823 # then reset internal state in memory to forget those revisions
2821 2824 self._cache = None
2822 2825 self._chaininfocache = {}
2823 2826 self._chunkclear()
2824 2827 for x in pycompat.xrange(rev, len(self)):
2825 2828 del self.nodemap[self.node(x)]
2826 2829
2827 2830 del self.index[rev:-1]
2828 2831 self._nodepos = None
2829 2832
2830 2833 def checksize(self):
2831 2834 expected = 0
2832 2835 if len(self):
2833 2836 expected = max(0, self.end(len(self) - 1))
2834 2837
2835 2838 try:
2836 2839 with self._datafp() as f:
2837 2840 f.seek(0, 2)
2838 2841 actual = f.tell()
2839 2842 dd = actual - expected
2840 2843 except IOError as inst:
2841 2844 if inst.errno != errno.ENOENT:
2842 2845 raise
2843 2846 dd = 0
2844 2847
2845 2848 try:
2846 2849 f = self.opener(self.indexfile)
2847 2850 f.seek(0, 2)
2848 2851 actual = f.tell()
2849 2852 f.close()
2850 2853 s = self._io.size
2851 2854 i = max(0, actual // s)
2852 2855 di = actual - (i * s)
2853 2856 if self._inline:
2854 2857 databytes = 0
2855 2858 for r in self:
2856 2859 databytes += max(0, self.length(r))
2857 2860 dd = 0
2858 2861 di = actual - len(self) * s - databytes
2859 2862 except IOError as inst:
2860 2863 if inst.errno != errno.ENOENT:
2861 2864 raise
2862 2865 di = 0
2863 2866
2864 2867 return (dd, di)
2865 2868
2866 2869 def files(self):
2867 2870 res = [self.indexfile]
2868 2871 if not self._inline:
2869 2872 res.append(self.datafile)
2870 2873 return res
2871 2874
2872 2875 DELTAREUSEALWAYS = 'always'
2873 2876 DELTAREUSESAMEREVS = 'samerevs'
2874 2877 DELTAREUSENEVER = 'never'
2875 2878
2876 2879 DELTAREUSEFULLADD = 'fulladd'
2877 2880
2878 2881 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2879 2882
2880 2883 def clone(self, tr, destrevlog, addrevisioncb=None,
2881 2884 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2882 2885 """Copy this revlog to another, possibly with format changes.
2883 2886
2884 2887 The destination revlog will contain the same revisions and nodes.
2885 2888 However, it may not be bit-for-bit identical due to e.g. delta encoding
2886 2889 differences.
2887 2890
2888 2891 The ``deltareuse`` argument controls how deltas from the existing revlog
2889 2892 are preserved in the destination revlog. The argument can have the
2890 2893 following values:
2891 2894
2892 2895 DELTAREUSEALWAYS
2893 2896 Deltas will always be reused (if possible), even if the destination
2894 2897 revlog would not select the same revisions for the delta. This is the
2895 2898 fastest mode of operation.
2896 2899 DELTAREUSESAMEREVS
2897 2900 Deltas will be reused if the destination revlog would pick the same
2898 2901 revisions for the delta. This mode strikes a balance between speed
2899 2902 and optimization.
2900 2903 DELTAREUSENEVER
2901 2904 Deltas will never be reused. This is the slowest mode of execution.
2902 2905 This mode can be used to recompute deltas (e.g. if the diff/delta
2903 2906 algorithm changes).
2904 2907
2905 2908 Delta computation can be slow, so the choice of delta reuse policy can
2906 2909 significantly affect run time.
2907 2910
2908 2911 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2909 2912 two extremes. Deltas will be reused if they are appropriate. But if a
2910 2913 better delta base could be chosen, it will be. This means if you
2911 2914 are converting a non-generaldelta revlog to a generaldelta revlog,
2912 2915 deltas will be recomputed if the delta's parent isn't a parent of the
2913 2916 revision.
2914 2917
2915 2918 In addition to the delta policy, the ``deltabothparents`` argument
2916 2919 controls whether to compute deltas against both parents for merges.
2917 2920 If unset, the destination revlog's current setting is kept.
2918 2921 """
2919 2922 if deltareuse not in self.DELTAREUSEALL:
2920 2923 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2921 2924
2922 2925 if len(destrevlog):
2923 2926 raise ValueError(_('destination revlog is not empty'))
2924 2927
2925 2928 if getattr(self, 'filteredrevs', None):
2926 2929 raise ValueError(_('source revlog has filtered revisions'))
2927 2930 if getattr(destrevlog, 'filteredrevs', None):
2928 2931 raise ValueError(_('destination revlog has filtered revisions'))
2929 2932
2930 2933 # lazydeltabase controls whether to reuse a cached delta, if possible.
2931 2934 oldlazydeltabase = destrevlog._lazydeltabase
2932 2935 oldamd = destrevlog._deltabothparents
2933 2936
2934 2937 try:
2935 2938 if deltareuse == self.DELTAREUSEALWAYS:
2936 2939 destrevlog._lazydeltabase = True
2937 2940 elif deltareuse == self.DELTAREUSESAMEREVS:
2938 2941 destrevlog._lazydeltabase = False
2939 2942
2940 2943 destrevlog._deltabothparents = deltabothparents or oldamd
2941 2944
2942 2945 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2943 2946 self.DELTAREUSESAMEREVS)
2944 2947
2945 2948 deltacomputer = _deltacomputer(destrevlog)
2946 2949 index = self.index
2947 2950 for rev in self:
2948 2951 entry = index[rev]
2949 2952
2950 2953 # Some classes override linkrev to take filtered revs into
2951 2954 # account. Use raw entry from index.
2952 2955 flags = entry[0] & 0xffff
2953 2956 linkrev = entry[4]
2954 2957 p1 = index[entry[5]][7]
2955 2958 p2 = index[entry[6]][7]
2956 2959 node = entry[7]
2957 2960
2958 2961 # (Possibly) reuse the delta from the revlog if allowed and
2959 2962 # the revlog chunk is a delta.
2960 2963 cachedelta = None
2961 2964 rawtext = None
2962 2965 if populatecachedelta:
2963 2966 dp = self.deltaparent(rev)
2964 2967 if dp != nullrev:
2965 2968 cachedelta = (dp, bytes(self._chunk(rev)))
2966 2969
2967 2970 if not cachedelta:
2968 2971 rawtext = self.revision(rev, raw=True)
2969 2972
2970 2973
2971 2974 if deltareuse == self.DELTAREUSEFULLADD:
2972 2975 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2973 2976 cachedelta=cachedelta,
2974 2977 node=node, flags=flags,
2975 2978 deltacomputer=deltacomputer)
2976 2979 else:
2977 2980 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2978 2981 checkambig=False)
2979 2982 dfh = None
2980 2983 if not destrevlog._inline:
2981 2984 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2982 2985 try:
2983 2986 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2984 2987 p2, flags, cachedelta, ifh, dfh,
2985 2988 deltacomputer=deltacomputer)
2986 2989 finally:
2987 2990 if dfh:
2988 2991 dfh.close()
2989 2992 ifh.close()
2990 2993
2991 2994 if addrevisioncb:
2992 2995 addrevisioncb(self, rev, node)
2993 2996 finally:
2994 2997 destrevlog._lazydeltabase = oldlazydeltabase
2995 2998 destrevlog._deltabothparents = oldamd