revlog: fix pure nodemap to not access missing index entry...
Yuya Nishihara
r39176:65d5de11 default
@@ -1,3005 +1,3007 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
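To make the header layout concrete, here is a small editorial sketch (not part of the original file) of how a version integer decomposes into a format number and feature flags, mirroring the masking done later in revlog.__init__:

    # low 16 bits: format version; high bits: feature flags
    v = REVLOG_DEFAULT_VERSION
    assert v & 0xFFFF == REVLOGV1
    assert v & FLAG_INLINE_DATA
    assert not (v & FLAG_GENERALDELTA)  # generaldelta is not a default flag
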
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 86 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 95 CensoredNodeError = error.CensoredNodeError
96 96 ProgrammingError = error.ProgrammingError
97 97
98 98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 99 _flagprocessors = {
100 100 REVIDX_ISCENSORED: None,
101 101 }
102 102
103 103 _mdre = re.compile('\1\n')
104 104 def parsemeta(text):
105 105 """return (metadatadict, metadatasize)"""
106 106 # text can be buffer, so we can't use .startswith or .index
107 107 if text[:2] != '\1\n':
108 108 return None, None
109 109 s = _mdre.search(text, 2).start()
110 110 mtext = text[2:s]
111 111 meta = {}
112 112 for l in mtext.splitlines():
113 113 k, v = l.split(": ", 1)
114 114 meta[k] = v
115 115 return meta, (s + 2)
116 116
117 117 def packmeta(meta, text):
118 118 keys = sorted(meta)
119 119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 120 return "\1\n%s\1\n%s" % (metatext, text)
121 121
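A quick editorial round-trip sketch of the metadata framing implemented above: packmeta() frames the metadata block between two "\1\n" markers, and parsemeta() recovers the dict plus the offset at which the real text starts:

    raw = packmeta({'copy': 'a.txt'}, 'file content')
    meta, size = parsemeta(raw)
    assert meta == {'copy': 'a.txt'}
    assert raw[size:] == 'file content'
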
122 122 def _censoredtext(text):
123 123 m, offs = parsemeta(text)
124 124 return m and "censored" in m
125 125
126 126 def addflagprocessor(flag, processor):
127 127 """Register a flag processor on a revision data flag.
128 128
129 129 Invariant:
130 130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 132 - Only one flag processor can be registered on a specific flag.
133 133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 134 following signatures:
135 135 - (read) f(self, rawtext) -> text, bool
136 136 - (write) f(self, text) -> rawtext, bool
137 137 - (raw) f(self, rawtext) -> bool
138 138 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 139 directly visible to the user.
140 140 The boolean returned by these transforms is used to determine whether
141 141 the returned text can be used for hash integrity checking. For example,
142 142 if "write" returns False, then "text" is used to generate hash. If
143 143 "write" returns True, that basically means "rawtext" returned by "write"
144 144 should be used to generate hash. Usually, "write" and "read" return
145 145 different booleans. And "raw" returns a same boolean as "write".
146 146
147 147 Note: The 'raw' transform is used for changegroup generation and in some
148 148 debug commands. In this case the transform only indicates whether the
149 149 contents can be used for hash integrity checks.
150 150 """
151 151 if not flag & REVIDX_KNOWN_FLAGS:
152 152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 153 raise ProgrammingError(msg)
154 154 if flag not in REVIDX_FLAGS_ORDER:
155 155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 156 raise ProgrammingError(msg)
157 157 if flag in _flagprocessors:
158 158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 159 raise error.Abort(msg)
160 160 _flagprocessors[flag] = processor
161 161
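A hypothetical registration sketch following the 3-tuple contract documented above; the processor names are illustrative only, not real Mercurial processors (these identity transforms keep the text usable for hash checking):

    def _read(self, rawtext):
        return rawtext, True   # (text, usable for hash checking)
    def _write(self, text):
        return text, True      # (rawtext, use it for hashing)
    def _raw(self, rawtext):
        return True            # rawtext can be hash-checked
    # addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))
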
162 162 def getoffset(q):
163 163 return int(q >> 16)
164 164
165 165 def gettype(q):
166 166 return int(q & 0xFFFF)
167 167
168 168 def offset_type(offset, type):
169 169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 170 raise ValueError('unknown revlog index flags')
171 171 return int(int(offset) << 16 | type)
172 172
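A minimal editorial usage sketch for the helpers above: the low 16 bits of the first index field carry the flags, the remaining bits the byte offset:

    v = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(v) == 1024
    assert gettype(v) == REVIDX_ISCENSORED
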
173 173 _nullhash = hashlib.sha1(nullid)
174 174
175 175 def hash(text, p1, p2):
176 176 """generate a hash from the given text and its parent hashes
177 177
178 178 This hash combines both the current file contents and its history
179 179 in a manner that makes it easy to distinguish nodes with the same
180 180 content in the revision graph.
181 181 """
182 182 # As of now, if one of the parent nodes is null, p2 is null
183 183 if p2 == nullid:
184 184 # deep copy of a hash is faster than creating one
185 185 s = _nullhash.copy()
186 186 s.update(p1)
187 187 else:
188 188 # none of the parent nodes are nullid
189 189 if p1 < p2:
190 190 a = p1
191 191 b = p2
192 192 else:
193 193 a = p2
194 194 b = p1
195 195 s = hashlib.sha1(a)
196 196 s.update(b)
197 197 s.update(text)
198 198 return s.digest()
199 199
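An equivalent standalone formulation of the node hash (editorial sketch, assuming only hashlib): sha1(min(p1, p2) + max(p1, p2) + text). The nullid fast path above yields the same digest, since nullid sorts below any other node:

    import hashlib
    nullid20 = b'\0' * 20
    def nodehash(text, p1, p2):
        a, b = sorted([p1, p2])
        return hashlib.sha1(a + b + text).digest()
    # a root revision (both parents null) hashes as sha1(nullid * 2 + text)
    assert nodehash(b'x', nullid20, nullid20) == \
        hashlib.sha1(nullid20 * 2 + b'x').digest()
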
200 200 class _testrevlog(object):
201 201 """minimalist fake revlog to use in doctests"""
202 202
203 203 def __init__(self, data, density=0.5, mingap=0):
204 204 """data is an list of revision payload boundaries"""
205 205 self._data = data
206 206 self._srdensitythreshold = density
207 207 self._srmingapsize = mingap
208 208
209 209 def start(self, rev):
210 210 if rev == 0:
211 211 return 0
212 212 return self._data[rev - 1]
213 213
214 214 def end(self, rev):
215 215 return self._data[rev]
216 216
217 217 def length(self, rev):
218 218 return self.end(rev) - self.start(rev)
219 219
220 220 def __len__(self):
221 221 return len(self._data)
222 222
223 223 def _trimchunk(revlog, revs, startidx, endidx=None):
224 224 """returns revs[startidx:endidx] without empty trailing revs
225 225
226 226 Doctest Setup
227 227 >>> revlog = _testrevlog([
228 228 ... 5, #0
229 229 ... 10, #1
230 230 ... 12, #2
231 231 ... 12, #3 (empty)
232 232 ... 17, #4
233 233 ... 21, #5
234 234 ... 21, #6 (empty)
235 235 ... ])
236 236
237 237 Contiguous cases:
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 239 [0, 1, 2, 3, 4, 5]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 241 [0, 1, 2, 3, 4]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 243 [0, 1, 2]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 245 [2]
246 246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 247 [3, 4, 5]
248 248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 249 [3, 4]
250 250
251 251 Discontiguous cases:
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 253 [1, 3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 255 [1]
256 256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 257 [3, 5]
258 258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 259 [3, 5]
260 260 """
261 261 length = revlog.length
262 262
263 263 if endidx is None:
264 264 endidx = len(revs)
265 265
266 266 # Trim empty revs at the end, but never the very first revision of a chain
267 267 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
268 268 endidx -= 1
269 269
270 270 return revs[startidx:endidx]
271 271
272 272 def _segmentspan(revlog, revs):
273 273 """Get the byte span of a segment of revisions
274 274
275 275 revs is a sorted array of revision numbers
276 276
277 277 >>> revlog = _testrevlog([
278 278 ... 5, #0
279 279 ... 10, #1
280 280 ... 12, #2
281 281 ... 12, #3 (empty)
282 282 ... 17, #4
283 283 ... ])
284 284
285 285 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
286 286 17
287 287 >>> _segmentspan(revlog, [0, 4])
288 288 17
289 289 >>> _segmentspan(revlog, [3, 4])
290 290 5
291 291 >>> _segmentspan(revlog, [1, 2, 3,])
292 292 7
293 293 >>> _segmentspan(revlog, [1, 3])
294 294 7
295 295 """
296 296 if not revs:
297 297 return 0
298 298 return revlog.end(revs[-1]) - revlog.start(revs[0])
299 299
300 300 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
301 301 """slice revs to reduce the amount of unrelated data to be read from disk.
302 302
303 303 ``revs`` is sliced into groups that should be read in one go.
304 304 Assume that revs are sorted.
305 305
306 306 The initial chunk is sliced until the overall density (payload/chunks-span
307 307 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
308 308 `revlog._srmingapsize` is skipped.
309 309
310 310 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
311 311 For consistency with other slicing choices, this limit won't go lower than
312 312 `revlog._srmingapsize`.
313 313
314 314 If individual revision chunks are larger than this limit, they will still
315 315 be yielded individually.
316 316
317 317 >>> revlog = _testrevlog([
318 318 ... 5, #00 (5)
319 319 ... 10, #01 (5)
320 320 ... 12, #02 (2)
321 321 ... 12, #03 (empty)
322 322 ... 27, #04 (15)
323 323 ... 31, #05 (4)
324 324 ... 31, #06 (empty)
325 325 ... 42, #07 (11)
326 326 ... 47, #08 (5)
327 327 ... 47, #09 (empty)
328 328 ... 48, #10 (1)
329 329 ... 51, #11 (3)
330 330 ... 74, #12 (23)
331 331 ... 85, #13 (11)
332 332 ... 86, #14 (1)
333 333 ... 91, #15 (5)
334 334 ... ])
335 335
336 336 >>> list(_slicechunk(revlog, list(range(16))))
337 337 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
338 338 >>> list(_slicechunk(revlog, [0, 15]))
339 339 [[0], [15]]
340 340 >>> list(_slicechunk(revlog, [0, 11, 15]))
341 341 [[0], [11], [15]]
342 342 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
343 343 [[0], [11, 13, 15]]
344 344 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
345 345 [[1, 2], [5, 8, 10, 11], [14]]
346 346
347 347 Slicing with a maximum chunk size
348 348 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
349 349 [[0], [11], [13], [15]]
350 350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
351 351 [[0], [11], [13, 15]]
352 352 """
353 353 if targetsize is not None:
354 354 targetsize = max(targetsize, revlog._srmingapsize)
355 355 # targetsize should not be specified when evaluating delta candidates:
356 356 # * targetsize is used to ensure we stay within specification when reading,
357 357 # * deltainfo is used to pick a good delta chain when writing.
358 358 if not (deltainfo is None or targetsize is None):
359 359 msg = 'cannot use `targetsize` with a `deltainfo`'
360 360 raise error.ProgrammingError(msg)
361 361 for chunk in _slicechunktodensity(revlog, revs,
362 362 deltainfo,
363 363 revlog._srdensitythreshold,
364 364 revlog._srmingapsize):
365 365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
366 366 yield subchunk
367 367
368 368 def _slicechunktosize(revlog, revs, targetsize=None):
369 369 """slice revs to match the target size
370 370
371 371 This is intended to be used on chunks that density slicing selected, but
372 372 that are still too large compared to the read guarantee of revlogs. This
373 373 might happen when the "minimal gap size" interrupted the slicing, or when
374 374 chains are built in a way that creates large blocks next to each other.
375 375
376 376 >>> revlog = _testrevlog([
377 377 ... 3, #0 (3)
378 378 ... 5, #1 (2)
379 379 ... 6, #2 (1)
380 380 ... 8, #3 (2)
381 381 ... 8, #4 (empty)
382 382 ... 11, #5 (3)
383 383 ... 12, #6 (1)
384 384 ... 13, #7 (1)
385 385 ... 14, #8 (1)
386 386 ... ])
387 387
388 388 Cases where chunk is already small enough
389 389 >>> list(_slicechunktosize(revlog, [0], 3))
390 390 [[0]]
391 391 >>> list(_slicechunktosize(revlog, [6, 7], 3))
392 392 [[6, 7]]
393 393 >>> list(_slicechunktosize(revlog, [0], None))
394 394 [[0]]
395 395 >>> list(_slicechunktosize(revlog, [6, 7], None))
396 396 [[6, 7]]
397 397
398 398 Cases where we need actual slicing
399 399 >>> list(_slicechunktosize(revlog, [0, 1], 3))
400 400 [[0], [1]]
401 401 >>> list(_slicechunktosize(revlog, [1, 3], 3))
402 402 [[1], [3]]
403 403 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
404 404 [[1, 2], [3]]
405 405 >>> list(_slicechunktosize(revlog, [3, 5], 3))
406 406 [[3], [5]]
407 407 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
408 408 [[3], [5]]
409 409 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
410 410 [[5], [6, 7, 8]]
411 411 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
412 412 [[0], [1, 2], [3], [5], [6, 7, 8]]
413 413
414 414 Cases with an individual chunk that is too large (must return a valid chunk)
415 415 >>> list(_slicechunktosize(revlog, [0, 1], 2))
416 416 [[0], [1]]
417 417 >>> list(_slicechunktosize(revlog, [1, 3], 1))
418 418 [[1], [3]]
419 419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
420 420 [[3], [5]]
421 421 """
422 422 assert targetsize is None or 0 <= targetsize
423 423 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
424 424 yield revs
425 425 return
426 426
427 427 startrevidx = 0
428 428 startdata = revlog.start(revs[0])
429 429 endrevidx = 0
430 430 iterrevs = enumerate(revs)
431 431 next(iterrevs) # skip first rev.
432 432 for idx, r in iterrevs:
433 433 span = revlog.end(r) - startdata
434 434 if span <= targetsize:
435 435 endrevidx = idx
436 436 else:
437 437 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
438 438 if chunk:
439 439 yield chunk
440 440 startrevidx = idx
441 441 startdata = revlog.start(r)
442 442 endrevidx = idx
443 443 yield _trimchunk(revlog, revs, startrevidx)
444 444
445 445 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
446 446 mingapsize=0):
447 447 """slice revs to reduce the amount of unrelated data to be read from disk.
448 448
449 449 ``revs`` is sliced into groups that should be read in one go.
450 450 Assume that revs are sorted.
451 451
452 452 ``deltainfo`` is a _deltainfo instance of a revision that we would append
453 453 to the top of the revlog.
454 454
455 455 The initial chunk is sliced until the overall density (payload/chunks-span
456 456 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
457 457 skipped.
458 458
459 459 >>> revlog = _testrevlog([
460 460 ... 5, #00 (5)
461 461 ... 10, #01 (5)
462 462 ... 12, #02 (2)
463 463 ... 12, #03 (empty)
464 464 ... 27, #04 (15)
465 465 ... 31, #05 (4)
466 466 ... 31, #06 (empty)
467 467 ... 42, #07 (11)
468 468 ... 47, #08 (5)
469 469 ... 47, #09 (empty)
470 470 ... 48, #10 (1)
471 471 ... 51, #11 (3)
472 472 ... 74, #12 (23)
473 473 ... 85, #13 (11)
474 474 ... 86, #14 (1)
475 475 ... 91, #15 (5)
476 476 ... ])
477 477
478 478 >>> list(_slicechunktodensity(revlog, list(range(16))))
479 479 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
480 480 >>> list(_slicechunktodensity(revlog, [0, 15]))
481 481 [[0], [15]]
482 482 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
483 483 [[0], [11], [15]]
484 484 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
485 485 [[0], [11, 13, 15]]
486 486 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
487 487 [[1, 2], [5, 8, 10, 11], [14]]
488 488 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
489 489 ... mingapsize=20))
490 490 [[1, 2, 3, 5, 8, 10, 11], [14]]
491 491 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
492 492 ... targetdensity=0.95))
493 493 [[1, 2], [5], [8, 10, 11], [14]]
494 494 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
495 495 ... targetdensity=0.95, mingapsize=12))
496 496 [[1, 2], [5, 8, 10, 11], [14]]
497 497 """
498 498 start = revlog.start
499 499 length = revlog.length
500 500
501 501 if len(revs) <= 1:
502 502 yield revs
503 503 return
504 504
505 505 nextrev = len(revlog)
506 506 nextoffset = revlog.end(nextrev - 1)
507 507
508 508 if deltainfo is None:
509 509 deltachainspan = _segmentspan(revlog, revs)
510 510 chainpayload = sum(length(r) for r in revs)
511 511 else:
512 512 deltachainspan = deltainfo.distance
513 513 chainpayload = deltainfo.compresseddeltalen
514 514
515 515 if deltachainspan < mingapsize:
516 516 yield revs
517 517 return
518 518
519 519 readdata = deltachainspan
520 520
521 521 if deltachainspan:
522 522 density = chainpayload / float(deltachainspan)
523 523 else:
524 524 density = 1.0
525 525
526 526 if density >= targetdensity:
527 527 yield revs
528 528 return
529 529
530 530 if deltainfo is not None:
531 531 revs = list(revs)
532 532 revs.append(nextrev)
533 533
534 534 # Store the gaps in a heap to have them sorted by decreasing size
535 535 gapsheap = []
536 536 heapq.heapify(gapsheap)
537 537 prevend = None
538 538 for i, rev in enumerate(revs):
539 539 if rev < nextrev:
540 540 revstart = start(rev)
541 541 revlen = length(rev)
542 542 else:
543 543 revstart = nextoffset
544 544 revlen = deltainfo.deltalen
545 545
546 546 # Skip empty revisions to form larger holes
547 547 if revlen == 0:
548 548 continue
549 549
550 550 if prevend is not None:
551 551 gapsize = revstart - prevend
552 552 # only consider holes that are large enough
553 553 if gapsize > mingapsize:
554 554 heapq.heappush(gapsheap, (-gapsize, i))
555 555
556 556 prevend = revstart + revlen
557 557
558 558 # Collect the indices of the largest holes until the density is acceptable
559 559 indicesheap = []
560 560 heapq.heapify(indicesheap)
561 561 while gapsheap and density < targetdensity:
562 562 oppgapsize, gapidx = heapq.heappop(gapsheap)
563 563
564 564 heapq.heappush(indicesheap, gapidx)
565 565
566 566 # the gap sizes are stored as negatives to be sorted decreasingly
567 567 # by the heap
568 568 readdata -= (-oppgapsize)
569 569 if readdata > 0:
570 570 density = chainpayload / float(readdata)
571 571 else:
572 572 density = 1.0
573 573
574 574 # Cut the revs at collected indices
575 575 previdx = 0
576 576 while indicesheap:
577 577 idx = heapq.heappop(indicesheap)
578 578
579 579 chunk = _trimchunk(revlog, revs, previdx, idx)
580 580 if chunk:
581 581 yield chunk
582 582
583 583 previdx = idx
584 584
585 585 chunk = _trimchunk(revlog, revs, previdx)
586 586 if chunk:
587 587 yield chunk
588 588
589 589 @attr.s(slots=True, frozen=True)
590 590 class _deltainfo(object):
591 591 distance = attr.ib()
592 592 deltalen = attr.ib()
593 593 data = attr.ib()
594 594 base = attr.ib()
595 595 chainbase = attr.ib()
596 596 chainlen = attr.ib()
597 597 compresseddeltalen = attr.ib()
598 598
599 599 class _deltacomputer(object):
600 600 def __init__(self, revlog):
601 601 self.revlog = revlog
602 602
603 603 def _getcandidaterevs(self, p1, p2, cachedelta):
604 604 """
605 605 Provides candidate revisions that are worth diffing against,
606 606 grouped by level of ease.
607 607 """
608 608 revlog = self.revlog
609 609 gdelta = revlog._generaldelta
610 610 curr = len(revlog)
611 611 prev = curr - 1
612 612 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
613 613
614 614 # should we try to build a delta?
615 615 if prev != nullrev and revlog.storedeltachains:
616 616 tested = set()
617 617 # This condition is true most of the time when processing
618 618 # changegroup data into a generaldelta repo. The only time it
619 619 # isn't true is if this is the first revision in a delta chain
620 620 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
621 621 if cachedelta and gdelta and revlog._lazydeltabase:
622 622 # Assume what we received from the server is a good choice
623 623 # build delta will reuse the cache
624 624 yield (cachedelta[0],)
625 625 tested.add(cachedelta[0])
626 626
627 627 if gdelta:
628 628 # exclude already lazy tested base if any
629 629 parents = [p for p in (p1r, p2r)
630 630 if p != nullrev and p not in tested]
631 631
632 632 if not revlog._deltabothparents and len(parents) == 2:
633 633 parents.sort()
634 634 # To minimize the chance of having to build a fulltext,
635 635 # pick first whichever parent is closest to us (max rev)
636 636 yield (parents[1],)
637 637 # then the other one (min rev) if the first did not fit
638 638 yield (parents[0],)
639 639 tested.update(parents)
640 640 elif len(parents) > 0:
641 641 # Test all parents (1 or 2), and keep the best candidate
642 642 yield parents
643 643 tested.update(parents)
644 644
645 645 if prev not in tested:
646 646 # other approaches failed; try against prev to hopefully save us a
647 647 # fulltext.
648 648 yield (prev,)
649 649 tested.add(prev)
650 650
651 651 def buildtext(self, revinfo, fh):
652 652 """Builds a fulltext version of a revision
653 653
654 654 revinfo: _revisioninfo instance that contains all needed info
655 655 fh: file handle to either the .i or the .d revlog file,
656 656 depending on whether it is inlined or not
657 657 """
658 658 btext = revinfo.btext
659 659 if btext[0] is not None:
660 660 return btext[0]
661 661
662 662 revlog = self.revlog
663 663 cachedelta = revinfo.cachedelta
664 664 flags = revinfo.flags
665 665 node = revinfo.node
666 666
667 667 baserev = cachedelta[0]
668 668 delta = cachedelta[1]
669 669 # special case deltas which replace entire base; no need to decode
670 670 # base revision. this neatly avoids censored bases, which throw when
671 671 # they're decoded.
672 672 hlen = struct.calcsize(">lll")
673 673 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
674 674 len(delta) - hlen):
675 675 btext[0] = delta[hlen:]
676 676 else:
677 677 # deltabase is rawtext before changed by flag processors, which is
678 678 # equivalent to non-raw text
679 679 basetext = revlog.revision(baserev, _df=fh, raw=False)
680 680 btext[0] = mdiff.patch(basetext, delta)
681 681
682 682 try:
683 683 res = revlog._processflags(btext[0], flags, 'read', raw=True)
684 684 btext[0], validatehash = res
685 685 if validatehash:
686 686 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
687 687 if flags & REVIDX_ISCENSORED:
688 688 raise RevlogError(_('node %s is not censored') % node)
689 689 except CensoredNodeError:
690 690 # must pass the censored index flag to add censored revisions
691 691 if not flags & REVIDX_ISCENSORED:
692 692 raise
693 693 return btext[0]
694 694
695 695 def _builddeltadiff(self, base, revinfo, fh):
696 696 revlog = self.revlog
697 697 t = self.buildtext(revinfo, fh)
698 698 if revlog.iscensored(base):
699 699 # deltas based on a censored revision must replace the
700 700 # full content in one patch, so delta works everywhere
701 701 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
702 702 delta = header + t
703 703 else:
704 704 ptext = revlog.revision(base, _df=fh, raw=True)
705 705 delta = mdiff.textdiff(ptext, t)
706 706
707 707 return delta
708 708
709 709 def _builddeltainfo(self, revinfo, base, fh):
710 710 # can we use the cached delta?
711 711 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
712 712 delta = revinfo.cachedelta[1]
713 713 else:
714 714 delta = self._builddeltadiff(base, revinfo, fh)
715 715 revlog = self.revlog
716 716 header, data = revlog.compress(delta)
717 717 deltalen = len(header) + len(data)
718 718 chainbase = revlog.chainbase(base)
719 719 offset = revlog.end(len(revlog) - 1)
720 720 dist = deltalen + offset - revlog.start(chainbase)
721 721 if revlog._generaldelta:
722 722 deltabase = base
723 723 else:
724 724 deltabase = chainbase
725 725 chainlen, compresseddeltalen = revlog._chaininfo(base)
726 726 chainlen += 1
727 727 compresseddeltalen += deltalen
728 728 return _deltainfo(dist, deltalen, (header, data), deltabase,
729 729 chainbase, chainlen, compresseddeltalen)
730 730
731 731 def finddeltainfo(self, revinfo, fh):
732 732 """Find an acceptable delta against a candidate revision
733 733
734 734 revinfo: information about the revision (instance of _revisioninfo)
735 735 fh: file handle to either the .i or the .d revlog file,
736 736 depending on whether it is inlined or not
737 737
738 738 Returns the first acceptable candidate revision, as ordered by
739 739 _getcandidaterevs
740 740 """
741 741 if not revinfo.textlen:
742 742 return None # empty files do not need a delta
743 743
744 744 cachedelta = revinfo.cachedelta
745 745 p1 = revinfo.p1
746 746 p2 = revinfo.p2
747 747 revlog = self.revlog
748 748
749 749 deltalength = self.revlog.length
750 750 deltaparent = self.revlog.deltaparent
751 751
752 752 deltainfo = None
753 753 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
754 754 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
755 755 # filter out delta bases that will never produce a good delta
756 756 candidaterevs = [r for r in candidaterevs
757 757 if self.revlog.length(r) <= deltas_limit]
758 758 nominateddeltas = []
759 759 for candidaterev in candidaterevs:
760 760 # skip over empty deltas (no need to include them in a chain)
761 761 while candidaterev != nullrev and not deltalength(candidaterev):
762 762 candidaterev = deltaparent(candidaterev)
763 763 # no need to try a delta against nullid, this will be handled
764 764 # by fulltext later.
765 765 if candidaterev == nullrev:
766 766 continue
767 767 # no delta for rawtext-changing revs (see "candelta" for why)
768 768 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
769 769 continue
770 770 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
771 771 if revlog._isgooddeltainfo(candidatedelta, revinfo):
772 772 nominateddeltas.append(candidatedelta)
773 773 if nominateddeltas:
774 774 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
775 775 break
776 776
777 777 return deltainfo
778 778
779 779 @attr.s(slots=True, frozen=True)
780 780 class _revisioninfo(object):
781 781 """Information about a revision that allows building its fulltext
782 782 node: expected hash of the revision
783 783 p1, p2: parent revs of the revision
784 784 btext: built text cache consisting of a one-element list
785 785 cachedelta: (baserev, uncompressed_delta) or None
786 786 flags: flags associated to the revision storage
787 787
788 788 One of btext[0] or cachedelta must be set.
789 789 """
790 790 node = attr.ib()
791 791 p1 = attr.ib()
792 792 p2 = attr.ib()
793 793 btext = attr.ib()
794 794 textlen = attr.ib()
795 795 cachedelta = attr.ib()
796 796 flags = attr.ib()
797 797
798 798 # index v0:
799 799 # 4 bytes: offset
800 800 # 4 bytes: compressed length
801 801 # 4 bytes: base rev
802 802 # 4 bytes: link rev
803 803 # 20 bytes: parent 1 nodeid
804 804 # 20 bytes: parent 2 nodeid
805 805 # 20 bytes: nodeid
806 806 indexformatv0 = struct.Struct(">4l20s20s20s")
807 807 indexformatv0_pack = indexformatv0.pack
808 808 indexformatv0_unpack = indexformatv0.unpack
809 809
810 810 class revlogoldindex(list):
811 811 def __getitem__(self, i):
812 812 if i == -1:
813 813 return (0, 0, 0, -1, -1, -1, -1, nullid)
814 814 return list.__getitem__(self, i)
815 815
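A small editorial note in code form: the __getitem__ override above makes index[-1] behave as the entry for the null revision, so callers can index with nullrev without a bounds check:

    idx = revlogoldindex([])
    assert idx[-1][7] == nullid  # nodeid field of the nullrev entry
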
816 816 # maximum <delta-chain-data>/<revision-text-length> ratio
817 817 LIMIT_DELTA2TEXT = 2
818 818
819 819 class revlogoldio(object):
820 820 def __init__(self):
821 821 self.size = indexformatv0.size
822 822
823 823 def parseindex(self, data, inline):
824 824 s = self.size
825 825 index = []
826 826 nodemap = {nullid: nullrev}
827 827 n = off = 0
828 828 l = len(data)
829 829 while off + s <= l:
830 830 cur = data[off:off + s]
831 831 off += s
832 832 e = indexformatv0_unpack(cur)
833 833 # transform to revlogv1 format
834 834 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
835 835 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
836 836 index.append(e2)
837 837 nodemap[e[6]] = n
838 838 n += 1
839 839
840 840 return revlogoldindex(index), nodemap, None
841 841
842 842 def packentry(self, entry, node, version, rev):
843 843 if gettype(entry[0]):
844 844 raise RevlogError(_('index entry flags need revlog version 1'))
845 845 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
846 846 node(entry[5]), node(entry[6]), entry[7])
847 847 return indexformatv0_pack(*e2)
848 848
849 849 # index ng:
850 850 # 6 bytes: offset
851 851 # 2 bytes: flags
852 852 # 4 bytes: compressed length
853 853 # 4 bytes: uncompressed length
854 854 # 4 bytes: base rev
855 855 # 4 bytes: link rev
856 856 # 4 bytes: parent 1 rev
857 857 # 4 bytes: parent 2 rev
858 858 # 32 bytes: nodeid
859 859 indexformatng = struct.Struct(">Qiiiiii20s12x")
860 860 indexformatng_pack = indexformatng.pack
861 861 versionformat = struct.Struct(">I")
862 862 versionformat_pack = versionformat.pack
863 863 versionformat_unpack = versionformat.unpack
864 864
865 865 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
866 866 # signed integer)
867 867 _maxentrysize = 0x7fffffff
868 868
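To illustrate the RevlogNG record layout documented above, an editorial sketch (field values are made up) that packs and re-parses one 64-byte entry with the same struct format:

    import struct
    fmt = struct.Struct(">Qiiiiii20s12x")
    node = b'\xaa' * 20
    entry = fmt.pack((1024 << 16) | 0, 11, 24, 0, 0, -1, -1, node)
    assert fmt.size == 64
    offset_flags, comp, uncomp, base, link, p1, p2, n = fmt.unpack(entry)
    assert offset_flags >> 16 == 1024 and n == node
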
869 869 class revlogio(object):
870 870 def __init__(self):
871 871 self.size = indexformatng.size
872 872
873 873 def parseindex(self, data, inline):
874 874 # call the C implementation to parse the index data
875 875 index, cache = parsers.parse_index2(data, inline)
876 876 return index, getattr(index, 'nodemap', None), cache
877 877
878 878 def packentry(self, entry, node, version, rev):
879 879 p = indexformatng_pack(*entry)
880 880 if rev == 0:
881 881 p = versionformat_pack(version) + p[4:]
882 882 return p
883 883
884 884 class revlog(object):
885 885 """
886 886 the underlying revision storage object
887 887
888 888 A revlog consists of two parts, an index and the revision data.
889 889
890 890 The index is a file with a fixed record size containing
891 891 information on each revision, including its nodeid (hash), the
892 892 nodeids of its parents, the position and offset of its data within
893 893 the data file, and the revision it's based on. Finally, each entry
894 894 contains a linkrev entry that can serve as a pointer to external
895 895 data.
896 896
897 897 The revision data itself is a linear collection of data chunks.
898 898 Each chunk represents a revision and is usually represented as a
899 899 delta against the previous chunk. To bound lookup time, runs of
900 900 deltas are limited to about 2 times the length of the original
901 901 version data. This makes retrieval of a version proportional to
902 902 its size, or O(1) relative to the number of revisions.
903 903
904 904 Both pieces of the revlog are written to in an append-only
905 905 fashion, which means we never need to rewrite a file to insert or
906 906 remove data, and can use some simple techniques to avoid the need
907 907 for locking while reading.
908 908
909 909 If checkambig, indexfile is opened with checkambig=True at
910 910 writing, to avoid file stat ambiguity.
911 911
912 912 If mmaplargeindex is True, and an mmapindexthreshold is set, the
913 913 index will be mmapped rather than read if it is larger than the
914 914 configured threshold.
915 915
916 916 If censorable is True, the revlog can have censored revisions.
917 917 """
918 918 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
919 919 mmaplargeindex=False, censorable=False):
920 920 """
921 921 create a revlog object
922 922
923 923 opener is a function that abstracts the file opening operation
924 924 and can be used to implement COW semantics or the like.
925 925 """
926 926 self.indexfile = indexfile
927 927 self.datafile = datafile or (indexfile[:-2] + ".d")
928 928 self.opener = opener
929 929 # When True, indexfile is opened with checkambig=True at writing, to
930 930 # avoid file stat ambiguity.
931 931 self._checkambig = checkambig
932 932 self._censorable = censorable
933 933 # 3-tuple of (node, rev, text) for a raw revision.
934 934 self._cache = None
935 935 # Maps rev to chain base rev.
936 936 self._chainbasecache = util.lrucachedict(100)
937 937 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
938 938 self._chunkcache = (0, '')
939 939 # How much data to read and cache into the raw revlog data cache.
940 940 self._chunkcachesize = 65536
941 941 self._maxchainlen = None
942 942 self._deltabothparents = True
943 943 self.index = []
944 944 # Mapping of partial identifiers to full nodes.
945 945 self._pcache = {}
946 946 # Mapping of revision integer to full node.
947 947 self._nodecache = {nullid: nullrev}
948 948 self._nodepos = None
949 949 self._compengine = 'zlib'
950 950 self._maxdeltachainspan = -1
951 951 self._withsparseread = False
952 952 self._sparserevlog = False
953 953 self._srdensitythreshold = 0.50
954 954 self._srmingapsize = 262144
955 955
956 956 mmapindexthreshold = None
957 957 v = REVLOG_DEFAULT_VERSION
958 958 opts = getattr(opener, 'options', None)
959 959 if opts is not None:
960 960 if 'revlogv2' in opts:
961 961 # version 2 revlogs always use generaldelta.
962 962 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
963 963 elif 'revlogv1' in opts:
964 964 if 'generaldelta' in opts:
965 965 v |= FLAG_GENERALDELTA
966 966 else:
967 967 v = 0
968 968 if 'chunkcachesize' in opts:
969 969 self._chunkcachesize = opts['chunkcachesize']
970 970 if 'maxchainlen' in opts:
971 971 self._maxchainlen = opts['maxchainlen']
972 972 if 'deltabothparents' in opts:
973 973 self._deltabothparents = opts['deltabothparents']
974 974 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
975 975 if 'compengine' in opts:
976 976 self._compengine = opts['compengine']
977 977 if 'maxdeltachainspan' in opts:
978 978 self._maxdeltachainspan = opts['maxdeltachainspan']
979 979 if mmaplargeindex and 'mmapindexthreshold' in opts:
980 980 mmapindexthreshold = opts['mmapindexthreshold']
981 981 self._sparserevlog = bool(opts.get('sparse-revlog', False))
982 982 withsparseread = bool(opts.get('with-sparse-read', False))
983 983 # sparse-revlog forces sparse-read
984 984 self._withsparseread = self._sparserevlog or withsparseread
985 985 if 'sparse-read-density-threshold' in opts:
986 986 self._srdensitythreshold = opts['sparse-read-density-threshold']
987 987 if 'sparse-read-min-gap-size' in opts:
988 988 self._srmingapsize = opts['sparse-read-min-gap-size']
989 989
990 990 if self._chunkcachesize <= 0:
991 991 raise RevlogError(_('revlog chunk cache size %r is not greater '
992 992 'than 0') % self._chunkcachesize)
993 993 elif self._chunkcachesize & (self._chunkcachesize - 1):
994 994 raise RevlogError(_('revlog chunk cache size %r is not a power '
995 995 'of 2') % self._chunkcachesize)
996 996
997 997 indexdata = ''
998 998 self._initempty = True
999 999 try:
1000 1000 with self._indexfp() as f:
1001 1001 if (mmapindexthreshold is not None and
1002 1002 self.opener.fstat(f).st_size >= mmapindexthreshold):
1003 1003 indexdata = util.buffer(util.mmapread(f))
1004 1004 else:
1005 1005 indexdata = f.read()
1006 1006 if len(indexdata) > 0:
1007 1007 v = versionformat_unpack(indexdata[:4])[0]
1008 1008 self._initempty = False
1009 1009 except IOError as inst:
1010 1010 if inst.errno != errno.ENOENT:
1011 1011 raise
1012 1012
1013 1013 self.version = v
1014 1014 self._inline = v & FLAG_INLINE_DATA
1015 1015 self._generaldelta = v & FLAG_GENERALDELTA
1016 1016 flags = v & ~0xFFFF
1017 1017 fmt = v & 0xFFFF
1018 1018 if fmt == REVLOGV0:
1019 1019 if flags:
1020 1020 raise RevlogError(_('unknown flags (%#04x) in version %d '
1021 1021 'revlog %s') %
1022 1022 (flags >> 16, fmt, self.indexfile))
1023 1023 elif fmt == REVLOGV1:
1024 1024 if flags & ~REVLOGV1_FLAGS:
1025 1025 raise RevlogError(_('unknown flags (%#04x) in version %d '
1026 1026 'revlog %s') %
1027 1027 (flags >> 16, fmt, self.indexfile))
1028 1028 elif fmt == REVLOGV2:
1029 1029 if flags & ~REVLOGV2_FLAGS:
1030 1030 raise RevlogError(_('unknown flags (%#04x) in version %d '
1031 1031 'revlog %s') %
1032 1032 (flags >> 16, fmt, self.indexfile))
1033 1033 else:
1034 1034 raise RevlogError(_('unknown version (%d) in revlog %s') %
1035 1035 (fmt, self.indexfile))
1036 1036
1037 1037 self.storedeltachains = True
1038 1038
1039 1039 self._io = revlogio()
1040 1040 if self.version == REVLOGV0:
1041 1041 self._io = revlogoldio()
1042 1042 try:
1043 1043 d = self._io.parseindex(indexdata, self._inline)
1044 1044 except (ValueError, IndexError):
1045 1045 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1046 1046 self.index, nodemap, self._chunkcache = d
1047 1047 if nodemap is not None:
1048 1048 self.nodemap = self._nodecache = nodemap
1049 1049 if not self._chunkcache:
1050 1050 self._chunkclear()
1051 1051 # revnum -> (chain-length, sum-delta-length)
1052 1052 self._chaininfocache = {}
1053 1053 # revlog header -> revlog compressor
1054 1054 self._decompressors = {}
1055 1055
1056 1056 @util.propertycache
1057 1057 def _compressor(self):
1058 1058 return util.compengines[self._compengine].revlogcompressor()
1059 1059
1060 1060 def _indexfp(self, mode='r'):
1061 1061 """file object for the revlog's index file"""
1062 1062 args = {r'mode': mode}
1063 1063 if mode != 'r':
1064 1064 args[r'checkambig'] = self._checkambig
1065 1065 if mode == 'w':
1066 1066 args[r'atomictemp'] = True
1067 1067 return self.opener(self.indexfile, **args)
1068 1068
1069 1069 def _datafp(self, mode='r'):
1070 1070 """file object for the revlog's data file"""
1071 1071 return self.opener(self.datafile, mode=mode)
1072 1072
1073 1073 @contextlib.contextmanager
1074 1074 def _datareadfp(self, existingfp=None):
1075 1075 """file object suitable to read data"""
1076 1076 if existingfp is not None:
1077 1077 yield existingfp
1078 1078 else:
1079 1079 if self._inline:
1080 1080 func = self._indexfp
1081 1081 else:
1082 1082 func = self._datafp
1083 1083 with func() as fp:
1084 1084 yield fp
1085 1085
1086 1086 def tip(self):
1087 1087 return self.node(len(self.index) - 1)
1088 1088 def __contains__(self, rev):
1089 1089 return 0 <= rev < len(self)
1090 1090 def __len__(self):
1091 1091 return len(self.index)
1092 1092 def __iter__(self):
1093 1093 return iter(pycompat.xrange(len(self)))
1094 1094 def revs(self, start=0, stop=None):
1095 1095 """iterate over all rev in this revlog (from start to stop)"""
1096 1096 step = 1
1097 1097 length = len(self)
1098 1098 if stop is not None:
1099 1099 if start > stop:
1100 1100 step = -1
1101 1101 stop += step
1102 1102 if stop > length:
1103 1103 stop = length
1104 1104 else:
1105 1105 stop = length
1106 1106 return pycompat.xrange(start, stop, step)
1107 1107
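A usage sketch for revs() (editorial, assuming a revlog rl with 5 revisions): iteration runs forward by default and reverses automatically when start > stop, with the endpoint clamped to the revlog length:

    # list(rl.revs())        -> [0, 1, 2, 3, 4]
    # list(rl.revs(3, 1))    -> [3, 2, 1]   (start > stop iterates backwards)
    # list(rl.revs(stop=2))  -> [0, 1, 2]   (stop is inclusive)
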
1108 1108 @util.propertycache
1109 1109 def nodemap(self):
1110 if self.index:
1111 # populate mapping down to the initial node
1110 1112 self.rev(self.node(0))
1111 1113 return self._nodecache
1112 1114
1113 1115 def hasnode(self, node):
1114 1116 try:
1115 1117 self.rev(node)
1116 1118 return True
1117 1119 except KeyError:
1118 1120 return False
1119 1121
1120 1122 def candelta(self, baserev, rev):
1121 1123 """whether two revisions (baserev, rev) can be delta-ed or not"""
1122 1124 # Disable delta if either rev requires a content-changing flag
1123 1125 # processor (ex. LFS). This is because such flag processor can alter
1124 1126 # the rawtext content that the delta will be based on, and two clients
1125 1127 # could have a same revlog node with different flags (i.e. different
1126 1128 # rawtext contents) and the delta could be incompatible.
1127 1129 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1128 1130 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1129 1131 return False
1130 1132 return True
1131 1133
1132 1134 def clearcaches(self):
1133 1135 self._cache = None
1134 1136 self._chainbasecache.clear()
1135 1137 self._chunkcache = (0, '')
1136 1138 self._pcache = {}
1137 1139
1138 1140 try:
1139 1141 self._nodecache.clearcaches()
1140 1142 except AttributeError:
1141 1143 self._nodecache = {nullid: nullrev}
1142 1144 self._nodepos = None
1143 1145
1144 1146 def rev(self, node):
1145 1147 try:
1146 1148 return self._nodecache[node]
1147 1149 except TypeError:
1148 1150 raise
1149 1151 except RevlogError:
1150 1152 # parsers.c radix tree lookup failed
1151 1153 if node == wdirid or node in wdirfilenodeids:
1152 1154 raise error.WdirUnsupported
1153 1155 raise LookupError(node, self.indexfile, _('no node'))
1154 1156 except KeyError:
1155 1157 # pure python cache lookup failed
1156 1158 n = self._nodecache
1157 1159 i = self.index
1158 1160 p = self._nodepos
1159 1161 if p is None:
1160 1162 p = len(i) - 1
1161 1163 else:
1162 1164 assert p < len(i)
1163 1165 for r in pycompat.xrange(p, -1, -1):
1164 1166 v = i[r][7]
1165 1167 n[v] = r
1166 1168 if v == node:
1167 1169 self._nodepos = r - 1
1168 1170 return r
1169 1171 if node == wdirid or node in wdirfilenodeids:
1170 1172 raise error.WdirUnsupported
1171 1173 raise LookupError(node, self.indexfile, _('no node'))
1172 1174
1173 1175 # Accessors for index entries.
1174 1176
1175 1177 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1176 1178 # are flags.
1177 1179 def start(self, rev):
1178 1180 return int(self.index[rev][0] >> 16)
1179 1181
1180 1182 def flags(self, rev):
1181 1183 return self.index[rev][0] & 0xFFFF
1182 1184
1183 1185 def length(self, rev):
1184 1186 return self.index[rev][1]
1185 1187
1186 1188 def rawsize(self, rev):
1187 1189 """return the length of the uncompressed text for a given revision"""
1188 1190 l = self.index[rev][2]
1189 1191 if l >= 0:
1190 1192 return l
1191 1193
1192 1194 t = self.revision(rev, raw=True)
1193 1195 return len(t)
1194 1196
1195 1197 def size(self, rev):
1196 1198 """length of non-raw text (processed by a "read" flag processor)"""
1197 1199 # fast path: if no "read" flag processor could change the content,
1198 1200 # size is rawsize. note: ELLIPSIS is known to not change the content.
1199 1201 flags = self.flags(rev)
1200 1202 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1201 1203 return self.rawsize(rev)
1202 1204
1203 1205 return len(self.revision(rev, raw=False))
1204 1206
1205 1207 def chainbase(self, rev):
1206 1208 base = self._chainbasecache.get(rev)
1207 1209 if base is not None:
1208 1210 return base
1209 1211
1210 1212 index = self.index
1211 1213 iterrev = rev
1212 1214 base = index[iterrev][3]
1213 1215 while base != iterrev:
1214 1216 iterrev = base
1215 1217 base = index[iterrev][3]
1216 1218
1217 1219 self._chainbasecache[rev] = base
1218 1220 return base
1219 1221
1220 1222 def linkrev(self, rev):
1221 1223 return self.index[rev][4]
1222 1224
1223 1225 def parentrevs(self, rev):
1224 1226 try:
1225 1227 entry = self.index[rev]
1226 1228 except IndexError:
1227 1229 if rev == wdirrev:
1228 1230 raise error.WdirUnsupported
1229 1231 raise
1230 1232
1231 1233 return entry[5], entry[6]
1232 1234
1233 1235 def node(self, rev):
1234 1236 try:
1235 1237 return self.index[rev][7]
1236 1238 except IndexError:
1237 1239 if rev == wdirrev:
1238 1240 raise error.WdirUnsupported
1239 1241 raise
1240 1242
1241 1243 # Derived from index values.
1242 1244
1243 1245 def end(self, rev):
1244 1246 return self.start(rev) + self.length(rev)
1245 1247
1246 1248 def parents(self, node):
1247 1249 i = self.index
1248 1250 d = i[self.rev(node)]
1249 1251 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1250 1252
1251 1253 def chainlen(self, rev):
1252 1254 return self._chaininfo(rev)[0]
1253 1255
1254 1256 def _chaininfo(self, rev):
1255 1257 chaininfocache = self._chaininfocache
1256 1258 if rev in chaininfocache:
1257 1259 return chaininfocache[rev]
1258 1260 index = self.index
1259 1261 generaldelta = self._generaldelta
1260 1262 iterrev = rev
1261 1263 e = index[iterrev]
1262 1264 clen = 0
1263 1265 compresseddeltalen = 0
1264 1266 while iterrev != e[3]:
1265 1267 clen += 1
1266 1268 compresseddeltalen += e[1]
1267 1269 if generaldelta:
1268 1270 iterrev = e[3]
1269 1271 else:
1270 1272 iterrev -= 1
1271 1273 if iterrev in chaininfocache:
1272 1274 t = chaininfocache[iterrev]
1273 1275 clen += t[0]
1274 1276 compresseddeltalen += t[1]
1275 1277 break
1276 1278 e = index[iterrev]
1277 1279 else:
1278 1280 # Add text length of base since decompressing that also takes
1279 1281 # work. For cache hits the length is already included.
1280 1282 compresseddeltalen += e[1]
1281 1283 r = (clen, compresseddeltalen)
1282 1284 chaininfocache[rev] = r
1283 1285 return r
1284 1286
1285 1287 def _deltachain(self, rev, stoprev=None):
1286 1288 """Obtain the delta chain for a revision.
1287 1289
1288 1290 ``stoprev`` specifies a revision to stop at. If not specified, we
1289 1291 stop at the base of the chain.
1290 1292
1291 1293 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1292 1294 revs in ascending order and ``stopped`` is a bool indicating whether
1293 1295 ``stoprev`` was hit.
1294 1296 """
1295 1297 # Try C implementation.
1296 1298 try:
1297 1299 return self.index.deltachain(rev, stoprev, self._generaldelta)
1298 1300 except AttributeError:
1299 1301 pass
1300 1302
1301 1303 chain = []
1302 1304
1303 1305 # Alias to prevent attribute lookup in tight loop.
1304 1306 index = self.index
1305 1307 generaldelta = self._generaldelta
1306 1308
1307 1309 iterrev = rev
1308 1310 e = index[iterrev]
1309 1311 while iterrev != e[3] and iterrev != stoprev:
1310 1312 chain.append(iterrev)
1311 1313 if generaldelta:
1312 1314 iterrev = e[3]
1313 1315 else:
1314 1316 iterrev -= 1
1315 1317 e = index[iterrev]
1316 1318
1317 1319 if iterrev == stoprev:
1318 1320 stopped = True
1319 1321 else:
1320 1322 chain.append(iterrev)
1321 1323 stopped = False
1322 1324
1323 1325 chain.reverse()
1324 1326 return chain, stopped
1325 1327
1326 1328 def ancestors(self, revs, stoprev=0, inclusive=False):
1327 1329 """Generate the ancestors of 'revs' in reverse topological order.
1328 1330 Does not generate revs lower than stoprev.
1329 1331
1330 1332 See the documentation for ancestor.lazyancestors for more details."""
1331 1333
1332 1334 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1333 1335 inclusive=inclusive)
1334 1336
1335 1337 def descendants(self, revs):
1336 1338 """Generate the descendants of 'revs' in revision order.
1337 1339
1338 1340 Yield a sequence of revision numbers starting with a child of
1339 1341 some rev in revs, i.e., each revision is *not* considered a
1340 1342 descendant of itself. Results are ordered by revision number (a
1341 1343 topological sort)."""
1342 1344 first = min(revs)
1343 1345 if first == nullrev:
1344 1346 for i in self:
1345 1347 yield i
1346 1348 return
1347 1349
1348 1350 seen = set(revs)
1349 1351 for i in self.revs(start=first + 1):
1350 1352 for x in self.parentrevs(i):
1351 1353 if x != nullrev and x in seen:
1352 1354 seen.add(i)
1353 1355 yield i
1354 1356 break
1355 1357
1356 1358 def findcommonmissing(self, common=None, heads=None):
1357 1359 """Return a tuple of the ancestors of common and the ancestors of heads
1358 1360 that are not ancestors of common. In revset terminology, we return the
1359 1361 tuple:
1360 1362
1361 1363 ::common, (::heads) - (::common)
1362 1364
1363 1365 The list is sorted by revision number, meaning it is
1364 1366 topologically sorted.
1365 1367
1366 1368 'heads' and 'common' are both lists of node IDs. If heads is
1367 1369 not supplied, uses all of the revlog's heads. If common is not
1368 1370 supplied, uses nullid."""
1369 1371 if common is None:
1370 1372 common = [nullid]
1371 1373 if heads is None:
1372 1374 heads = self.heads()
1373 1375
1374 1376 common = [self.rev(n) for n in common]
1375 1377 heads = [self.rev(n) for n in heads]
1376 1378
1377 1379 # we want the ancestors, but inclusive
1378 1380 class lazyset(object):
1379 1381 def __init__(self, lazyvalues):
1380 1382 self.addedvalues = set()
1381 1383 self.lazyvalues = lazyvalues
1382 1384
1383 1385 def __contains__(self, value):
1384 1386 return value in self.addedvalues or value in self.lazyvalues
1385 1387
1386 1388 def __iter__(self):
1387 1389 added = self.addedvalues
1388 1390 for r in added:
1389 1391 yield r
1390 1392 for r in self.lazyvalues:
1391 1393 if not r in added:
1392 1394 yield r
1393 1395
1394 1396 def add(self, value):
1395 1397 self.addedvalues.add(value)
1396 1398
1397 1399 def update(self, values):
1398 1400 self.addedvalues.update(values)
1399 1401
1400 1402 has = lazyset(self.ancestors(common))
1401 1403 has.add(nullrev)
1402 1404 has.update(common)
1403 1405
1404 1406 # take all ancestors from heads that aren't in has
1405 1407 missing = set()
1406 1408 visit = collections.deque(r for r in heads if r not in has)
1407 1409 while visit:
1408 1410 r = visit.popleft()
1409 1411 if r in missing:
1410 1412 continue
1411 1413 else:
1412 1414 missing.add(r)
1413 1415 for p in self.parentrevs(r):
1414 1416 if p not in has:
1415 1417 visit.append(p)
1416 1418 missing = list(missing)
1417 1419 missing.sort()
1418 1420 return has, [self.node(miss) for miss in missing]
1419 1421
1420 1422 def incrementalmissingrevs(self, common=None):
1421 1423 """Return an object that can be used to incrementally compute the
1422 1424 revision numbers of the ancestors of arbitrary sets that are not
1423 1425 ancestors of common. This is an ancestor.incrementalmissingancestors
1424 1426 object.
1425 1427
1426 1428 'common' is a list of revision numbers. If common is not supplied, uses
1427 1429 nullrev.
1428 1430 """
1429 1431 if common is None:
1430 1432 common = [nullrev]
1431 1433
1432 1434 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1433 1435
1434 1436 def findmissingrevs(self, common=None, heads=None):
1435 1437 """Return the revision numbers of the ancestors of heads that
1436 1438 are not ancestors of common.
1437 1439
1438 1440 More specifically, return a list of revision numbers corresponding to
1439 1441 nodes N such that every N satisfies the following constraints:
1440 1442
1441 1443 1. N is an ancestor of some node in 'heads'
1442 1444 2. N is not an ancestor of any node in 'common'
1443 1445
1444 1446 The list is sorted by revision number, meaning it is
1445 1447 topologically sorted.
1446 1448
1447 1449 'heads' and 'common' are both lists of revision numbers. If heads is
1448 1450 not supplied, uses all of the revlog's heads. If common is not
1449 1451 supplied, uses nullid."""
1450 1452 if common is None:
1451 1453 common = [nullrev]
1452 1454 if heads is None:
1453 1455 heads = self.headrevs()
1454 1456
1455 1457 inc = self.incrementalmissingrevs(common=common)
1456 1458 return inc.missingancestors(heads)
1457 1459
1458 1460 def findmissing(self, common=None, heads=None):
1459 1461 """Return the ancestors of heads that are not ancestors of common.
1460 1462
1461 1463 More specifically, return a list of nodes N such that every N
1462 1464 satisfies the following constraints:
1463 1465
1464 1466 1. N is an ancestor of some node in 'heads'
1465 1467 2. N is not an ancestor of any node in 'common'
1466 1468
1467 1469 The list is sorted by revision number, meaning it is
1468 1470 topologically sorted.
1469 1471
1470 1472 'heads' and 'common' are both lists of node IDs. If heads is
1471 1473 not supplied, uses all of the revlog's heads. If common is not
1472 1474 supplied, uses nullid."""
1473 1475 if common is None:
1474 1476 common = [nullid]
1475 1477 if heads is None:
1476 1478 heads = self.heads()
1477 1479
1478 1480 common = [self.rev(n) for n in common]
1479 1481 heads = [self.rev(n) for n in heads]
1480 1482
1481 1483 inc = self.incrementalmissingrevs(common=common)
1482 1484 return [self.node(r) for r in inc.missingancestors(heads)]
1483 1485
1484 1486 def nodesbetween(self, roots=None, heads=None):
1485 1487 """Return a topological path from 'roots' to 'heads'.
1486 1488
1487 1489 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1488 1490 topologically sorted list of all nodes N that satisfy both of
1489 1491 these constraints:
1490 1492
1491 1493 1. N is a descendant of some node in 'roots'
1492 1494 2. N is an ancestor of some node in 'heads'
1493 1495
1494 1496 Every node is considered to be both a descendant and an ancestor
1495 1497 of itself, so every reachable node in 'roots' and 'heads' will be
1496 1498 included in 'nodes'.
1497 1499
1498 1500 'outroots' is the list of reachable nodes in 'roots', i.e., the
1499 1501 subset of 'roots' that is returned in 'nodes'. Likewise,
1500 1502 'outheads' is the subset of 'heads' that is also in 'nodes'.
1501 1503
1502 1504 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1503 1505 unspecified, uses nullid as the only root. If 'heads' is
1504 1506 unspecified, uses list of all of the revlog's heads."""
1505 1507 nonodes = ([], [], [])
1506 1508 if roots is not None:
1507 1509 roots = list(roots)
1508 1510 if not roots:
1509 1511 return nonodes
1510 1512 lowestrev = min([self.rev(n) for n in roots])
1511 1513 else:
1512 1514 roots = [nullid] # Everybody's a descendant of nullid
1513 1515 lowestrev = nullrev
1514 1516 if (lowestrev == nullrev) and (heads is None):
1515 1517 # We want _all_ the nodes!
1516 1518 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1517 1519 if heads is None:
1518 1520 # All nodes are ancestors, so the latest ancestor is the last
1519 1521 # node.
1520 1522 highestrev = len(self) - 1
1521 1523 # Set ancestors to None to signal that every node is an ancestor.
1522 1524 ancestors = None
1523 1525 # Set heads to an empty dictionary for later discovery of heads
1524 1526 heads = {}
1525 1527 else:
1526 1528 heads = list(heads)
1527 1529 if not heads:
1528 1530 return nonodes
1529 1531 ancestors = set()
1530 1532 # Turn heads into a dictionary so we can remove 'fake' heads.
1531 1533 # Also, later we will be using it to filter out the heads we can't
1532 1534 # find from roots.
1533 1535 heads = dict.fromkeys(heads, False)
1534 1536 # Start at the top and keep marking parents until we're done.
1535 1537 nodestotag = set(heads)
1536 1538 # Remember where the top was so we can use it as a limit later.
1537 1539 highestrev = max([self.rev(n) for n in nodestotag])
1538 1540 while nodestotag:
1539 1541 # grab a node to tag
1540 1542 n = nodestotag.pop()
1541 1543 # Never tag nullid
1542 1544 if n == nullid:
1543 1545 continue
1544 1546 # A node's revision number represents its place in a
1545 1547 # topologically sorted list of nodes.
1546 1548 r = self.rev(n)
1547 1549 if r >= lowestrev:
1548 1550 if n not in ancestors:
1549 1551 # If we are possibly a descendant of one of the roots
1550 1552 # and we haven't already been marked as an ancestor
1551 1553 ancestors.add(n) # Mark as ancestor
1552 1554 # Add non-nullid parents to list of nodes to tag.
1553 1555 nodestotag.update([p for p in self.parents(n) if
1554 1556 p != nullid])
1555 1557 elif n in heads: # We've seen it before, is it a fake head?
1556 1558 # So it is, real heads should not be the ancestors of
1557 1559 # any other heads.
1558 1560 heads.pop(n)
1559 1561 if not ancestors:
1560 1562 return nonodes
1561 1563 # Now that we have our set of ancestors, we want to remove any
1562 1564 # roots that are not ancestors.
1563 1565
1564 1566 # If one of the roots was nullid, everything is included anyway.
1565 1567 if lowestrev > nullrev:
1566 1568 # But, since we weren't, let's recompute the lowest rev to not
1567 1569 # include roots that aren't ancestors.
1568 1570
1569 1571 # Filter out roots that aren't ancestors of heads
1570 1572 roots = [root for root in roots if root in ancestors]
1571 1573 # Recompute the lowest revision
1572 1574 if roots:
1573 1575 lowestrev = min([self.rev(root) for root in roots])
1574 1576 else:
1575 1577 # No more roots? Return empty list
1576 1578 return nonodes
1577 1579 else:
1578 1580 # We are descending from nullid, and don't need to care about
1579 1581 # any other roots.
1580 1582 lowestrev = nullrev
1581 1583 roots = [nullid]
1582 1584 # Transform our roots list into a set.
1583 1585 descendants = set(roots)
1584 1586 # Also, keep the original roots so we can filter out roots that aren't
1585 1587 # 'real' roots (i.e. are descended from other roots).
1586 1588 roots = descendants.copy()
1587 1589 # Our topologically sorted list of output nodes.
1588 1590 orderedout = []
1589 1591 # Don't start at nullid since we don't want nullid in our output list,
1590 1592 # and if nullid shows up in descendants, empty parents will look like
1591 1593 # they're descendants.
1592 1594 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1593 1595 n = self.node(r)
1594 1596 isdescendant = False
1595 1597 if lowestrev == nullrev: # Everybody is a descendant of nullid
1596 1598 isdescendant = True
1597 1599 elif n in descendants:
1598 1600 # n is already a descendant
1599 1601 isdescendant = True
1600 1602 # This check only needs to be done here because all the roots
1601 1603 # will start being marked as descendants before the loop.
1602 1604 if n in roots:
1603 1605 # If n was a root, check if it's a 'real' root.
1604 1606 p = tuple(self.parents(n))
1605 1607 # If any of its parents are descendants, it's not a root.
1606 1608 if (p[0] in descendants) or (p[1] in descendants):
1607 1609 roots.remove(n)
1608 1610 else:
1609 1611 p = tuple(self.parents(n))
1610 1612 # A node is a descendant if either of its parents are
1611 1613 # descendants. (We seeded the descendants set with the roots
1612 1614 # up there, remember?)
1613 1615 if (p[0] in descendants) or (p[1] in descendants):
1614 1616 descendants.add(n)
1615 1617 isdescendant = True
1616 1618 if isdescendant and ((ancestors is None) or (n in ancestors)):
1617 1619 # Only include nodes that are both descendants and ancestors.
1618 1620 orderedout.append(n)
1619 1621 if (ancestors is not None) and (n in heads):
1620 1622 # We're trying to figure out which heads are reachable
1621 1623 # from roots.
1622 1624 # Mark this head as having been reached
1623 1625 heads[n] = True
1624 1626 elif ancestors is None:
1625 1627 # Otherwise, we're trying to discover the heads.
1626 1628 # Assume this is a head because if it isn't, the next step
1627 1629 # will eventually remove it.
1628 1630 heads[n] = True
1629 1631 # But, obviously its parents aren't.
1630 1632 for p in self.parents(n):
1631 1633 heads.pop(p, None)
1632 1634 heads = [head for head, flag in heads.iteritems() if flag]
1633 1635 roots = list(roots)
1634 1636 assert orderedout
1635 1637 assert roots
1636 1638 assert heads
1637 1639 return (orderedout, roots, heads)
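    # Usage sketch for the method above (hypothetical revlog instance
    # ``rl``; not part of the original file): every returned node is a
    # descendant of some root and an ancestor of some head, so the "out"
    # lists are subsets of the inputs:
    #
    #   orderedout, outroots, outheads = rl.nodesbetween(roots, heads)
    #   assert set(outroots) <= set(roots)
    #   assert set(outheads) <= set(heads)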
1638 1640
1639 1641 def headrevs(self):
1640 1642 try:
1641 1643 return self.index.headrevs()
1642 1644 except AttributeError:
1643 1645 return self._headrevs()
1644 1646
1645 1647 def computephases(self, roots):
1646 1648 return self.index.computephasesmapsets(roots)
1647 1649
1648 1650 def _headrevs(self):
1649 1651 count = len(self)
1650 1652 if not count:
1651 1653 return [nullrev]
1652 1654 # we won't iter over filtered rev so nobody is a head at start
1653 1655 ishead = [0] * (count + 1)
1654 1656 index = self.index
1655 1657 for r in self:
1656 1658 ishead[r] = 1 # I may be a head
1657 1659 e = index[r]
1658 1660 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1659 1661 return [r for r, val in enumerate(ishead) if val]
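    # The marking pass above as a standalone sketch with toy data (not
    # from the original file): any revision that appears as a parent is
    # not a head.
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}  # 1, 2 fork off 0
    #   ishead = [1] * 3 + [0]   # extra slot so index -1 (nullrev) lands
    #   for r in range(3):       # harmlessly, as in the code above
    #       p1, p2 = parents[r]
    #       ishead[p1] = ishead[p2] = 0
    #   # -> ishead[:3] == [0, 1, 1]: revisions 1 and 2 are the heads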
1660 1662
1661 1663 def heads(self, start=None, stop=None):
1662 1664 """return the list of all nodes that have no children
1663 1665
1664 1666 if start is specified, only heads that are descendants of
1665 1667 start will be returned
1666 1668 if stop is specified, it will consider all the revs from stop
1667 1669 as if they had no children
1668 1670 """
1669 1671 if start is None and stop is None:
1670 1672 if not len(self):
1671 1673 return [nullid]
1672 1674 return [self.node(r) for r in self.headrevs()]
1673 1675
1674 1676 if start is None:
1675 1677 start = nullid
1676 1678 if stop is None:
1677 1679 stop = []
1678 1680 stoprevs = set([self.rev(n) for n in stop])
1679 1681 startrev = self.rev(start)
1680 1682 reachable = {startrev}
1681 1683 heads = {startrev}
1682 1684
1683 1685 parentrevs = self.parentrevs
1684 1686 for r in self.revs(start=startrev + 1):
1685 1687 for p in parentrevs(r):
1686 1688 if p in reachable:
1687 1689 if r not in stoprevs:
1688 1690 reachable.add(r)
1689 1691 heads.add(r)
1690 1692 if p in heads and p not in stoprevs:
1691 1693 heads.remove(p)
1692 1694
1693 1695 return [self.node(r) for r in heads]
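    # Sketch (hypothetical nodes, not from the original file): with two
    # branches a -> b and a -> c,
    #
    #   rl.heads(start=a)            # -> [b, c] (order not guaranteed)
    #   rl.heads(start=a, stop=[b])  # -> [c]; the b branch is cut off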
1694 1696
1695 1697 def children(self, node):
1696 1698 """find the children of a given node"""
1697 1699 c = []
1698 1700 p = self.rev(node)
1699 1701 for r in self.revs(start=p + 1):
1700 1702 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1701 1703 if prevs:
1702 1704 for pr in prevs:
1703 1705 if pr == p:
1704 1706 c.append(self.node(r))
1705 1707 elif p == nullrev:
1706 1708 c.append(self.node(r))
1707 1709 return c
1708 1710
1709 1711 def commonancestorsheads(self, a, b):
1710 1712 """calculate all the heads of the common ancestors of nodes a and b"""
1711 1713 a, b = self.rev(a), self.rev(b)
1712 1714 ancs = self._commonancestorsheads(a, b)
1713 1715 return pycompat.maplist(self.node, ancs)
1714 1716
1715 1717 def _commonancestorsheads(self, *revs):
1716 1718 """calculate all the heads of the common ancestors of revs"""
1717 1719 try:
1718 1720 ancs = self.index.commonancestorsheads(*revs)
1719 1721 except (AttributeError, OverflowError): # C implementation failed
1720 1722 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1721 1723 return ancs
1722 1724
1723 1725 def isancestor(self, a, b):
1724 1726 """return True if node a is an ancestor of node b
1725 1727
1726 1728 A revision is considered an ancestor of itself."""
1727 1729 a, b = self.rev(a), self.rev(b)
1728 1730 return self.isancestorrev(a, b)
1729 1731
1730 1732 def isancestorrev(self, a, b):
1731 1733 """return True if revision a is an ancestor of revision b
1732 1734
1733 1735 A revision is considered an ancestor of itself.
1734 1736
1735 1737 The implementation of this is trivial but the use of
1736 1738 commonancestorsheads is not."""
1737 1739 if a == nullrev:
1738 1740 return True
1739 1741 elif a == b:
1740 1742 return True
1741 1743 elif a > b:
1742 1744 return False
1743 1745 return a in self._commonancestorsheads(a, b)
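    # Why the last line works (informal note, not from the original
    # file): when a is an ancestor of b, the common ancestors of a and b
    # are exactly the ancestors of a, and a itself is their unique head.
    #
    #   # linear history 0 -> 1 -> 2:
    #   rl.isancestorrev(1, 2)  # 1 in _commonancestorsheads(1, 2) == {1}
    #   rl.isancestorrev(2, 1)  # False immediately, since 2 > 1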
1744 1746
1745 1747 def ancestor(self, a, b):
1746 1748 """calculate the "best" common ancestor of nodes a and b"""
1747 1749
1748 1750 a, b = self.rev(a), self.rev(b)
1749 1751 try:
1750 1752 ancs = self.index.ancestors(a, b)
1751 1753 except (AttributeError, OverflowError):
1752 1754 ancs = ancestor.ancestors(self.parentrevs, a, b)
1753 1755 if ancs:
1754 1756 # choose a consistent winner when there's a tie
1755 1757 return min(map(self.node, ancs))
1756 1758 return nullid
1757 1759
1758 1760 def _match(self, id):
1759 1761 if isinstance(id, int):
1760 1762 # rev
1761 1763 return self.node(id)
1762 1764 if len(id) == 20:
1763 1765 # possibly a binary node
1764 1766 # odds of a binary node being all hex in ASCII are 1 in 10**25
1765 1767 try:
1766 1768 node = id
1767 1769 self.rev(node) # quick search the index
1768 1770 return node
1769 1771 except LookupError:
1770 1772 pass # may be partial hex id
1771 1773 try:
1772 1774 # str(rev)
1773 1775 rev = int(id)
1774 1776 if "%d" % rev != id:
1775 1777 raise ValueError
1776 1778 if rev < 0:
1777 1779 rev = len(self) + rev
1778 1780 if rev < 0 or rev >= len(self):
1779 1781 raise ValueError
1780 1782 return self.node(rev)
1781 1783 except (ValueError, OverflowError):
1782 1784 pass
1783 1785 if len(id) == 40:
1784 1786 try:
1785 1787 # a full hex nodeid?
1786 1788 node = bin(id)
1787 1789 self.rev(node)
1788 1790 return node
1789 1791 except (TypeError, LookupError):
1790 1792 pass
1791 1793
1792 1794 def _partialmatch(self, id):
1793 1795 # we don't care about wdirfilenodeids as they should always be full hashes
1794 1796 maybewdir = wdirhex.startswith(id)
1795 1797 try:
1796 1798 partial = self.index.partialmatch(id)
1797 1799 if partial and self.hasnode(partial):
1798 1800 if maybewdir:
1799 1801 # single 'ff...' match in radix tree, ambiguous with wdir
1800 1802 raise RevlogError
1801 1803 return partial
1802 1804 if maybewdir:
1803 1805 # no 'ff...' match in radix tree, wdir identified
1804 1806 raise error.WdirUnsupported
1805 1807 return None
1806 1808 except RevlogError:
1807 1809 # parsers.c radix tree lookup gave multiple matches
1808 1810 # fast path: for unfiltered changelog, radix tree is accurate
1809 1811 if not getattr(self, 'filteredrevs', None):
1810 1812 raise AmbiguousPrefixLookupError(id, self.indexfile,
1811 1813 _('ambiguous identifier'))
1812 1814 # fall through to slow path that filters hidden revisions
1813 1815 except (AttributeError, ValueError):
1814 1816 # we are pure python, or key was too short to search radix tree
1815 1817 pass
1816 1818
1817 1819 if id in self._pcache:
1818 1820 return self._pcache[id]
1819 1821
1820 1822 if len(id) <= 40:
1821 1823 try:
1822 1824 # hex(node)[:...]
1823 1825 l = len(id) // 2 # grab an even number of digits
1824 1826 prefix = bin(id[:l * 2])
1825 1827 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1826 1828 nl = [n for n in nl if hex(n).startswith(id) and
1827 1829 self.hasnode(n)]
1828 1830 if len(nl) > 0:
1829 1831 if len(nl) == 1 and not maybewdir:
1830 1832 self._pcache[id] = nl[0]
1831 1833 return nl[0]
1832 1834 raise AmbiguousPrefixLookupError(id, self.indexfile,
1833 1835 _('ambiguous identifier'))
1834 1836 if maybewdir:
1835 1837 raise error.WdirUnsupported
1836 1838 return None
1837 1839 except TypeError:
1838 1840 pass
1839 1841
1840 1842 def lookup(self, id):
1841 1843 """locate a node based on:
1842 1844 - revision number or str(revision number)
1843 1845 - nodeid or subset of hex nodeid
1844 1846 """
1845 1847 n = self._match(id)
1846 1848 if n is not None:
1847 1849 return n
1848 1850 n = self._partialmatch(id)
1849 1851 if n:
1850 1852 return n
1851 1853
1852 1854 raise LookupError(id, self.indexfile, _('no match found'))
1853 1855
1854 1856 def shortest(self, node, minlength=1):
1855 1857 """Find the shortest unambiguous prefix that matches node."""
1856 1858 def isvalid(prefix):
1857 1859 try:
1858 1860 node = self._partialmatch(prefix)
1859 1861 except error.RevlogError:
1860 1862 return False
1861 1863 except error.WdirUnsupported:
1862 1864 # single 'ff...' match
1863 1865 return True
1864 1866 if node is None:
1865 1867 raise LookupError(node, self.indexfile, _('no node'))
1866 1868 return True
1867 1869
1868 1870 def maybewdir(prefix):
1869 1871 return all(c == 'f' for c in prefix)
1870 1872
1871 1873 hexnode = hex(node)
1872 1874
1873 1875 def disambiguate(hexnode, minlength):
1874 1876 """Disambiguate against wdirid."""
1875 1877 for length in range(minlength, 41):
1876 1878 prefix = hexnode[:length]
1877 1879 if not maybewdir(prefix):
1878 1880 return prefix
1879 1881
1880 1882 if not getattr(self, 'filteredrevs', None):
1881 1883 try:
1882 1884 length = max(self.index.shortest(node), minlength)
1883 1885 return disambiguate(hexnode, length)
1884 1886 except RevlogError:
1885 1887 if node != wdirid:
1886 1888 raise LookupError(node, self.indexfile, _('no node'))
1887 1889 except AttributeError:
1888 1890 # Fall through to pure code
1889 1891 pass
1890 1892
1891 1893 if node == wdirid:
1892 1894 for length in range(minlength, 41):
1893 1895 prefix = hexnode[:length]
1894 1896 if isvalid(prefix):
1895 1897 return prefix
1896 1898
1897 1899 for length in range(minlength, 41):
1898 1900 prefix = hexnode[:length]
1899 1901 if isvalid(prefix):
1900 1902 return disambiguate(hexnode, length)
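    # Sketch (assumed hex value, not from the original file): prefixes
    # made only of 'f' are ambiguous with the virtual wdir id, so
    # disambiguate() keeps extending past them:
    #
    #   disambiguate('ffa3...', 1)  # 'f' and 'ff' look like wdir -> 'ffa'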
1901 1903
1902 1904 def cmp(self, node, text):
1903 1905 """compare text with a given file revision
1904 1906
1905 1907 returns True if text is different from what is stored.
1906 1908 """
1907 1909 p1, p2 = self.parents(node)
1908 1910 return hash(text, p1, p2) != node
1909 1911
1910 1912 def _cachesegment(self, offset, data):
1911 1913 """Add a segment to the revlog cache.
1912 1914
1913 1915 Accepts an absolute offset and the data that is at that location.
1914 1916 """
1915 1917 o, d = self._chunkcache
1916 1918 # try to add to existing cache
1917 1919 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1918 1920 self._chunkcache = o, d + data
1919 1921 else:
1920 1922 self._chunkcache = offset, data
1921 1923
1922 1924 def _readsegment(self, offset, length, df=None):
1923 1925 """Load a segment of raw data from the revlog.
1924 1926
1925 1927 Accepts an absolute offset, length to read, and an optional existing
1926 1928 file handle to read from.
1927 1929
1928 1930 If an existing file handle is passed, it will be seeked and the
1929 1931 original seek position will NOT be restored.
1930 1932
1931 1933 Returns a str or buffer of raw byte data.
1932 1934 """
1933 1935 # Cache data both forward and backward around the requested
1934 1936 # data, in a fixed size window. This helps speed up operations
1935 1937 # involving reading the revlog backwards.
1936 1938 cachesize = self._chunkcachesize
1937 1939 realoffset = offset & ~(cachesize - 1)
1938 1940 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1939 1941 - realoffset)
1940 1942 with self._datareadfp(df) as df:
1941 1943 df.seek(realoffset)
1942 1944 d = df.read(reallength)
1943 1945 self._cachesegment(realoffset, d)
1944 1946 if offset != realoffset or reallength != length:
1945 1947 return util.buffer(d, offset - realoffset, length)
1946 1948 return d
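    # Worked example of the window arithmetic above (assumed values, not
    # from the original file): with cachesize = 65536 (the usual default),
    # offset = 70000 and length = 100,
    #
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so one full 64k window starting at 65536 is read and cached.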
1947 1949
1948 1950 def _getsegment(self, offset, length, df=None):
1949 1951 """Obtain a segment of raw data from the revlog.
1950 1952
1951 1953 Accepts an absolute offset, length of bytes to obtain, and an
1952 1954 optional file handle to the already-opened revlog. If the file
1953 1955 handle is used, its original seek position will not be preserved.
1954 1956
1955 1957 Requests for data may be returned from a cache.
1956 1958
1957 1959 Returns a str or a buffer instance of raw byte data.
1958 1960 """
1959 1961 o, d = self._chunkcache
1960 1962 l = len(d)
1961 1963
1962 1964 # is it in the cache?
1963 1965 cachestart = offset - o
1964 1966 cacheend = cachestart + length
1965 1967 if cachestart >= 0 and cacheend <= l:
1966 1968 if cachestart == 0 and cacheend == l:
1967 1969 return d # avoid a copy
1968 1970 return util.buffer(d, cachestart, cacheend - cachestart)
1969 1971
1970 1972 return self._readsegment(offset, length, df=df)
1971 1973
1972 1974 def _getsegmentforrevs(self, startrev, endrev, df=None):
1973 1975 """Obtain a segment of raw data corresponding to a range of revisions.
1974 1976
1975 1977 Accepts the start and end revisions and an optional already-open
1976 1978 file handle to be used for reading. If the file handle is read, its
1977 1979 seek position will not be preserved.
1978 1980
1979 1981 Requests for data may be satisfied by a cache.
1980 1982
1981 1983 Returns a 2-tuple of (offset, data) for the requested range of
1982 1984 revisions. Offset is the integer offset from the beginning of the
1983 1985 revlog and data is a str or buffer of the raw byte data.
1984 1986
1985 1987 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1986 1988 to determine where each revision's data begins and ends.
1987 1989 """
1988 1990 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1989 1991 # (functions are expensive).
1990 1992 index = self.index
1991 1993 istart = index[startrev]
1992 1994 start = int(istart[0] >> 16)
1993 1995 if startrev == endrev:
1994 1996 end = start + istart[1]
1995 1997 else:
1996 1998 iend = index[endrev]
1997 1999 end = int(iend[0] >> 16) + iend[1]
1998 2000
1999 2001 if self._inline:
2000 2002 start += (startrev + 1) * self._io.size
2001 2003 end += (endrev + 1) * self._io.size
2002 2004 length = end - start
2003 2005
2004 2006 return start, self._getsegment(start, length, df=df)
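    # Worked example of the inline adjustment above (assumed values, not
    # from the original file): inline revlogs interleave one index entry
    # (self._io.size bytes, 64 for v1) before each revision's data, so
    # for rev 2 whose data starts at logical offset 1000:
    #
    #   start = 1000 + (2 + 1) * 64  # -> 1192 physical bytes into the file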
2005 2007
2006 2008 def _chunk(self, rev, df=None):
2007 2009 """Obtain a single decompressed chunk for a revision.
2008 2010
2009 2011 Accepts an integer revision and an optional already-open file handle
2010 2012 to be used for reading. If used, the seek position of the file will not
2011 2013 be preserved.
2012 2014
2013 2015 Returns a str holding uncompressed data for the requested revision.
2014 2016 """
2015 2017 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2016 2018
2017 2019 def _chunks(self, revs, df=None, targetsize=None):
2018 2020 """Obtain decompressed chunks for the specified revisions.
2019 2021
2020 2022 Accepts an iterable of numeric revisions that are assumed to be in
2021 2023 ascending order. Also accepts an optional already-open file handle
2022 2024 to be used for reading. If used, the seek position of the file will
2023 2025 not be preserved.
2024 2026
2025 2027 This function is similar to calling ``self._chunk()`` multiple times,
2026 2028 but is faster.
2027 2029
2028 2030 Returns a list with decompressed data for each requested revision.
2029 2031 """
2030 2032 if not revs:
2031 2033 return []
2032 2034 start = self.start
2033 2035 length = self.length
2034 2036 inline = self._inline
2035 2037 iosize = self._io.size
2036 2038 buffer = util.buffer
2037 2039
2038 2040 l = []
2039 2041 ladd = l.append
2040 2042
2041 2043 if not self._withsparseread:
2042 2044 slicedchunks = (revs,)
2043 2045 else:
2044 2046 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2045 2047
2046 2048 for revschunk in slicedchunks:
2047 2049 firstrev = revschunk[0]
2048 2050 # Skip trailing revisions with empty diff
2049 2051 for lastrev in revschunk[::-1]:
2050 2052 if length(lastrev) != 0:
2051 2053 break
2052 2054
2053 2055 try:
2054 2056 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2055 2057 except OverflowError:
2056 2058 # issue4215 - we can't cache a run of chunks greater than
2057 2059 # 2G on Windows
2058 2060 return [self._chunk(rev, df=df) for rev in revschunk]
2059 2061
2060 2062 decomp = self.decompress
2061 2063 for rev in revschunk:
2062 2064 chunkstart = start(rev)
2063 2065 if inline:
2064 2066 chunkstart += (rev + 1) * iosize
2065 2067 chunklength = length(rev)
2066 2068 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2067 2069
2068 2070 return l
2069 2071
2070 2072 def _chunkclear(self):
2071 2073 """Clear the raw chunk cache."""
2072 2074 self._chunkcache = (0, '')
2073 2075
2074 2076 def deltaparent(self, rev):
2075 2077 """return deltaparent of the given revision"""
2076 2078 base = self.index[rev][3]
2077 2079 if base == rev:
2078 2080 return nullrev
2079 2081 elif self._generaldelta:
2080 2082 return base
2081 2083 else:
2082 2084 return rev - 1
2083 2085
2084 2086 def revdiff(self, rev1, rev2):
2085 2087 """return or calculate a delta between two revisions
2086 2088
2087 2089 The delta calculated is in binary form and is intended to be written to
2088 2090 revlog data directly. So this function needs raw revision data.
2089 2091 """
2090 2092 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2091 2093 return bytes(self._chunk(rev2))
2092 2094
2093 2095 return mdiff.textdiff(self.revision(rev1, raw=True),
2094 2096 self.revision(rev2, raw=True))
2095 2097
2096 2098 def revision(self, nodeorrev, _df=None, raw=False):
2097 2099 """return an uncompressed revision of a given node or revision
2098 2100 number.
2099 2101
2100 2102 _df - an existing file handle to read from. (internal-only)
2101 2103 raw - an optional argument specifying if the revision data is to be
2102 2104 treated as raw data when applying flag transforms. 'raw' should be set
2103 2105 to True when generating changegroups or in debug commands.
2104 2106 """
2105 2107 if isinstance(nodeorrev, int):
2106 2108 rev = nodeorrev
2107 2109 node = self.node(rev)
2108 2110 else:
2109 2111 node = nodeorrev
2110 2112 rev = None
2111 2113
2112 2114 cachedrev = None
2113 2115 flags = None
2114 2116 rawtext = None
2115 2117 if node == nullid:
2116 2118 return ""
2117 2119 if self._cache:
2118 2120 if self._cache[0] == node:
2119 2121 # _cache only stores rawtext
2120 2122 if raw:
2121 2123 return self._cache[2]
2122 2124 # duplicated, but good for perf
2123 2125 if rev is None:
2124 2126 rev = self.rev(node)
2125 2127 if flags is None:
2126 2128 flags = self.flags(rev)
2127 2129 # no extra flags set, no flag processor runs, text = rawtext
2128 2130 if flags == REVIDX_DEFAULT_FLAGS:
2129 2131 return self._cache[2]
2130 2132 # rawtext is reusable. need to run flag processor
2131 2133 rawtext = self._cache[2]
2132 2134
2133 2135 cachedrev = self._cache[1]
2134 2136
2135 2137 # look up what we need to read
2136 2138 if rawtext is None:
2137 2139 if rev is None:
2138 2140 rev = self.rev(node)
2139 2141
2140 2142 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2141 2143 if stopped:
2142 2144 rawtext = self._cache[2]
2143 2145
2144 2146 # drop cache to save memory
2145 2147 self._cache = None
2146 2148
2147 2149 targetsize = None
2148 2150 rawsize = self.index[rev][2]
2149 2151 if 0 <= rawsize:
2150 2152 targetsize = 4 * rawsize
2151 2153
2152 2154 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2153 2155 if rawtext is None:
2154 2156 rawtext = bytes(bins[0])
2155 2157 bins = bins[1:]
2156 2158
2157 2159 rawtext = mdiff.patches(rawtext, bins)
2158 2160 self._cache = (node, rev, rawtext)
2159 2161
2160 2162 if flags is None:
2161 2163 if rev is None:
2162 2164 rev = self.rev(node)
2163 2165 flags = self.flags(rev)
2164 2166
2165 2167 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2166 2168 if validatehash:
2167 2169 self.checkhash(text, node, rev=rev)
2168 2170
2169 2171 return text
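    # The retrieval above reduces to this pipeline (informal restatement
    # of the code, not additional API):
    #
    #   chain, stopped = self._deltachain(rev)  # base + deltas to apply
    #   bins = self._chunks(chain)              # decompressed segments
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #   text, vhash = self._processflags(rawtext, self.flags(rev), 'read')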
2170 2172
2171 2173 def hash(self, text, p1, p2):
2172 2174 """Compute a node hash.
2173 2175
2174 2176 Available as a function so that subclasses can replace the hash
2175 2177 as needed.
2176 2178 """
2177 2179 return hash(text, p1, p2)
2178 2180
2179 2181 def _processflags(self, text, flags, operation, raw=False):
2180 2182 """Inspect revision data flags and applies transforms defined by
2181 2183 registered flag processors.
2182 2184
2183 2185 ``text`` - the revision data to process
2184 2186 ``flags`` - the revision flags
2185 2187 ``operation`` - the operation being performed (read or write)
2186 2188 ``raw`` - an optional argument describing if the raw transform should be
2187 2189 applied.
2188 2190
2189 2191 This method processes the flags in the order (or reverse order if
2190 2192 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2191 2193 flag processors registered for present flags. The order of flags defined
2192 2194 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
2193 2195
2194 2196 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2195 2197 processed text and ``validatehash`` is a bool indicating whether the
2196 2198 returned text should be checked for hash integrity.
2197 2199
2198 2200 Note: If the ``raw`` argument is set, it has precedence over the
2199 2201 operation and will only update the value of ``validatehash``.
2200 2202 """
2201 2203 # fast path: no flag processors will run
2202 2204 if flags == 0:
2203 2205 return text, True
2204 2206 if operation not in ('read', 'write'):
2205 2207 raise ProgrammingError(_("invalid '%s' operation") % (operation))
2206 2208 # Check all flags are known.
2207 2209 if flags & ~REVIDX_KNOWN_FLAGS:
2208 2210 raise RevlogError(_("incompatible revision flag '%#x'") %
2209 2211 (flags & ~REVIDX_KNOWN_FLAGS))
2210 2212 validatehash = True
2211 2213 # Depending on the operation (read or write), the order might be
2212 2214 # reversed due to non-commutative transforms.
2213 2215 orderedflags = REVIDX_FLAGS_ORDER
2214 2216 if operation == 'write':
2215 2217 orderedflags = reversed(orderedflags)
2216 2218
2217 2219 for flag in orderedflags:
2218 2220 # If a flagprocessor has been registered for a known flag, apply the
2219 2221 # related operation transform and update result tuple.
2220 2222 if flag & flags:
2221 2223 vhash = True
2222 2224
2223 2225 if flag not in _flagprocessors:
2224 2226 message = _("missing processor for flag '%#x'") % (flag)
2225 2227 raise RevlogError(message)
2226 2228
2227 2229 processor = _flagprocessors[flag]
2228 2230 if processor is not None:
2229 2231 readtransform, writetransform, rawtransform = processor
2230 2232
2231 2233 if raw:
2232 2234 vhash = rawtransform(self, text)
2233 2235 elif operation == 'read':
2234 2236 text, vhash = readtransform(self, text)
2235 2237 else: # write operation
2236 2238 text, vhash = writetransform(self, text)
2237 2239 validatehash = validatehash and vhash
2238 2240
2239 2241 return text, validatehash
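    # Registration sketch (hypothetical processor, not from the original
    # file): a processor is a (read, write, raw) triple; the read/write
    # transforms return (newtext, validatehash), while the raw transform
    # returns only the validatehash bool.
    #
    #   passthrough = lambda self, text: (text, True)
    #   rawvalidate = lambda self, text: False  # rawtext differs from text
    #   addflagprocessor(REVIDX_EXTSTORED,
    #                    (passthrough, passthrough, rawvalidate))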
2240 2242
2241 2243 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2242 2244 """Check node hash integrity.
2243 2245
2244 2246 Available as a function so that subclasses can extend hash mismatch
2245 2247 behaviors as needed.
2246 2248 """
2247 2249 try:
2248 2250 if p1 is None and p2 is None:
2249 2251 p1, p2 = self.parents(node)
2250 2252 if node != self.hash(text, p1, p2):
2251 2253 revornode = rev
2252 2254 if revornode is None:
2253 2255 revornode = templatefilters.short(hex(node))
2254 2256 raise RevlogError(_("integrity check failed on %s:%s")
2255 2257 % (self.indexfile, pycompat.bytestr(revornode)))
2256 2258 except RevlogError:
2257 2259 if self._censorable and _censoredtext(text):
2258 2260 raise error.CensoredNodeError(self.indexfile, node, text)
2259 2261 raise
2260 2262
2261 2263 def _enforceinlinesize(self, tr, fp=None):
2262 2264 """Check if the revlog is too big for inline and convert if so.
2263 2265
2264 2266 This should be called after revisions are added to the revlog. If the
2265 2267 revlog has grown too large to be an inline revlog, it will convert it
2266 2268 to use multiple index and data files.
2267 2269 """
2268 2270 tiprev = len(self) - 1
2269 2271 if (not self._inline or
2270 2272 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2271 2273 return
2272 2274
2273 2275 trinfo = tr.find(self.indexfile)
2274 2276 if trinfo is None:
2275 2277 raise RevlogError(_("%s not found in the transaction")
2276 2278 % self.indexfile)
2277 2279
2278 2280 trindex = trinfo[2]
2279 2281 if trindex is not None:
2280 2282 dataoff = self.start(trindex)
2281 2283 else:
2282 2284 # revlog was stripped at start of transaction, use all leftover data
2283 2285 trindex = len(self) - 1
2284 2286 dataoff = self.end(tiprev)
2285 2287
2286 2288 tr.add(self.datafile, dataoff)
2287 2289
2288 2290 if fp:
2289 2291 fp.flush()
2290 2292 fp.close()
2291 2293
2292 2294 with self._datafp('w') as df:
2293 2295 for r in self:
2294 2296 df.write(self._getsegmentforrevs(r, r)[1])
2295 2297
2296 2298 with self._indexfp('w') as fp:
2297 2299 self.version &= ~FLAG_INLINE_DATA
2298 2300 self._inline = False
2299 2301 io = self._io
2300 2302 for i in self:
2301 2303 e = io.packentry(self.index[i], self.node, self.version, i)
2302 2304 fp.write(e)
2303 2305
2304 2306 # the temp file replaces the real index when we exit the context
2305 2307 # manager
2306 2308
2307 2309 tr.replace(self.indexfile, trindex * self._io.size)
2308 2310 self._chunkclear()
2309 2311
2310 2312 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2311 2313 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2312 2314 """add a revision to the log
2313 2315
2314 2316 text - the revision data to add
2315 2317 transaction - the transaction object used for rollback
2316 2318 link - the linkrev data to add
2317 2319 p1, p2 - the parent nodeids of the revision
2318 2320 cachedelta - an optional precomputed delta
2319 2321 node - nodeid of revision; typically node is not specified, and it is
2320 2322 computed by default as hash(text, p1, p2), however subclasses might
2321 2323 use different hashing method (and override checkhash() in such case)
2322 2324 flags - the known flags to set on the revision
2323 2325 deltacomputer - an optional _deltacomputer instance shared between
2324 2326 multiple calls
2325 2327 """
2326 2328 if link == nullrev:
2327 2329 raise RevlogError(_("attempted to add linkrev -1 to %s")
2328 2330 % self.indexfile)
2329 2331
2330 2332 if flags:
2331 2333 node = node or self.hash(text, p1, p2)
2332 2334
2333 2335 rawtext, validatehash = self._processflags(text, flags, 'write')
2334 2336
2335 2337 # If the flag processor modifies the revision data, ignore any provided
2336 2338 # cachedelta.
2337 2339 if rawtext != text:
2338 2340 cachedelta = None
2339 2341
2340 2342 if len(rawtext) > _maxentrysize:
2341 2343 raise RevlogError(
2342 2344 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2343 2345 % (self.indexfile, len(rawtext)))
2344 2346
2345 2347 node = node or self.hash(rawtext, p1, p2)
2346 2348 if node in self.nodemap:
2347 2349 return node
2348 2350
2349 2351 if validatehash:
2350 2352 self.checkhash(rawtext, node, p1=p1, p2=p2)
2351 2353
2352 2354 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2353 2355 flags, cachedelta=cachedelta,
2354 2356 deltacomputer=deltacomputer)
2355 2357
2356 2358 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2357 2359 cachedelta=None, deltacomputer=None):
2358 2360 """add a raw revision with known flags, node and parents
2359 2361 useful when reusing a revision not stored in this revlog (e.g. received
2360 2362 over the wire, or read from an external bundle).
2361 2363 """
2362 2364 dfh = None
2363 2365 if not self._inline:
2364 2366 dfh = self._datafp("a+")
2365 2367 ifh = self._indexfp("a+")
2366 2368 try:
2367 2369 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2368 2370 flags, cachedelta, ifh, dfh,
2369 2371 deltacomputer=deltacomputer)
2370 2372 finally:
2371 2373 if dfh:
2372 2374 dfh.close()
2373 2375 ifh.close()
2374 2376
2375 2377 def compress(self, data):
2376 2378 """Generate a possibly-compressed representation of data."""
2377 2379 if not data:
2378 2380 return '', data
2379 2381
2380 2382 compressed = self._compressor.compress(data)
2381 2383
2382 2384 if compressed:
2383 2385 # The revlog compressor added the header in the returned data.
2384 2386 return '', compressed
2385 2387
2386 2388 if data[0:1] == '\0':
2387 2389 return '', data
2388 2390 return 'u', data
2389 2391
2390 2392 def decompress(self, data):
2391 2393 """Decompress a revlog chunk.
2392 2394
2393 2395 The chunk is expected to begin with a header identifying the
2394 2396 format type so it can be routed to an appropriate decompressor.
2395 2397 """
2396 2398 if not data:
2397 2399 return data
2398 2400
2399 2401 # Revlogs are read much more frequently than they are written and many
2400 2402 # chunks only take microseconds to decompress, so performance is
2401 2403 # important here.
2402 2404 #
2403 2405 # We can make a few assumptions about revlogs:
2404 2406 #
2405 2407 # 1) the majority of chunks will be compressed (as opposed to inline
2406 2408 # raw data).
2407 2409 # 2) decompressing *any* data will likely be at least 10x slower than
2408 2410 # returning raw inline data.
2409 2411 # 3) we want to prioritize common and officially supported compression
2410 2412 # engines
2411 2413 #
2412 2414 # It follows that we want to optimize for "decompress compressed data
2413 2415 # when encoded with common and officially supported compression engines"
2414 2416 # case over "raw data" and "data encoded by less common or non-official
2415 2417 # compression engines." That is why we have the inline lookup first
2416 2418 # followed by the compengines lookup.
2417 2419 #
2418 2420 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2419 2421 # compressed chunks. And this matters for changelog and manifest reads.
2420 2422 t = data[0:1]
2421 2423
2422 2424 if t == 'x':
2423 2425 try:
2424 2426 return _zlibdecompress(data)
2425 2427 except zlib.error as e:
2426 2428 raise RevlogError(_('revlog decompress error: %s') %
2427 2429 stringutil.forcebytestr(e))
2428 2430 # '\0' is more common than 'u' so it goes first.
2429 2431 elif t == '\0':
2430 2432 return data
2431 2433 elif t == 'u':
2432 2434 return util.buffer(data, 1)
2433 2435
2434 2436 try:
2435 2437 compressor = self._decompressors[t]
2436 2438 except KeyError:
2437 2439 try:
2438 2440 engine = util.compengines.forrevlogheader(t)
2439 2441 compressor = engine.revlogcompressor()
2440 2442 self._decompressors[t] = compressor
2441 2443 except KeyError:
2442 2444 raise RevlogError(_('unknown compression type %r') % t)
2443 2445
2444 2446 return compressor.decompress(data)
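    # Header routing sketch (illustrative values, not from the original
    # file): the first byte picks the decoder.
    #
    #   rl.decompress('u' + b'plain')  # 'u'  -> buffer over b'plain'
    #   rl.decompress('\0...')         # '\0' -> returned as-is
    #   rl.decompress('x\x9c...')      # 'x'  -> zlib.decompress()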
2445 2447
2446 2448 def _isgooddeltainfo(self, deltainfo, revinfo):
2447 2449 """Returns True if the given delta is good. Good means that it is within
2448 2450 the disk span, disk size, and chain length bounds that we know to be
2449 2451 performant."""
2450 2452 if deltainfo is None:
2451 2453 return False
2452 2454
2453 2455 # - 'deltainfo.distance' is the distance from the base revision --
2454 2456 # bounding it limits the amount of I/O we need to do.
2455 2457 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2456 2458 # deltas we need to apply -- bounding it limits the amount of CPU
2457 2459 # we consume.
2458 2460
2459 2461 if self._sparserevlog:
2460 2462 # As sparse-read will be used, we can consider that the distance,
2461 2463 # instead of being the span of the whole chunk,
2462 2464 # is the span of the largest read chunk
2463 2465 base = deltainfo.base
2464 2466
2465 2467 if base != nullrev:
2466 2468 deltachain = self._deltachain(base)[0]
2467 2469 else:
2468 2470 deltachain = []
2469 2471
2470 2472 chunks = _slicechunk(self, deltachain, deltainfo)
2471 2473 distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
2472 2474 else:
2473 2475 distance = deltainfo.distance
2474 2476
2475 2477 textlen = revinfo.textlen
2476 2478 defaultmax = textlen * 4
2477 2479 maxdist = self._maxdeltachainspan
2478 2480 if not maxdist:
2479 2481 maxdist = distance # ensure the conditional passes
2480 2482 maxdist = max(maxdist, defaultmax)
2481 2483 if self._sparserevlog and maxdist < self._srmingapsize:
2482 2484 # In multiple places, we ignore irrelevant data ranges below a
2483 2485 # certain size. We also apply this tradeoff here and relax the
2484 2486 # span constraint for small enough content.
2485 2487 maxdist = self._srmingapsize
2486 2488
2487 2489 # Bad delta from read span:
2488 2490 #
2489 2491 # If the span of data read is larger than the maximum allowed.
2490 2492 if maxdist < distance:
2491 2493 return False
2492 2494
2493 2495 # Bad delta from new delta size:
2494 2496 #
2495 2497 # If the delta size is larger than the target text, storing the
2496 2498 # delta will be inefficient.
2497 2499 if textlen < deltainfo.deltalen:
2498 2500 return False
2499 2501
2500 2502 # Bad delta from cumulated payload size:
2501 2503 #
2502 2504 # If the sum of deltas gets larger than K * target text length.
2503 2505 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2504 2506 return False
2505 2507
2506 2508 # Bad delta from chain length:
2507 2509 #
2508 2510 # If the number of deltas in the chain gets too high.
2509 2511 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2510 2512 return False
2511 2513
2512 2514 return True
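    # The checks above, condensed (a rough restatement, not new policy;
    # sparse-revlog additionally floors the span limit at _srmingapsize):
    #
    #   distance           <= max(_maxdeltachainspan, 4 * textlen)
    #   deltalen           <= textlen
    #   compresseddeltalen <= LIMIT_DELTA2TEXT * textlen
    #   chainlen           <= _maxchainlen  (when a limit is configured)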
2513 2515
2514 2516 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2515 2517 cachedelta, ifh, dfh, alwayscache=False,
2516 2518 deltacomputer=None):
2517 2519 """internal function to add revisions to the log
2518 2520
2519 2521 see addrevision for argument descriptions.
2520 2522
2521 2523 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2522 2524
2523 2525 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2524 2526 be used.
2525 2527
2526 2528 invariants:
2527 2529 - rawtext is optional (can be None); if not set, cachedelta must be set.
2528 2530 if both are set, they must correspond to each other.
2529 2531 """
2530 2532 if node == nullid:
2531 2533 raise RevlogError(_("%s: attempt to add null revision") %
2532 2534 (self.indexfile))
2533 2535 if node == wdirid or node in wdirfilenodeids:
2534 2536 raise RevlogError(_("%s: attempt to add wdir revision") %
2535 2537 (self.indexfile))
2536 2538
2537 2539 if self._inline:
2538 2540 fh = ifh
2539 2541 else:
2540 2542 fh = dfh
2541 2543
2542 2544 btext = [rawtext]
2543 2545
2544 2546 curr = len(self)
2545 2547 prev = curr - 1
2546 2548 offset = self.end(prev)
2547 2549 p1r, p2r = self.rev(p1), self.rev(p2)
2548 2550
2549 2551 # full versions are inserted when the needed deltas
2550 2552 # become comparable to the uncompressed text
2551 2553 if rawtext is None:
2552 2554 # need the rawtext size before it is changed by flag processors, which
2553 2555 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2554 2556 # logic that might remove metadata size.
2555 2557 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2556 2558 cachedelta[1])
2557 2559 else:
2558 2560 textlen = len(rawtext)
2559 2561
2560 2562 if deltacomputer is None:
2561 2563 deltacomputer = _deltacomputer(self)
2562 2564
2563 2565 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2564 2566
2565 2567 # no delta for flag processor revision (see "candelta" for why)
2566 2568 # not calling candelta since only one revision needs testing, also to
2567 2569 # avoid overhead fetching flags again.
2568 2570 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2569 2571 deltainfo = None
2570 2572 else:
2571 2573 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2572 2574
2573 2575 if deltainfo is not None:
2574 2576 base = deltainfo.base
2575 2577 chainbase = deltainfo.chainbase
2576 2578 data = deltainfo.data
2577 2579 l = deltainfo.deltalen
2578 2580 else:
2579 2581 rawtext = deltacomputer.buildtext(revinfo, fh)
2580 2582 data = self.compress(rawtext)
2581 2583 l = len(data[1]) + len(data[0])
2582 2584 base = chainbase = curr
2583 2585
2584 2586 e = (offset_type(offset, flags), l, textlen,
2585 2587 base, link, p1r, p2r, node)
2586 2588 self.index.append(e)
2587 2589 self.nodemap[node] = curr
2588 2590
2589 2591 entry = self._io.packentry(e, self.node, self.version, curr)
2590 2592 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2591 2593
2592 2594 if alwayscache and rawtext is None:
2593 2595 rawtext = deltacomputer._buildtext(revinfo, fh)
2594 2596
2595 2597 if type(rawtext) == bytes: # only accept immutable objects
2596 2598 self._cache = (node, curr, rawtext)
2597 2599 self._chainbasecache[curr] = chainbase
2598 2600 return node
2599 2601
2600 2602 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2601 2603 # Files opened in a+ mode have inconsistent behavior on various
2602 2604 # platforms. Windows requires that a file positioning call be made
2603 2605 # when the file handle transitions between reads and writes. See
2604 2606 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2605 2607 # platforms, Python or the platform itself can be buggy. Some versions
2606 2608 # of Solaris have been observed to not append at the end of the file
2607 2609 # if the file was seeked to before the end. See issue4943 for more.
2608 2610 #
2609 2611 # We work around this issue by inserting a seek() before writing.
2610 2612 # Note: This is likely not necessary on Python 3.
2611 2613 ifh.seek(0, os.SEEK_END)
2612 2614 if dfh:
2613 2615 dfh.seek(0, os.SEEK_END)
2614 2616
2615 2617 curr = len(self) - 1
2616 2618 if not self._inline:
2617 2619 transaction.add(self.datafile, offset)
2618 2620 transaction.add(self.indexfile, curr * len(entry))
2619 2621 if data[0]:
2620 2622 dfh.write(data[0])
2621 2623 dfh.write(data[1])
2622 2624 ifh.write(entry)
2623 2625 else:
2624 2626 offset += curr * self._io.size
2625 2627 transaction.add(self.indexfile, offset, curr)
2626 2628 ifh.write(entry)
2627 2629 ifh.write(data[0])
2628 2630 ifh.write(data[1])
2629 2631 self._enforceinlinesize(transaction, ifh)
2630 2632
2631 2633 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2632 2634 """
2633 2635 add a delta group
2634 2636
2635 2637 given a set of deltas, add them to the revision log. the
2636 2638 first delta is against its parent, which should be in our
2637 2639 log, the rest are against the previous delta.
2638 2640
2639 2641 If ``addrevisioncb`` is defined, it will be called with arguments of
2640 2642 this revlog and the node that was added.
2641 2643 """
2642 2644
2643 2645 nodes = []
2644 2646
2645 2647 r = len(self)
2646 2648 end = 0
2647 2649 if r:
2648 2650 end = self.end(r - 1)
2649 2651 ifh = self._indexfp("a+")
2650 2652 isize = r * self._io.size
2651 2653 if self._inline:
2652 2654 transaction.add(self.indexfile, end + isize, r)
2653 2655 dfh = None
2654 2656 else:
2655 2657 transaction.add(self.indexfile, isize, r)
2656 2658 transaction.add(self.datafile, end)
2657 2659 dfh = self._datafp("a+")
2658 2660 def flush():
2659 2661 if dfh:
2660 2662 dfh.flush()
2661 2663 ifh.flush()
2662 2664 try:
2663 2665 deltacomputer = _deltacomputer(self)
2664 2666 # loop through our set of deltas
2665 2667 for data in deltas:
2666 2668 node, p1, p2, linknode, deltabase, delta, flags = data
2667 2669 link = linkmapper(linknode)
2668 2670 flags = flags or REVIDX_DEFAULT_FLAGS
2669 2671
2670 2672 nodes.append(node)
2671 2673
2672 2674 if node in self.nodemap:
2673 2675 # this can happen if two branches make the same change
2674 2676 continue
2675 2677
2676 2678 for p in (p1, p2):
2677 2679 if p not in self.nodemap:
2678 2680 raise LookupError(p, self.indexfile,
2679 2681 _('unknown parent'))
2680 2682
2681 2683 if deltabase not in self.nodemap:
2682 2684 raise LookupError(deltabase, self.indexfile,
2683 2685 _('unknown delta base'))
2684 2686
2685 2687 baserev = self.rev(deltabase)
2686 2688
2687 2689 if baserev != nullrev and self.iscensored(baserev):
2688 2690 # if base is censored, delta must be full replacement in a
2689 2691 # single patch operation
2690 2692 hlen = struct.calcsize(">lll")
2691 2693 oldlen = self.rawsize(baserev)
2692 2694 newlen = len(delta) - hlen
2693 2695 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2694 2696 raise error.CensoredBaseError(self.indexfile,
2695 2697 self.node(baserev))
2696 2698
2697 2699 if not flags and self._peek_iscensored(baserev, delta, flush):
2698 2700 flags |= REVIDX_ISCENSORED
2699 2701
2700 2702 # We assume consumers of addrevisioncb will want to retrieve
2701 2703 # the added revision, which will require a call to
2702 2704 # revision(). revision() will fast path if there is a cache
2703 2705 # hit. So, we tell _addrevision() to always cache in this case.
2704 2706 # We're only using addgroup() in the context of changegroup
2705 2707 # generation so the revision data can always be handled as raw
2706 2708 # by the flagprocessor.
2707 2709 self._addrevision(node, None, transaction, link,
2708 2710 p1, p2, flags, (baserev, delta),
2709 2711 ifh, dfh,
2710 2712 alwayscache=bool(addrevisioncb),
2711 2713 deltacomputer=deltacomputer)
2712 2714
2713 2715 if addrevisioncb:
2714 2716 addrevisioncb(self, node)
2715 2717
2716 2718 if not dfh and not self._inline:
2717 2719 # addrevision switched from inline to conventional
2718 2720 # reopen the index
2719 2721 ifh.close()
2720 2722 dfh = self._datafp("a+")
2721 2723 ifh = self._indexfp("a+")
2722 2724 finally:
2723 2725 if dfh:
2724 2726 dfh.close()
2725 2727 ifh.close()
2726 2728
2727 2729 return nodes
2728 2730
2729 2731 def iscensored(self, rev):
2730 2732 """Check if a file revision is censored."""
2731 2733 if not self._censorable:
2732 2734 return False
2733 2735
2734 2736 return self.flags(rev) & REVIDX_ISCENSORED
2735 2737
2736 2738 def _peek_iscensored(self, baserev, delta, flush):
2737 2739 """Quickly check if a delta produces a censored revision."""
2738 2740 if not self._censorable:
2739 2741 return False
2740 2742
2741 2743 # Fragile heuristic: unless new file meta keys are added alphabetically
2742 2744 # preceding "censored", all censored revisions are prefixed by
2743 2745 # "\1\ncensored:". A delta producing such a censored revision must be a
2744 2746 # full-replacement delta, so we inspect the first and only patch in the
2745 2747 # delta for this prefix.
2746 2748 hlen = struct.calcsize(">lll")
2747 2749 if len(delta) <= hlen:
2748 2750 return False
2749 2751
2750 2752 oldlen = self.rawsize(baserev)
2751 2753 newlen = len(delta) - hlen
2752 2754 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2753 2755 return False
2754 2756
2755 2757 add = "\1\ncensored:"
2756 2758 addlen = len(add)
2757 2759 return newlen >= addlen and delta[hlen:hlen + addlen] == add
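    # Shape of the delta being probed above (sketch, not from the
    # original file): a full-replacement delta is a single patch, i.e. a
    # 12-byte '>lll' header (start, end, new length) followed by the new
    # text, which for a censored revision begins with the marker:
    #
    #   struct.pack(">lll", 0, oldlen, newlen) + "\1\ncensored:..."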
2758 2760
2759 2761 def getstrippoint(self, minlink):
2760 2762 """find the minimum rev that must be stripped to strip the linkrev
2761 2763
2762 2764 Returns a tuple containing the minimum rev and a set of all revs that
2763 2765 have linkrevs that will be broken by this strip.
2764 2766 """
2765 2767 brokenrevs = set()
2766 2768 strippoint = len(self)
2767 2769
2768 2770 heads = {}
2769 2771 futurelargelinkrevs = set()
2770 2772 for head in self.headrevs():
2771 2773 headlinkrev = self.linkrev(head)
2772 2774 heads[head] = headlinkrev
2773 2775 if headlinkrev >= minlink:
2774 2776 futurelargelinkrevs.add(headlinkrev)
2775 2777
2776 2778 # This algorithm involves walking down the rev graph, starting at the
2777 2779 # heads. Since the revs are topologically sorted according to linkrev,
2778 2780 # once all head linkrevs are below the minlink, we know there are
2779 2781 # no more revs that could have a linkrev greater than minlink.
2780 2782 # So we can stop walking.
2781 2783 while futurelargelinkrevs:
2782 2784 strippoint -= 1
2783 2785 linkrev = heads.pop(strippoint)
2784 2786
2785 2787 if linkrev < minlink:
2786 2788 brokenrevs.add(strippoint)
2787 2789 else:
2788 2790 futurelargelinkrevs.remove(linkrev)
2789 2791
2790 2792 for p in self.parentrevs(strippoint):
2791 2793 if p != nullrev:
2792 2794 plinkrev = self.linkrev(p)
2793 2795 heads[p] = plinkrev
2794 2796 if plinkrev >= minlink:
2795 2797 futurelargelinkrevs.add(plinkrev)
2796 2798
2797 2799 return strippoint, brokenrevs
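    # Toy walk (invented linkrevs, not from the original file): revs 1
    # and 2 both fork off rev 0, with linkrevs [0, 3, 2] and minlink == 3.
    # The walk pops rev 2 first (linkrev 2 < 3, so its linkrev would be
    # broken by the strip), then rev 1 (linkrev 3 >= 3), and stops:
    #
    #   rl.getstrippoint(3)   # -> (1, {2})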
2798 2800
2799 2801 def strip(self, minlink, transaction):
2800 2802 """truncate the revlog on the first revision with a linkrev >= minlink
2801 2803
2802 2804 This function is called when we're stripping revision minlink and
2803 2805 its descendants from the repository.
2804 2806
2805 2807 We have to remove all revisions with linkrev >= minlink, because
2806 2808 the equivalent changelog revisions will be renumbered after the
2807 2809 strip.
2808 2810
2809 2811 So we truncate the revlog on the first of these revisions, and
2810 2812 trust that the caller has saved the revisions that shouldn't be
2811 2813 removed and that it'll re-add them after this truncation.
2812 2814 """
2813 2815 if len(self) == 0:
2814 2816 return
2815 2817
2816 2818 rev, _ = self.getstrippoint(minlink)
2817 2819 if rev == len(self):
2818 2820 return
2819 2821
2820 2822 # first truncate the files on disk
2821 2823 end = self.start(rev)
2822 2824 if not self._inline:
2823 2825 transaction.add(self.datafile, end)
2824 2826 end = rev * self._io.size
2825 2827 else:
2826 2828 end += rev * self._io.size
2827 2829
2828 2830 transaction.add(self.indexfile, end)
2829 2831
2830 2832 # then reset internal state in memory to forget those revisions
2831 2833 self._cache = None
2832 2834 self._chaininfocache = {}
2833 2835 self._chunkclear()
2834 2836 for x in pycompat.xrange(rev, len(self)):
2835 2837 del self.nodemap[self.node(x)]
2836 2838
2837 2839 del self.index[rev:-1]
2838 2840 self._nodepos = None
2839 2841
2840 2842 def checksize(self):
2841 2843 expected = 0
2842 2844 if len(self):
2843 2845 expected = max(0, self.end(len(self) - 1))
2844 2846
2845 2847 try:
2846 2848 with self._datafp() as f:
2847 2849 f.seek(0, 2)
2848 2850 actual = f.tell()
2849 2851 dd = actual - expected
2850 2852 except IOError as inst:
2851 2853 if inst.errno != errno.ENOENT:
2852 2854 raise
2853 2855 dd = 0
2854 2856
2855 2857 try:
2856 2858 f = self.opener(self.indexfile)
2857 2859 f.seek(0, 2)
2858 2860 actual = f.tell()
2859 2861 f.close()
2860 2862 s = self._io.size
2861 2863 i = max(0, actual // s)
2862 2864 di = actual - (i * s)
2863 2865 if self._inline:
2864 2866 databytes = 0
2865 2867 for r in self:
2866 2868 databytes += max(0, self.length(r))
2867 2869 dd = 0
2868 2870 di = actual - len(self) * s - databytes
2869 2871 except IOError as inst:
2870 2872 if inst.errno != errno.ENOENT:
2871 2873 raise
2872 2874 di = 0
2873 2875
2874 2876 return (dd, di)
2875 2877
2876 2878 def files(self):
2877 2879 res = [self.indexfile]
2878 2880 if not self._inline:
2879 2881 res.append(self.datafile)
2880 2882 return res
2881 2883
2882 2884 DELTAREUSEALWAYS = 'always'
2883 2885 DELTAREUSESAMEREVS = 'samerevs'
2884 2886 DELTAREUSENEVER = 'never'
2885 2887
2886 2888 DELTAREUSEFULLADD = 'fulladd'
2887 2889
2888 2890 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2889 2891
2890 2892 def clone(self, tr, destrevlog, addrevisioncb=None,
2891 2893 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2892 2894 """Copy this revlog to another, possibly with format changes.
2893 2895
2894 2896 The destination revlog will contain the same revisions and nodes.
2895 2897 However, it may not be bit-for-bit identical due to e.g. delta encoding
2896 2898 differences.
2897 2899
2898 2900 The ``deltareuse`` argument controls how deltas from the existing revlog
2899 2901 are preserved in the destination revlog. The argument can have the
2900 2902 following values:
2901 2903
2902 2904 DELTAREUSEALWAYS
2903 2905 Deltas will always be reused (if possible), even if the destination
2904 2906 revlog would not select the same revisions for the delta. This is the
2905 2907 fastest mode of operation.
2906 2908 DELTAREUSESAMEREVS
2907 2909 Deltas will be reused if the destination revlog would pick the same
2908 2910 revisions for the delta. This mode strikes a balance between speed
2909 2911 and optimization.
2910 2912 DELTAREUSENEVER
2911 2913 Deltas will never be reused. This is the slowest mode of execution.
2912 2914 This mode can be used to recompute deltas (e.g. if the diff/delta
2913 2915 algorithm changes).
2914 2916
2915 2917 Delta computation can be slow, so the choice of delta reuse policy can
2916 2918 significantly affect run time.
2917 2919
2918 2920 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2919 2921 two extremes. Deltas will be reused if they are appropriate. But if the
2920 2922 delta could choose a better revision, it will do so. This means if you
2921 2923 are converting a non-generaldelta revlog to a generaldelta revlog,
2922 2924 deltas will be recomputed if the delta's parent isn't a parent of the
2923 2925 revision.
2924 2926
2925 2927 In addition to the delta policy, the ``deltabothparents`` argument
2926 2928 controls whether to compute deltas against both parents for merges.
2927 2929 If None, the destination revlog's current setting is kept.
2928 2930 """
2929 2931 if deltareuse not in self.DELTAREUSEALL:
2930 2932 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2931 2933
2932 2934 if len(destrevlog):
2933 2935 raise ValueError(_('destination revlog is not empty'))
2934 2936
2935 2937 if getattr(self, 'filteredrevs', None):
2936 2938 raise ValueError(_('source revlog has filtered revisions'))
2937 2939 if getattr(destrevlog, 'filteredrevs', None):
2938 2940 raise ValueError(_('destination revlog has filtered revisions'))
2939 2941
2940 2942 # lazydeltabase controls whether to reuse a cached delta, if possible.
2941 2943 oldlazydeltabase = destrevlog._lazydeltabase
2942 2944 oldamd = destrevlog._deltabothparents
2943 2945
2944 2946 try:
2945 2947 if deltareuse == self.DELTAREUSEALWAYS:
2946 2948 destrevlog._lazydeltabase = True
2947 2949 elif deltareuse == self.DELTAREUSESAMEREVS:
2948 2950 destrevlog._lazydeltabase = False
2949 2951
2950 2952 destrevlog._deltabothparents = deltabothparents or oldamd
2951 2953
2952 2954 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2953 2955 self.DELTAREUSESAMEREVS)
2954 2956
2955 2957 deltacomputer = _deltacomputer(destrevlog)
2956 2958 index = self.index
2957 2959 for rev in self:
2958 2960 entry = index[rev]
2959 2961
2960 2962 # Some classes override linkrev to take filtered revs into
2961 2963 # account. Use raw entry from index.
2962 2964 flags = entry[0] & 0xffff
2963 2965 linkrev = entry[4]
2964 2966 p1 = index[entry[5]][7]
2965 2967 p2 = index[entry[6]][7]
2966 2968 node = entry[7]
2967 2969
2968 2970 # (Possibly) reuse the delta from the revlog if allowed and
2969 2971 # the revlog chunk is a delta.
2970 2972 cachedelta = None
2971 2973 rawtext = None
2972 2974 if populatecachedelta:
2973 2975 dp = self.deltaparent(rev)
2974 2976 if dp != nullrev:
2975 2977 cachedelta = (dp, bytes(self._chunk(rev)))
2976 2978
2977 2979 if not cachedelta:
2978 2980 rawtext = self.revision(rev, raw=True)
2979 2981
2980 2982
2981 2983 if deltareuse == self.DELTAREUSEFULLADD:
2982 2984 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2983 2985 cachedelta=cachedelta,
2984 2986 node=node, flags=flags,
2985 2987 deltacomputer=deltacomputer)
2986 2988 else:
2987 2989 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2988 2990 checkambig=False)
2989 2991 dfh = None
2990 2992 if not destrevlog._inline:
2991 2993 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2992 2994 try:
2993 2995 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2994 2996 p2, flags, cachedelta, ifh, dfh,
2995 2997 deltacomputer=deltacomputer)
2996 2998 finally:
2997 2999 if dfh:
2998 3000 dfh.close()
2999 3001 ifh.close()
3000 3002
3001 3003 if addrevisioncb:
3002 3004 addrevisioncb(self, rev, node)
3003 3005 finally:
3004 3006 destrevlog._lazydeltabase = oldlazydeltabase
3005 3007 destrevlog._deltabothparents = oldamd
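    # Usage sketch (hypothetical source/destination revlogs and an open
    # transaction ``tr``; not from the original file): recompute every
    # delta while converting a revlog:
    #
    #   srcrevlog.clone(tr, destrevlog, deltareuse=revlog.DELTAREUSENEVER)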