pure: create type for revlog v0 index...
Martin von Zweigbergk
r38885:da5a666f default
@@ -1,2966 +1,2971 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieval and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
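The version header packs the on-disk format number into the low 16 bits and
feature flags into the high bits; a minimal decomposition sketch, assuming
only the constants defined above:

    v = REVLOG_DEFAULT_VERSION      # REVLOGV1 | FLAG_INLINE_DATA
    fmt = v & 0xFFFF                # low 16 bits: format -> REVLOGV1
    flags = v & ~0xFFFF             # high bits: features -> FLAG_INLINE_DATA
    assert fmt == REVLOGV1 and flags == FLAG_INLINE_DATA
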
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 86 # bitmask of flags that could cause the rawdata content to change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 95 CensoredNodeError = error.CensoredNodeError
96 96 ProgrammingError = error.ProgrammingError
97 97
98 98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 99 _flagprocessors = {
100 100 REVIDX_ISCENSORED: None,
101 101 }
102 102
103 103 _mdre = re.compile('\1\n')
104 104 def parsemeta(text):
105 105 """return (metadatadict, metadatasize)"""
106 106 # text can be buffer, so we can't use .startswith or .index
107 107 if text[:2] != '\1\n':
108 108 return None, None
109 109 s = _mdre.search(text, 2).start()
110 110 mtext = text[2:s]
111 111 meta = {}
112 112 for l in mtext.splitlines():
113 113 k, v = l.split(": ", 1)
114 114 meta[k] = v
115 115 return meta, (s + 2)
116 116
117 117 def packmeta(meta, text):
118 118 keys = sorted(meta)
119 119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 120 return "\1\n%s\1\n%s" % (metatext, text)
121 121
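An illustrative round-trip of the two helpers above (the metadata keys and
payload here are arbitrary):

    meta = {'copy': 'a', 'copyrev': '0' * 40}
    text = packmeta(meta, 'file content')
    parsed, size = parsemeta(text)
    assert parsed == meta
    assert text[size:] == 'file content'
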
122 122 def _censoredtext(text):
123 123 m, offs = parsemeta(text)
124 124 return m and "censored" in m
125 125
126 126 def addflagprocessor(flag, processor):
127 127 """Register a flag processor on a revision data flag.
128 128
129 129 Invariant:
130 130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 132 - Only one flag processor can be registered on a specific flag.
133 133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 134 following signatures:
135 135 - (read) f(self, rawtext) -> text, bool
136 136 - (write) f(self, text) -> rawtext, bool
137 137 - (raw) f(self, rawtext) -> bool
138 138 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 139 directly visible to the user.
140 140 The boolean returned by these transforms is used to determine whether
141 141 the returned text can be used for hash integrity checking. For example,
142 142 if "write" returns False, then "text" is used to generate the hash. If
143 143 "write" returns True, the "rawtext" returned by "write" should be used
144 144 to generate the hash. Usually, "write" and "read" return different
145 145 booleans, and "raw" returns the same boolean as "write".
146 146
147 147 Note: The 'raw' transform is used for changegroup generation and in some
148 148 debug commands. In this case the transform only indicates whether the
149 149 contents can be used for hash integrity checks.
150 150 """
151 151 if not flag & REVIDX_KNOWN_FLAGS:
152 152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 153 raise ProgrammingError(msg)
154 154 if flag not in REVIDX_FLAGS_ORDER:
155 155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 156 raise ProgrammingError(msg)
157 157 if flag in _flagprocessors:
158 158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 159 raise error.Abort(msg)
160 160 _flagprocessors[flag] = processor
161 161
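For illustration, a hypothetical registration that satisfies the invariants
above using identity transforms (the _noop names are made up for this sketch;
real processors, such as the one the LFS extension installs on
REVIDX_EXTSTORED, do actual work):

    def _readnoop(self, rawtext):
        return rawtext, True
    def _writenoop(self, text):
        return text, True
    def _rawnoop(self, rawtext):
        return True
    # registration is global and may only happen once per flag
    addflagprocessor(REVIDX_EXTSTORED, (_readnoop, _writenoop, _rawnoop))
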
162 162 def getoffset(q):
163 163 return int(q >> 16)
164 164
165 165 def gettype(q):
166 166 return int(q & 0xFFFF)
167 167
168 168 def offset_type(offset, type):
169 169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 170 raise ValueError('unknown revlog index flags')
171 171 return int(int(offset) << 16 | type)
172 172
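These helpers pack the 48-bit file offset and the 16-bit flag field into the
first entry of an index tuple; a round-trip sketch:

    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
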
173 173 _nullhash = hashlib.sha1(nullid)
174 174
175 175 def hash(text, p1, p2):
176 176 """generate a hash from the given text and its parent hashes
177 177
178 178 This hash combines both the current file contents and its history
179 179 in a manner that makes it easy to distinguish nodes with the same
180 180 content in the revision graph.
181 181 """
182 182 # As of now, if one of the parent nodes is null, p2 is null
183 183 if p2 == nullid:
184 184 # deep copy of a hash is faster than creating one
185 185 s = _nullhash.copy()
186 186 s.update(p1)
187 187 else:
188 188 # neither of the parent nodes is nullid
189 189 if p1 < p2:
190 190 a = p1
191 191 b = p2
192 192 else:
193 193 a = p2
194 194 b = p1
195 195 s = hashlib.sha1(a)
196 196 s.update(b)
197 197 s.update(text)
198 198 return s.digest()
199 199
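Because the parents are hashed in sorted order, the node id is symmetric in
its parents; an illustrative check with two arbitrary 20-byte ids ("hash"
below is the module-level helper above, not the builtin):

    pa = hashlib.sha1('a').digest()
    pb = hashlib.sha1('b').digest()
    assert hash('text', pa, pb) == hash('text', pb, pa)
    # with a null parent this reduces to sha1(nullid + parent + text)
    assert hash('text', pa, nullid) == hashlib.sha1(nullid + pa + 'text').digest()
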
200 200 class _testrevlog(object):
201 201 """minimalist fake revlog to use in doctests"""
202 202
203 203 def __init__(self, data, density=0.5, mingap=0):
204 204 """data is a list of revision payload boundaries"""
205 205 self._data = data
206 206 self._srdensitythreshold = density
207 207 self._srmingapsize = mingap
208 208
209 209 def start(self, rev):
210 210 if rev == 0:
211 211 return 0
212 212 return self._data[rev - 1]
213 213
214 214 def end(self, rev):
215 215 return self._data[rev]
216 216
217 217 def length(self, rev):
218 218 return self.end(rev) - self.start(rev)
219 219
220 220 def __len__(self):
221 221 return len(self._data)
222 222
223 223 def _trimchunk(revlog, revs, startidx, endidx=None):
224 224 """returns revs[startidx:endidx] without empty trailing revs
225 225
226 226 Doctest Setup
227 227 >>> revlog = _testrevlog([
228 228 ... 5, #0
229 229 ... 10, #1
230 230 ... 12, #2
231 231 ... 12, #3 (empty)
232 232 ... 17, #4
233 233 ... 21, #5
234 234 ... 21, #6 (empty)
235 235 ... ])
236 236
237 237 Contiguous cases:
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 239 [0, 1, 2, 3, 4, 5]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 241 [0, 1, 2, 3, 4]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 243 [0, 1, 2]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 245 [2]
246 246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 247 [3, 4, 5]
248 248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 249 [3, 4]
250 250
251 251 Discontiguous cases:
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 253 [1, 3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 255 [1]
256 256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 257 [3, 5]
258 258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 259 [3, 5]
260 260 """
261 261 length = revlog.length
262 262
263 263 if endidx is None:
264 264 endidx = len(revs)
265 265
266 266 # Trim empty revs at the end, but never the very first revision of a chain
267 267 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
268 268 endidx -= 1
269 269
270 270 return revs[startidx:endidx]
271 271
272 272 def _segmentspan(revlog, revs):
273 273 """Get the byte span of a segment of revisions
274 274
275 275 revs is a sorted array of revision numbers
276 276
277 277 >>> revlog = _testrevlog([
278 278 ... 5, #0
279 279 ... 10, #1
280 280 ... 12, #2
281 281 ... 12, #3 (empty)
282 282 ... 17, #4
283 283 ... ])
284 284
285 285 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
286 286 17
287 287 >>> _segmentspan(revlog, [0, 4])
288 288 17
289 289 >>> _segmentspan(revlog, [3, 4])
290 290 5
291 291 >>> _segmentspan(revlog, [1, 2, 3,])
292 292 7
293 293 >>> _segmentspan(revlog, [1, 3])
294 294 7
295 295 """
296 296 if not revs:
297 297 return 0
298 298 return revlog.end(revs[-1]) - revlog.start(revs[0])
299 299
300 300 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
301 301 """slice revs to reduce the amount of unrelated data to be read from disk.
302 302
303 303 ``revs`` is sliced into groups that should be read in one go.
304 304 Assumes that revs is sorted.
305 305
306 306 The initial chunk is sliced until the overall density (payload/chunks-span
307 307 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
308 308 `revlog._srmingapsize` is skipped.
309 309
310 310 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
311 311 For consistency with the other slicing choices, this limit won't go lower
312 312 than `revlog._srmingapsize`.
313 313
314 314 If an individual revision chunk is larger than this limit, it will still
315 315 be yielded individually.
316 316
317 317 >>> revlog = _testrevlog([
318 318 ... 5, #00 (5)
319 319 ... 10, #01 (5)
320 320 ... 12, #02 (2)
321 321 ... 12, #03 (empty)
322 322 ... 27, #04 (15)
323 323 ... 31, #05 (4)
324 324 ... 31, #06 (empty)
325 325 ... 42, #07 (11)
326 326 ... 47, #08 (5)
327 327 ... 47, #09 (empty)
328 328 ... 48, #10 (1)
329 329 ... 51, #11 (3)
330 330 ... 74, #12 (23)
331 331 ... 85, #13 (11)
332 332 ... 86, #14 (1)
333 333 ... 91, #15 (5)
334 334 ... ])
335 335
336 336 >>> list(_slicechunk(revlog, list(range(16))))
337 337 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
338 338 >>> list(_slicechunk(revlog, [0, 15]))
339 339 [[0], [15]]
340 340 >>> list(_slicechunk(revlog, [0, 11, 15]))
341 341 [[0], [11], [15]]
342 342 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
343 343 [[0], [11, 13, 15]]
344 344 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
345 345 [[1, 2], [5, 8, 10, 11], [14]]
346 346
347 347 Slicing with a maximum chunk size
348 348 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
349 349 [[0], [11], [13], [15]]
350 350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
351 351 [[0], [11], [13, 15]]
352 352 """
353 353 if targetsize is not None:
354 354 targetsize = max(targetsize, revlog._srmingapsize)
355 355 # targetsize should not be specified when evaluating delta candidates:
356 356 # * targetsize is used to ensure we stay within specification when reading,
357 357 # * deltainfo is used to pick a good delta chain when writing.
358 358 if not (deltainfo is None or targetsize is None):
359 359 msg = 'cannot use `targetsize` with a `deltainfo`'
360 360 raise error.ProgrammingError(msg)
361 361 for chunk in _slicechunktodensity(revlog, revs,
362 362 deltainfo,
363 363 revlog._srdensitythreshold,
364 364 revlog._srmingapsize):
365 365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
366 366 yield subchunk
367 367
368 368 def _slicechunktosize(revlog, revs, targetsize=None):
369 369 """slice revs to match the target size
370 370
371 371 This is intended to be used on chunks that density slicing selected, but
372 372 that are still too large compared to the read guarantee of revlogs. This
373 373 might happen when the "minimal gap size" interrupted the slicing, or when
374 374 chains are built in a way that creates large blocks next to each other.
375 375
376 376 >>> revlog = _testrevlog([
377 377 ... 3, #0 (3)
378 378 ... 5, #1 (2)
379 379 ... 6, #2 (1)
380 380 ... 8, #3 (2)
381 381 ... 8, #4 (empty)
382 382 ... 11, #5 (3)
383 383 ... 12, #6 (1)
384 384 ... 13, #7 (1)
385 385 ... 14, #8 (1)
386 386 ... ])
387 387
388 388 Cases where chunk is already small enough
389 389 >>> list(_slicechunktosize(revlog, [0], 3))
390 390 [[0]]
391 391 >>> list(_slicechunktosize(revlog, [6, 7], 3))
392 392 [[6, 7]]
393 393 >>> list(_slicechunktosize(revlog, [0], None))
394 394 [[0]]
395 395 >>> list(_slicechunktosize(revlog, [6, 7], None))
396 396 [[6, 7]]
397 397
398 398 Cases where we need actual slicing
399 399 >>> list(_slicechunktosize(revlog, [0, 1], 3))
400 400 [[0], [1]]
401 401 >>> list(_slicechunktosize(revlog, [1, 3], 3))
402 402 [[1], [3]]
403 403 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
404 404 [[1, 2], [3]]
405 405 >>> list(_slicechunktosize(revlog, [3, 5], 3))
406 406 [[3], [5]]
407 407 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
408 408 [[3], [5]]
409 409 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
410 410 [[5], [6, 7, 8]]
411 411 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
412 412 [[0], [1, 2], [3], [5], [6, 7, 8]]
413 413
414 414 Case with too large individual chunk (must return valid chunk)
415 415 >>> list(_slicechunktosize(revlog, [0, 1], 2))
416 416 [[0], [1]]
417 417 >>> list(_slicechunktosize(revlog, [1, 3], 1))
418 418 [[1], [3]]
419 419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
420 420 [[3], [5]]
421 421 """
422 422 assert targetsize is None or 0 <= targetsize
423 423 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
424 424 yield revs
425 425 return
426 426
427 427 startrevidx = 0
428 428 startdata = revlog.start(revs[0])
429 429 endrevidx = 0
430 430 iterrevs = enumerate(revs)
431 431 next(iterrevs) # skip first rev.
432 432 for idx, r in iterrevs:
433 433 span = revlog.end(r) - startdata
434 434 if span <= targetsize:
435 435 endrevidx = idx
436 436 else:
437 437 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
438 438 if chunk:
439 439 yield chunk
440 440 startrevidx = idx
441 441 startdata = revlog.start(r)
442 442 endrevidx = idx
443 443 yield _trimchunk(revlog, revs, startrevidx)
444 444
445 445 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
446 446 mingapsize=0):
447 447 """slice revs to reduce the amount of unrelated data to be read from disk.
448 448
449 449 ``revs`` is sliced into groups that should be read in one go.
450 450 Assumes that revs is sorted.
451 451
452 452 ``deltainfo`` is a _deltainfo instance of a revision that we would append
453 453 to the top of the revlog.
454 454
455 455 The initial chunk is sliced until the overall density (payload/chunks-span
456 456 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
457 457 skipped.
458 458
459 459 >>> revlog = _testrevlog([
460 460 ... 5, #00 (5)
461 461 ... 10, #01 (5)
462 462 ... 12, #02 (2)
463 463 ... 12, #03 (empty)
464 464 ... 27, #04 (15)
465 465 ... 31, #05 (4)
466 466 ... 31, #06 (empty)
467 467 ... 42, #07 (11)
468 468 ... 47, #08 (5)
469 469 ... 47, #09 (empty)
470 470 ... 48, #10 (1)
471 471 ... 51, #11 (3)
472 472 ... 74, #12 (23)
473 473 ... 85, #13 (11)
474 474 ... 86, #14 (1)
475 475 ... 91, #15 (5)
476 476 ... ])
477 477
478 478 >>> list(_slicechunktodensity(revlog, list(range(16))))
479 479 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
480 480 >>> list(_slicechunktodensity(revlog, [0, 15]))
481 481 [[0], [15]]
482 482 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
483 483 [[0], [11], [15]]
484 484 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
485 485 [[0], [11, 13, 15]]
486 486 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
487 487 [[1, 2], [5, 8, 10, 11], [14]]
488 488 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
489 489 ... mingapsize=20))
490 490 [[1, 2, 3, 5, 8, 10, 11], [14]]
491 491 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
492 492 ... targetdensity=0.95))
493 493 [[1, 2], [5], [8, 10, 11], [14]]
494 494 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
495 495 ... targetdensity=0.95, mingapsize=12))
496 496 [[1, 2], [5, 8, 10, 11], [14]]
497 497 """
498 498 start = revlog.start
499 499 length = revlog.length
500 500
501 501 if len(revs) <= 1:
502 502 yield revs
503 503 return
504 504
505 505 nextrev = len(revlog)
506 506 nextoffset = revlog.end(nextrev - 1)
507 507
508 508 if deltainfo is None:
509 509 deltachainspan = _segmentspan(revlog, revs)
510 510 chainpayload = sum(length(r) for r in revs)
511 511 else:
512 512 deltachainspan = deltainfo.distance
513 513 chainpayload = deltainfo.compresseddeltalen
514 514
515 515 if deltachainspan < mingapsize:
516 516 yield revs
517 517 return
518 518
519 519 readdata = deltachainspan
520 520
521 521 if deltachainspan:
522 522 density = chainpayload / float(deltachainspan)
523 523 else:
524 524 density = 1.0
525 525
526 526 if density >= targetdensity:
527 527 yield revs
528 528 return
529 529
530 530 if deltainfo is not None:
531 531 revs = list(revs)
532 532 revs.append(nextrev)
533 533
534 534 # Store the gaps in a heap to have them sorted by decreasing size
535 535 gapsheap = []
536 536 heapq.heapify(gapsheap)
537 537 prevend = None
538 538 for i, rev in enumerate(revs):
539 539 if rev < nextrev:
540 540 revstart = start(rev)
541 541 revlen = length(rev)
542 542 else:
543 543 revstart = nextoffset
544 544 revlen = deltainfo.deltalen
545 545
546 546 # Skip empty revisions to form larger holes
547 547 if revlen == 0:
548 548 continue
549 549
550 550 if prevend is not None:
551 551 gapsize = revstart - prevend
552 552 # only consider holes that are large enough
553 553 if gapsize > mingapsize:
554 554 heapq.heappush(gapsheap, (-gapsize, i))
555 555
556 556 prevend = revstart + revlen
557 557
558 558 # Collect the indices of the largest holes until the density is acceptable
559 559 indicesheap = []
560 560 heapq.heapify(indicesheap)
561 561 while gapsheap and density < targetdensity:
562 562 oppgapsize, gapidx = heapq.heappop(gapsheap)
563 563
564 564 heapq.heappush(indicesheap, gapidx)
565 565
566 566 # the gap sizes are stored as negatives to be sorted decreasingly
567 567 # by the heap
568 568 readdata -= (-oppgapsize)
569 569 if readdata > 0:
570 570 density = chainpayload / float(readdata)
571 571 else:
572 572 density = 1.0
573 573
574 574 # Cut the revs at collected indices
575 575 previdx = 0
576 576 while indicesheap:
577 577 idx = heapq.heappop(indicesheap)
578 578
579 579 chunk = _trimchunk(revlog, revs, previdx, idx)
580 580 if chunk:
581 581 yield chunk
582 582
583 583 previdx = idx
584 584
585 585 chunk = _trimchunk(revlog, revs, previdx)
586 586 if chunk:
587 587 yield chunk
588 588
589 589 @attr.s(slots=True, frozen=True)
590 590 class _deltainfo(object):
591 591 distance = attr.ib()
592 592 deltalen = attr.ib()
593 593 data = attr.ib()
594 594 base = attr.ib()
595 595 chainbase = attr.ib()
596 596 chainlen = attr.ib()
597 597 compresseddeltalen = attr.ib()
598 598
599 599 class _deltacomputer(object):
600 600 def __init__(self, revlog):
601 601 self.revlog = revlog
602 602
603 603 def _getcandidaterevs(self, p1, p2, cachedelta):
604 604 """
605 605 Provides candidate revisions worth diffing against,
606 606 grouped by decreasing level of preference.
607 607 """
608 608 revlog = self.revlog
609 609 gdelta = revlog._generaldelta
610 610 curr = len(revlog)
611 611 prev = curr - 1
612 612 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
613 613
614 614 # should we try to build a delta?
615 615 if prev != nullrev and revlog.storedeltachains:
616 616 tested = set()
617 617 # This condition is true most of the time when processing
618 618 # changegroup data into a generaldelta repo. The only time it
619 619 # isn't true is if this is the first revision in a delta chain
620 620 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
621 621 if cachedelta and gdelta and revlog._lazydeltabase:
622 622 # Assume what we received from the server is a good choice;
623 623 # building the delta will reuse the cache
624 624 yield (cachedelta[0],)
625 625 tested.add(cachedelta[0])
626 626
627 627 if gdelta:
628 628 # exclude already lazy tested base if any
629 629 parents = [p for p in (p1r, p2r)
630 630 if p != nullrev and p not in tested]
631 631
632 632 if not revlog._deltabothparents and len(parents) == 2:
633 633 parents.sort()
634 634 # To minimize the chance of having to build a fulltext,
635 635 # pick first whichever parent is closest to us (max rev)
636 636 yield (parents[1],)
637 637 # then the other one (min rev) if the first did not fit
638 638 yield (parents[0],)
639 639 tested.update(parents)
640 640 elif len(parents) > 0:
641 641 # Test all parents (1 or 2), and keep the best candidate
642 642 yield parents
643 643 tested.update(parents)
644 644
645 645 if prev not in tested:
646 646 # other approaches failed; try against prev to hopefully save
647 647 # us a fulltext.
648 648 yield (prev,)
649 649 tested.add(prev)
650 650
651 651 def buildtext(self, revinfo, fh):
652 652 """Builds a fulltext version of a revision
653 653
654 654 revinfo: _revisioninfo instance that contains all needed info
655 655 fh: file handle to either the .i or the .d revlog file,
656 656 depending on whether it is inlined or not
657 657 """
658 658 btext = revinfo.btext
659 659 if btext[0] is not None:
660 660 return btext[0]
661 661
662 662 revlog = self.revlog
663 663 cachedelta = revinfo.cachedelta
664 664 flags = revinfo.flags
665 665 node = revinfo.node
666 666
667 667 baserev = cachedelta[0]
668 668 delta = cachedelta[1]
669 669 # special case deltas which replace entire base; no need to decode
670 670 # base revision. this neatly avoids censored bases, which throw when
671 671 # they're decoded.
672 672 hlen = struct.calcsize(">lll")
673 673 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
674 674 len(delta) - hlen):
675 675 btext[0] = delta[hlen:]
676 676 else:
677 677 # deltabase is rawtext before changed by flag processors, which is
678 678 # equivalent to non-raw text
679 679 basetext = revlog.revision(baserev, _df=fh, raw=False)
680 680 btext[0] = mdiff.patch(basetext, delta)
681 681
682 682 try:
683 683 res = revlog._processflags(btext[0], flags, 'read', raw=True)
684 684 btext[0], validatehash = res
685 685 if validatehash:
686 686 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
687 687 if flags & REVIDX_ISCENSORED:
688 688 raise RevlogError(_('node %s is not censored') % node)
689 689 except CensoredNodeError:
690 690 # must pass the censored index flag to add censored revisions
691 691 if not flags & REVIDX_ISCENSORED:
692 692 raise
693 693 return btext[0]
694 694
695 695 def _builddeltadiff(self, base, revinfo, fh):
696 696 revlog = self.revlog
697 697 t = self.buildtext(revinfo, fh)
698 698 if revlog.iscensored(base):
699 699 # deltas based on a censored revision must replace the
700 700 # full content in one patch, so delta works everywhere
701 701 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
702 702 delta = header + t
703 703 else:
704 704 ptext = revlog.revision(base, _df=fh, raw=True)
705 705 delta = mdiff.textdiff(ptext, t)
706 706
707 707 return delta
708 708
709 709 def _builddeltainfo(self, revinfo, base, fh):
710 710 # can we use the cached delta?
711 711 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
712 712 delta = revinfo.cachedelta[1]
713 713 else:
714 714 delta = self._builddeltadiff(base, revinfo, fh)
715 715 revlog = self.revlog
716 716 header, data = revlog.compress(delta)
717 717 deltalen = len(header) + len(data)
718 718 chainbase = revlog.chainbase(base)
719 719 offset = revlog.end(len(revlog) - 1)
720 720 dist = deltalen + offset - revlog.start(chainbase)
721 721 if revlog._generaldelta:
722 722 deltabase = base
723 723 else:
724 724 deltabase = chainbase
725 725 chainlen, compresseddeltalen = revlog._chaininfo(base)
726 726 chainlen += 1
727 727 compresseddeltalen += deltalen
728 728 return _deltainfo(dist, deltalen, (header, data), deltabase,
729 729 chainbase, chainlen, compresseddeltalen)
730 730
731 731 def finddeltainfo(self, revinfo, fh):
732 732 """Find an acceptable delta against a candidate revision
733 733
734 734 revinfo: information about the revision (instance of _revisioninfo)
735 735 fh: file handle to either the .i or the .d revlog file,
736 736 depending on whether it is inlined or not
737 737
738 738 Returns the first acceptable candidate revision, as ordered by
739 739 _getcandidaterevs
740 740 """
741 741 cachedelta = revinfo.cachedelta
742 742 p1 = revinfo.p1
743 743 p2 = revinfo.p2
744 744 revlog = self.revlog
745 745
746 746 deltainfo = None
747 747 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
748 748 nominateddeltas = []
749 749 for candidaterev in candidaterevs:
750 750 # no delta for rawtext-changing revs (see "candelta" for why)
751 751 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
752 752 continue
753 753 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
754 754 if revlog._isgooddeltainfo(candidatedelta, revinfo):
755 755 nominateddeltas.append(candidatedelta)
756 756 if nominateddeltas:
757 757 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
758 758 break
759 759
760 760 return deltainfo
761 761
762 762 @attr.s(slots=True, frozen=True)
763 763 class _revisioninfo(object):
764 764 """Information about a revision that allows building its fulltext
765 765 node: expected hash of the revision
766 766 p1, p2: parent revs of the revision
767 767 btext: built text cache consisting of a one-element list
768 768 cachedelta: (baserev, uncompressed_delta) or None
769 769 flags: flags associated with the revision storage
770 770
771 771 One of btext[0] or cachedelta must be set.
772 772 """
773 773 node = attr.ib()
774 774 p1 = attr.ib()
775 775 p2 = attr.ib()
776 776 btext = attr.ib()
777 777 textlen = attr.ib()
778 778 cachedelta = attr.ib()
779 779 flags = attr.ib()
780 780
781 781 # index v0:
782 782 # 4 bytes: offset
783 783 # 4 bytes: compressed length
784 784 # 4 bytes: base rev
785 785 # 4 bytes: link rev
786 786 # 20 bytes: parent 1 nodeid
787 787 # 20 bytes: parent 2 nodeid
788 788 # 20 bytes: nodeid
789 789 indexformatv0 = struct.Struct(">4l20s20s20s")
790 790 indexformatv0_pack = indexformatv0.pack
791 791 indexformatv0_unpack = indexformatv0.unpack
792 792
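An illustrative round-trip of one 76-byte v0 entry (the field values here are
arbitrary):

    entry = (0, 11, 0, 0, nullid, nullid, '\x11' * 20)
    assert indexformatv0.size == 76
    assert indexformatv0_unpack(indexformatv0_pack(*entry)) == entry
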
793 class revlogoldindex(list):
794 def __len__(self):
795 return list.__len__(self) + 1
796 def __getitem__(self, i):
797 if i == -1 or i == len(self) - 1:
798 return (0, 0, 0, -1, -1, -1, -1, nullid)
799 return list.__getitem__(self, i)
800
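The new type makes the trailing null-revision sentinel virtual instead of a
physically stored entry (compare the removed append in parseindex below); a
small sketch with one arbitrary v1-style tuple:

    idx = revlogoldindex([(0, 5, -1, 0, 0, -1, -1, '\x11' * 20)])
    assert len(idx) == 2                  # one real entry + null sentinel
    assert idx[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)
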
793 801 class revlogoldio(object):
794 802 def __init__(self):
795 803 self.size = indexformatv0.size
796 804
797 805 def parseindex(self, data, inline):
798 806 s = self.size
799 807 index = []
800 808 nodemap = {nullid: nullrev}
801 809 n = off = 0
802 810 l = len(data)
803 811 while off + s <= l:
804 812 cur = data[off:off + s]
805 813 off += s
806 814 e = indexformatv0_unpack(cur)
807 815 # transform to revlogv1 format
808 816 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
809 817 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
810 818 index.append(e2)
811 819 nodemap[e[6]] = n
812 820 n += 1
813 821
814 # add the magic null revision at -1
815 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
816
817 return index, nodemap, None
822 return revlogoldindex(index), nodemap, None
818 823
819 824 def packentry(self, entry, node, version, rev):
820 825 if gettype(entry[0]):
821 826 raise RevlogError(_('index entry flags need revlog version 1'))
822 827 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
823 828 node(entry[5]), node(entry[6]), entry[7])
824 829 return indexformatv0_pack(*e2)
825 830
826 831 # index ng:
827 832 # 6 bytes: offset
828 833 # 2 bytes: flags
829 834 # 4 bytes: compressed length
830 835 # 4 bytes: uncompressed length
831 836 # 4 bytes: base rev
832 837 # 4 bytes: link rev
833 838 # 4 bytes: parent 1 rev
834 839 # 4 bytes: parent 2 rev
835 840 # 32 bytes: nodeid
836 841 indexformatng = struct.Struct(">Qiiiiii20s12x")
837 842 indexformatng_pack = indexformatng.pack
838 843 versionformat = struct.Struct(">I")
839 844 versionformat_pack = versionformat.pack
840 845 versionformat_unpack = versionformat.unpack
841 846
842 847 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
843 848 # signed integer)
844 849 _maxentrysize = 0x7fffffff
845 850
846 851 class revlogio(object):
847 852 def __init__(self):
848 853 self.size = indexformatng.size
849 854
850 855 def parseindex(self, data, inline):
851 856 # call the C implementation to parse the index data
852 857 index, cache = parsers.parse_index2(data, inline)
853 858 return index, getattr(index, 'nodemap', None), cache
854 859
855 860 def packentry(self, entry, node, version, rev):
856 861 p = indexformatng_pack(*entry)
857 862 if rev == 0:
858 863 p = versionformat_pack(version) + p[4:]
859 864 return p
860 865
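Note how packentry splices the version header into the entry for rev 0: the
leading 4 bytes there are the high bytes of a zero offset, so they can carry
the version instead; a sketch (the entry values are arbitrary):

    io = revlogio()
    entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, '\x33' * 20)
    packed = io.packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert versionformat_unpack(packed[:4])[0] == REVLOG_DEFAULT_VERSION
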
861 866 class revlog(object):
862 867 """
863 868 the underlying revision storage object
864 869
865 870 A revlog consists of two parts, an index and the revision data.
866 871
867 872 The index is a file with a fixed record size containing
868 873 information on each revision, including its nodeid (hash), the
869 874 nodeids of its parents, the position and offset of its data within
870 875 the data file, and the revision it's based on. Finally, each entry
871 876 contains a linkrev entry that can serve as a pointer to external
872 877 data.
873 878
874 879 The revision data itself is a linear collection of data chunks.
875 880 Each chunk represents a revision and is usually represented as a
876 881 delta against the previous chunk. To bound lookup time, runs of
877 882 deltas are limited to about 2 times the length of the original
878 883 version data. This makes retrieval of a version proportional to
879 884 its size, or O(1) relative to the number of revisions.
880 885
881 886 Both pieces of the revlog are written to in an append-only
882 887 fashion, which means we never need to rewrite a file to insert or
883 888 remove data, and can use some simple techniques to avoid the need
884 889 for locking while reading.
885 890
886 891 If checkambig, indexfile is opened with checkambig=True when
887 892 writing, to avoid file stat ambiguity.
888 893
889 894 If mmaplargeindex is True, and an mmapindexthreshold is set, the
890 895 index will be mmapped rather than read if it is larger than the
891 896 configured threshold.
892 897
893 898 If censorable is True, the revlog can have censored revisions.
894 899 """
895 900 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
896 901 mmaplargeindex=False, censorable=False):
897 902 """
898 903 create a revlog object
899 904
900 905 opener is a function that abstracts the file opening operation
901 906 and can be used to implement COW semantics or the like.
902 907 """
903 908 self.indexfile = indexfile
904 909 self.datafile = datafile or (indexfile[:-2] + ".d")
905 910 self.opener = opener
906 911 # When True, indexfile is opened with checkambig=True when writing,
907 912 # to avoid file stat ambiguity.
908 913 self._checkambig = checkambig
909 914 self._censorable = censorable
910 915 # 3-tuple of (node, rev, text) for a raw revision.
911 916 self._cache = None
912 917 # Maps rev to chain base rev.
913 918 self._chainbasecache = util.lrucachedict(100)
914 919 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
915 920 self._chunkcache = (0, '')
916 921 # How much data to read and cache into the raw revlog data cache.
917 922 self._chunkcachesize = 65536
918 923 self._maxchainlen = None
919 924 self._deltabothparents = True
920 925 self.index = []
921 926 # Mapping of partial identifiers to full nodes.
922 927 self._pcache = {}
923 928 # Mapping of revision integer to full node.
924 929 self._nodecache = {nullid: nullrev}
925 930 self._nodepos = None
926 931 self._compengine = 'zlib'
927 932 self._maxdeltachainspan = -1
928 933 self._withsparseread = False
929 934 self._sparserevlog = False
930 935 self._srdensitythreshold = 0.50
931 936 self._srmingapsize = 262144
932 937
933 938 mmapindexthreshold = None
934 939 v = REVLOG_DEFAULT_VERSION
935 940 opts = getattr(opener, 'options', None)
936 941 if opts is not None:
937 942 if 'revlogv2' in opts:
938 943 # version 2 revlogs always use generaldelta.
939 944 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
940 945 elif 'revlogv1' in opts:
941 946 if 'generaldelta' in opts:
942 947 v |= FLAG_GENERALDELTA
943 948 else:
944 949 v = 0
945 950 if 'chunkcachesize' in opts:
946 951 self._chunkcachesize = opts['chunkcachesize']
947 952 if 'maxchainlen' in opts:
948 953 self._maxchainlen = opts['maxchainlen']
949 954 if 'deltabothparents' in opts:
950 955 self._deltabothparents = opts['deltabothparents']
951 956 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
952 957 if 'compengine' in opts:
953 958 self._compengine = opts['compengine']
954 959 if 'maxdeltachainspan' in opts:
955 960 self._maxdeltachainspan = opts['maxdeltachainspan']
956 961 if mmaplargeindex and 'mmapindexthreshold' in opts:
957 962 mmapindexthreshold = opts['mmapindexthreshold']
958 963 self._sparserevlog = bool(opts.get('sparse-revlog', False))
959 964 withsparseread = bool(opts.get('with-sparse-read', False))
960 965 # sparse-revlog forces sparse-read
961 966 self._withsparseread = self._sparserevlog or withsparseread
962 967 if 'sparse-read-density-threshold' in opts:
963 968 self._srdensitythreshold = opts['sparse-read-density-threshold']
964 969 if 'sparse-read-min-gap-size' in opts:
965 970 self._srmingapsize = opts['sparse-read-min-gap-size']
966 971
967 972 if self._chunkcachesize <= 0:
968 973 raise RevlogError(_('revlog chunk cache size %r is not greater '
969 974 'than 0') % self._chunkcachesize)
970 975 elif self._chunkcachesize & (self._chunkcachesize - 1):
971 976 raise RevlogError(_('revlog chunk cache size %r is not a power '
972 977 'of 2') % self._chunkcachesize)
973 978
974 979 indexdata = ''
975 980 self._initempty = True
976 981 try:
977 982 with self._indexfp() as f:
978 983 if (mmapindexthreshold is not None and
979 984 self.opener.fstat(f).st_size >= mmapindexthreshold):
980 985 indexdata = util.buffer(util.mmapread(f))
981 986 else:
982 987 indexdata = f.read()
983 988 if len(indexdata) > 0:
984 989 v = versionformat_unpack(indexdata[:4])[0]
985 990 self._initempty = False
986 991 except IOError as inst:
987 992 if inst.errno != errno.ENOENT:
988 993 raise
989 994
990 995 self.version = v
991 996 self._inline = v & FLAG_INLINE_DATA
992 997 self._generaldelta = v & FLAG_GENERALDELTA
993 998 flags = v & ~0xFFFF
994 999 fmt = v & 0xFFFF
995 1000 if fmt == REVLOGV0:
996 1001 if flags:
997 1002 raise RevlogError(_('unknown flags (%#04x) in version %d '
998 1003 'revlog %s') %
999 1004 (flags >> 16, fmt, self.indexfile))
1000 1005 elif fmt == REVLOGV1:
1001 1006 if flags & ~REVLOGV1_FLAGS:
1002 1007 raise RevlogError(_('unknown flags (%#04x) in version %d '
1003 1008 'revlog %s') %
1004 1009 (flags >> 16, fmt, self.indexfile))
1005 1010 elif fmt == REVLOGV2:
1006 1011 if flags & ~REVLOGV2_FLAGS:
1007 1012 raise RevlogError(_('unknown flags (%#04x) in version %d '
1008 1013 'revlog %s') %
1009 1014 (flags >> 16, fmt, self.indexfile))
1010 1015 else:
1011 1016 raise RevlogError(_('unknown version (%d) in revlog %s') %
1012 1017 (fmt, self.indexfile))
1013 1018
1014 1019 self.storedeltachains = True
1015 1020
1016 1021 self._io = revlogio()
1017 1022 if self.version == REVLOGV0:
1018 1023 self._io = revlogoldio()
1019 1024 try:
1020 1025 d = self._io.parseindex(indexdata, self._inline)
1021 1026 except (ValueError, IndexError):
1022 1027 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1023 1028 self.index, nodemap, self._chunkcache = d
1024 1029 if nodemap is not None:
1025 1030 self.nodemap = self._nodecache = nodemap
1026 1031 if not self._chunkcache:
1027 1032 self._chunkclear()
1028 1033 # revnum -> (chain-length, sum-delta-length)
1029 1034 self._chaininfocache = {}
1030 1035 # revlog header -> revlog compressor
1031 1036 self._decompressors = {}
1032 1037
1033 1038 @util.propertycache
1034 1039 def _compressor(self):
1035 1040 return util.compengines[self._compengine].revlogcompressor()
1036 1041
1037 1042 def _indexfp(self, mode='r'):
1038 1043 """file object for the revlog's index file"""
1039 1044 args = {r'mode': mode}
1040 1045 if mode != 'r':
1041 1046 args[r'checkambig'] = self._checkambig
1042 1047 if mode == 'w':
1043 1048 args[r'atomictemp'] = True
1044 1049 return self.opener(self.indexfile, **args)
1045 1050
1046 1051 def _datafp(self, mode='r'):
1047 1052 """file object for the revlog's data file"""
1048 1053 return self.opener(self.datafile, mode=mode)
1049 1054
1050 1055 @contextlib.contextmanager
1051 1056 def _datareadfp(self, existingfp=None):
1052 1057 """file object suitable to read data"""
1053 1058 if existingfp is not None:
1054 1059 yield existingfp
1055 1060 else:
1056 1061 if self._inline:
1057 1062 func = self._indexfp
1058 1063 else:
1059 1064 func = self._datafp
1060 1065 with func() as fp:
1061 1066 yield fp
1062 1067
1063 1068 def tip(self):
1064 1069 return self.node(len(self.index) - 2)
1065 1070 def __contains__(self, rev):
1066 1071 return 0 <= rev < len(self)
1067 1072 def __len__(self):
1068 1073 return len(self.index) - 1
1069 1074 def __iter__(self):
1070 1075 return iter(pycompat.xrange(len(self)))
1071 1076 def revs(self, start=0, stop=None):
1072 1077 """iterate over all revs in this revlog (from start to stop)"""
1073 1078 step = 1
1074 1079 length = len(self)
1075 1080 if stop is not None:
1076 1081 if start > stop:
1077 1082 step = -1
1078 1083 stop += step
1079 1084 if stop > length:
1080 1085 stop = length
1081 1086 else:
1082 1087 stop = length
1083 1088 return pycompat.xrange(start, stop, step)
1084 1089
1085 1090 @util.propertycache
1086 1091 def nodemap(self):
1087 1092 self.rev(self.node(0))
1088 1093 return self._nodecache
1089 1094
1090 1095 def hasnode(self, node):
1091 1096 try:
1092 1097 self.rev(node)
1093 1098 return True
1094 1099 except KeyError:
1095 1100 return False
1096 1101
1097 1102 def candelta(self, baserev, rev):
1098 1103 """whether two revisions (baserev, rev) can be delta-ed or not"""
1099 1104 # Disable delta if either rev requires a content-changing flag
1100 1105 # processor (ex. LFS). This is because such flag processor can alter
1101 1106 # the rawtext content that the delta will be based on, and two clients
1102 1107 # could have the same revlog node with different flags (i.e. different
1103 1108 # rawtext contents) and the delta could be incompatible.
1104 1109 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1105 1110 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1106 1111 return False
1107 1112 return True
1108 1113
1109 1114 def clearcaches(self):
1110 1115 self._cache = None
1111 1116 self._chainbasecache.clear()
1112 1117 self._chunkcache = (0, '')
1113 1118 self._pcache = {}
1114 1119
1115 1120 try:
1116 1121 self._nodecache.clearcaches()
1117 1122 except AttributeError:
1118 1123 self._nodecache = {nullid: nullrev}
1119 1124 self._nodepos = None
1120 1125
1121 1126 def rev(self, node):
1122 1127 try:
1123 1128 return self._nodecache[node]
1124 1129 except TypeError:
1125 1130 raise
1126 1131 except RevlogError:
1127 1132 # parsers.c radix tree lookup failed
1128 1133 if node == wdirid or node in wdirfilenodeids:
1129 1134 raise error.WdirUnsupported
1130 1135 raise LookupError(node, self.indexfile, _('no node'))
1131 1136 except KeyError:
1132 1137 # pure python cache lookup failed
1133 1138 n = self._nodecache
1134 1139 i = self.index
1135 1140 p = self._nodepos
1136 1141 if p is None:
1137 1142 p = len(i) - 2
1138 1143 else:
1139 1144 assert p < len(i)
1140 1145 for r in pycompat.xrange(p, -1, -1):
1141 1146 v = i[r][7]
1142 1147 n[v] = r
1143 1148 if v == node:
1144 1149 self._nodepos = r - 1
1145 1150 return r
1146 1151 if node == wdirid or node in wdirfilenodeids:
1147 1152 raise error.WdirUnsupported
1148 1153 raise LookupError(node, self.indexfile, _('no node'))
1149 1154
1150 1155 # Accessors for index entries.
1151 1156
1152 1157 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1153 1158 # are flags.
1154 1159 def start(self, rev):
1155 1160 return int(self.index[rev][0] >> 16)
1156 1161
1157 1162 def flags(self, rev):
1158 1163 return self.index[rev][0] & 0xFFFF
1159 1164
1160 1165 def length(self, rev):
1161 1166 return self.index[rev][1]
1162 1167
1163 1168 def rawsize(self, rev):
1164 1169 """return the length of the uncompressed text for a given revision"""
1165 1170 l = self.index[rev][2]
1166 1171 if l >= 0:
1167 1172 return l
1168 1173
1169 1174 t = self.revision(rev, raw=True)
1170 1175 return len(t)
1171 1176
1172 1177 def size(self, rev):
1173 1178 """length of non-raw text (processed by a "read" flag processor)"""
1174 1179 # fast path: if no "read" flag processor could change the content,
1175 1180 # size is rawsize. note: ELLIPSIS is known to not change the content.
1176 1181 flags = self.flags(rev)
1177 1182 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1178 1183 return self.rawsize(rev)
1179 1184
1180 1185 return len(self.revision(rev, raw=False))
1181 1186
1182 1187 def chainbase(self, rev):
1183 1188 base = self._chainbasecache.get(rev)
1184 1189 if base is not None:
1185 1190 return base
1186 1191
1187 1192 index = self.index
1188 1193 iterrev = rev
1189 1194 base = index[iterrev][3]
1190 1195 while base != iterrev:
1191 1196 iterrev = base
1192 1197 base = index[iterrev][3]
1193 1198
1194 1199 self._chainbasecache[rev] = base
1195 1200 return base
1196 1201
1197 1202 def linkrev(self, rev):
1198 1203 return self.index[rev][4]
1199 1204
1200 1205 def parentrevs(self, rev):
1201 1206 try:
1202 1207 entry = self.index[rev]
1203 1208 except IndexError:
1204 1209 if rev == wdirrev:
1205 1210 raise error.WdirUnsupported
1206 1211 raise
1207 1212
1208 1213 return entry[5], entry[6]
1209 1214
1210 1215 def node(self, rev):
1211 1216 try:
1212 1217 return self.index[rev][7]
1213 1218 except IndexError:
1214 1219 if rev == wdirrev:
1215 1220 raise error.WdirUnsupported
1216 1221 raise
1217 1222
1218 1223 # Derived from index values.
1219 1224
1220 1225 def end(self, rev):
1221 1226 return self.start(rev) + self.length(rev)
1222 1227
1223 1228 def parents(self, node):
1224 1229 i = self.index
1225 1230 d = i[self.rev(node)]
1226 1231 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1227 1232
1228 1233 def chainlen(self, rev):
1229 1234 return self._chaininfo(rev)[0]
1230 1235
1231 1236 def _chaininfo(self, rev):
1232 1237 chaininfocache = self._chaininfocache
1233 1238 if rev in chaininfocache:
1234 1239 return chaininfocache[rev]
1235 1240 index = self.index
1236 1241 generaldelta = self._generaldelta
1237 1242 iterrev = rev
1238 1243 e = index[iterrev]
1239 1244 clen = 0
1240 1245 compresseddeltalen = 0
1241 1246 while iterrev != e[3]:
1242 1247 clen += 1
1243 1248 compresseddeltalen += e[1]
1244 1249 if generaldelta:
1245 1250 iterrev = e[3]
1246 1251 else:
1247 1252 iterrev -= 1
1248 1253 if iterrev in chaininfocache:
1249 1254 t = chaininfocache[iterrev]
1250 1255 clen += t[0]
1251 1256 compresseddeltalen += t[1]
1252 1257 break
1253 1258 e = index[iterrev]
1254 1259 else:
1255 1260 # Add text length of base since decompressing that also takes
1256 1261 # work. For cache hits the length is already included.
1257 1262 compresseddeltalen += e[1]
1258 1263 r = (clen, compresseddeltalen)
1259 1264 chaininfocache[rev] = r
1260 1265 return r
1261 1266
1262 1267 def _deltachain(self, rev, stoprev=None):
1263 1268 """Obtain the delta chain for a revision.
1264 1269
1265 1270 ``stoprev`` specifies a revision to stop at. If not specified, we
1266 1271 stop at the base of the chain.
1267 1272
1268 1273 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1269 1274 revs in ascending order and ``stopped`` is a bool indicating whether
1270 1275 ``stoprev`` was hit.
1271 1276 """
1272 1277 # Try C implementation.
1273 1278 try:
1274 1279 return self.index.deltachain(rev, stoprev, self._generaldelta)
1275 1280 except AttributeError:
1276 1281 pass
1277 1282
1278 1283 chain = []
1279 1284
1280 1285 # Alias to prevent attribute lookup in tight loop.
1281 1286 index = self.index
1282 1287 generaldelta = self._generaldelta
1283 1288
1284 1289 iterrev = rev
1285 1290 e = index[iterrev]
1286 1291 while iterrev != e[3] and iterrev != stoprev:
1287 1292 chain.append(iterrev)
1288 1293 if generaldelta:
1289 1294 iterrev = e[3]
1290 1295 else:
1291 1296 iterrev -= 1
1292 1297 e = index[iterrev]
1293 1298
1294 1299 if iterrev == stoprev:
1295 1300 stopped = True
1296 1301 else:
1297 1302 chain.append(iterrev)
1298 1303 stopped = False
1299 1304
1300 1305 chain.reverse()
1301 1306 return chain, stopped
1302 1307
1303 1308 def ancestors(self, revs, stoprev=0, inclusive=False):
1304 1309 """Generate the ancestors of 'revs' in reverse topological order.
1305 1310 Does not generate revs lower than stoprev.
1306 1311
1307 1312 See the documentation for ancestor.lazyancestors for more details."""
1308 1313
1309 1314 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1310 1315 inclusive=inclusive)
1311 1316
1312 1317 def descendants(self, revs):
1313 1318 """Generate the descendants of 'revs' in revision order.
1314 1319
1315 1320 Yield a sequence of revision numbers starting with a child of
1316 1321 some rev in revs, i.e., each revision is *not* considered a
1317 1322 descendant of itself. Results are ordered by revision number (a
1318 1323 topological sort)."""
1319 1324 first = min(revs)
1320 1325 if first == nullrev:
1321 1326 for i in self:
1322 1327 yield i
1323 1328 return
1324 1329
1325 1330 seen = set(revs)
1326 1331 for i in self.revs(start=first + 1):
1327 1332 for x in self.parentrevs(i):
1328 1333 if x != nullrev and x in seen:
1329 1334 seen.add(i)
1330 1335 yield i
1331 1336 break
1332 1337
1333 1338 def findcommonmissing(self, common=None, heads=None):
1334 1339 """Return a tuple of the ancestors of common and the ancestors of heads
1335 1340 that are not ancestors of common. In revset terminology, we return the
1336 1341 tuple:
1337 1342
1338 1343 ::common, (::heads) - (::common)
1339 1344
1340 1345 The list is sorted by revision number, meaning it is
1341 1346 topologically sorted.
1342 1347
1343 1348 'heads' and 'common' are both lists of node IDs. If heads is
1344 1349 not supplied, uses all of the revlog's heads. If common is not
1345 1350 supplied, uses nullid."""
1346 1351 if common is None:
1347 1352 common = [nullid]
1348 1353 if heads is None:
1349 1354 heads = self.heads()
1350 1355
1351 1356 common = [self.rev(n) for n in common]
1352 1357 heads = [self.rev(n) for n in heads]
1353 1358
1354 1359 # we want the ancestors, but inclusive
1355 1360 class lazyset(object):
1356 1361 def __init__(self, lazyvalues):
1357 1362 self.addedvalues = set()
1358 1363 self.lazyvalues = lazyvalues
1359 1364
1360 1365 def __contains__(self, value):
1361 1366 return value in self.addedvalues or value in self.lazyvalues
1362 1367
1363 1368 def __iter__(self):
1364 1369 added = self.addedvalues
1365 1370 for r in added:
1366 1371 yield r
1367 1372 for r in self.lazyvalues:
1368 1373 if r not in added:
1369 1374 yield r
1370 1375
1371 1376 def add(self, value):
1372 1377 self.addedvalues.add(value)
1373 1378
1374 1379 def update(self, values):
1375 1380 self.addedvalues.update(values)
1376 1381
1377 1382 has = lazyset(self.ancestors(common))
1378 1383 has.add(nullrev)
1379 1384 has.update(common)
1380 1385
1381 1386 # take all ancestors from heads that aren't in has
1382 1387 missing = set()
1383 1388 visit = collections.deque(r for r in heads if r not in has)
1384 1389 while visit:
1385 1390 r = visit.popleft()
1386 1391 if r in missing:
1387 1392 continue
1388 1393 else:
1389 1394 missing.add(r)
1390 1395 for p in self.parentrevs(r):
1391 1396 if p not in has:
1392 1397 visit.append(p)
1393 1398 missing = list(missing)
1394 1399 missing.sort()
1395 1400 return has, [self.node(miss) for miss in missing]
1396 1401
1397 1402 def incrementalmissingrevs(self, common=None):
1398 1403 """Return an object that can be used to incrementally compute the
1399 1404 revision numbers of the ancestors of arbitrary sets that are not
1400 1405 ancestors of common. This is an ancestor.incrementalmissingancestors
1401 1406 object.
1402 1407
1403 1408 'common' is a list of revision numbers. If common is not supplied, uses
1404 1409 nullrev.
1405 1410 """
1406 1411 if common is None:
1407 1412 common = [nullrev]
1408 1413
1409 1414 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1410 1415
1411 1416 def findmissingrevs(self, common=None, heads=None):
1412 1417 """Return the revision numbers of the ancestors of heads that
1413 1418 are not ancestors of common.
1414 1419
1415 1420 More specifically, return a list of revision numbers corresponding to
1416 1421 nodes N such that every N satisfies the following constraints:
1417 1422
1418 1423 1. N is an ancestor of some node in 'heads'
1419 1424 2. N is not an ancestor of any node in 'common'
1420 1425
1421 1426 The list is sorted by revision number, meaning it is
1422 1427 topologically sorted.
1423 1428
1424 1429 'heads' and 'common' are both lists of revision numbers. If heads is
1425 1430 not supplied, uses all of the revlog's heads. If common is not
1426 1431 supplied, uses nullid."""
1427 1432 if common is None:
1428 1433 common = [nullrev]
1429 1434 if heads is None:
1430 1435 heads = self.headrevs()
1431 1436
1432 1437 inc = self.incrementalmissingrevs(common=common)
1433 1438 return inc.missingancestors(heads)
1434 1439
1435 1440 def findmissing(self, common=None, heads=None):
1436 1441 """Return the ancestors of heads that are not ancestors of common.
1437 1442
1438 1443 More specifically, return a list of nodes N such that every N
1439 1444 satisfies the following constraints:
1440 1445
1441 1446 1. N is an ancestor of some node in 'heads'
1442 1447 2. N is not an ancestor of any node in 'common'
1443 1448
1444 1449 The list is sorted by revision number, meaning it is
1445 1450 topologically sorted.
1446 1451
1447 1452 'heads' and 'common' are both lists of node IDs. If heads is
1448 1453 not supplied, uses all of the revlog's heads. If common is not
1449 1454 supplied, uses nullid."""
1450 1455 if common is None:
1451 1456 common = [nullid]
1452 1457 if heads is None:
1453 1458 heads = self.heads()
1454 1459
1455 1460 common = [self.rev(n) for n in common]
1456 1461 heads = [self.rev(n) for n in heads]
1457 1462
1458 1463 inc = self.incrementalmissingrevs(common=common)
1459 1464 return [self.node(r) for r in inc.missingancestors(heads)]
1460 1465
1461 1466 def nodesbetween(self, roots=None, heads=None):
1462 1467 """Return a topological path from 'roots' to 'heads'.
1463 1468
1464 1469 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1465 1470 topologically sorted list of all nodes N that satisfy both of
1466 1471 these constraints:
1467 1472
1468 1473 1. N is a descendant of some node in 'roots'
1469 1474 2. N is an ancestor of some node in 'heads'
1470 1475
1471 1476 Every node is considered to be both a descendant and an ancestor
1472 1477 of itself, so every reachable node in 'roots' and 'heads' will be
1473 1478 included in 'nodes'.
1474 1479
1475 1480 'outroots' is the list of reachable nodes in 'roots', i.e., the
1476 1481 subset of 'roots' that is returned in 'nodes'. Likewise,
1477 1482 'outheads' is the subset of 'heads' that is also in 'nodes'.
1478 1483
1479 1484 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1480 1485 unspecified, uses nullid as the only root. If 'heads' is
1481 1486 unspecified, uses list of all of the revlog's heads."""
1482 1487 nonodes = ([], [], [])
1483 1488 if roots is not None:
1484 1489 roots = list(roots)
1485 1490 if not roots:
1486 1491 return nonodes
1487 1492 lowestrev = min([self.rev(n) for n in roots])
1488 1493 else:
1489 1494 roots = [nullid] # Everybody's a descendant of nullid
1490 1495 lowestrev = nullrev
1491 1496 if (lowestrev == nullrev) and (heads is None):
1492 1497 # We want _all_ the nodes!
1493 1498 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1494 1499 if heads is None:
1495 1500 # All nodes are ancestors, so the latest ancestor is the last
1496 1501 # node.
1497 1502 highestrev = len(self) - 1
1498 1503 # Set ancestors to None to signal that every node is an ancestor.
1499 1504 ancestors = None
1500 1505 # Set heads to an empty dictionary for later discovery of heads
1501 1506 heads = {}
1502 1507 else:
1503 1508 heads = list(heads)
1504 1509 if not heads:
1505 1510 return nonodes
1506 1511 ancestors = set()
1507 1512 # Turn heads into a dictionary so we can remove 'fake' heads.
1508 1513 # Also, later we will be using it to filter out the heads we can't
1509 1514 # find from roots.
1510 1515 heads = dict.fromkeys(heads, False)
1511 1516 # Start at the top and keep marking parents until we're done.
1512 1517 nodestotag = set(heads)
1513 1518 # Remember where the top was so we can use it as a limit later.
1514 1519 highestrev = max([self.rev(n) for n in nodestotag])
1515 1520 while nodestotag:
1516 1521 # grab a node to tag
1517 1522 n = nodestotag.pop()
1518 1523 # Never tag nullid
1519 1524 if n == nullid:
1520 1525 continue
1521 1526 # A node's revision number represents its place in a
1522 1527 # topologically sorted list of nodes.
1523 1528 r = self.rev(n)
1524 1529 if r >= lowestrev:
1525 1530 if n not in ancestors:
1526 1531 # If we are possibly a descendant of one of the roots
1527 1532 # and we haven't already been marked as an ancestor
1528 1533 ancestors.add(n) # Mark as ancestor
1529 1534 # Add non-nullid parents to list of nodes to tag.
1530 1535 nodestotag.update([p for p in self.parents(n) if
1531 1536 p != nullid])
1532 1537 elif n in heads: # We've seen it before, is it a fake head?
1533 1538 # So it is, real heads should not be the ancestors of
1534 1539 # any other heads.
1535 1540 heads.pop(n)
1536 1541 if not ancestors:
1537 1542 return nonodes
1538 1543 # Now that we have our set of ancestors, we want to remove any
1539 1544 # roots that are not ancestors.
1540 1545
1541 1546 # If one of the roots was nullid, everything is included anyway.
1542 1547 if lowestrev > nullrev:
1543 1548 # But, since we weren't, let's recompute the lowest rev to not
1544 1549 # include roots that aren't ancestors.
1545 1550
1546 1551 # Filter out roots that aren't ancestors of heads
1547 1552 roots = [root for root in roots if root in ancestors]
1548 1553 # Recompute the lowest revision
1549 1554 if roots:
1550 1555 lowestrev = min([self.rev(root) for root in roots])
1551 1556 else:
1552 1557 # No more roots? Return empty list
1553 1558 return nonodes
1554 1559 else:
1555 1560 # We are descending from nullid, and don't need to care about
1556 1561 # any other roots.
1557 1562 lowestrev = nullrev
1558 1563 roots = [nullid]
1559 1564 # Transform our roots list into a set.
1560 1565 descendants = set(roots)
1561 1566 # Also, keep the original roots so we can filter out roots that aren't
1562 1567 # 'real' roots (i.e. are descended from other roots).
1563 1568 roots = descendants.copy()
1564 1569 # Our topologically sorted list of output nodes.
1565 1570 orderedout = []
1566 1571 # Don't start at nullid since we don't want nullid in our output list,
1567 1572 # and if nullid shows up in descendants, empty parents will look like
1568 1573 # they're descendants.
1569 1574 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1570 1575 n = self.node(r)
1571 1576 isdescendant = False
1572 1577 if lowestrev == nullrev: # Everybody is a descendant of nullid
1573 1578 isdescendant = True
1574 1579 elif n in descendants:
1575 1580 # n is already a descendant
1576 1581 isdescendant = True
1577 1582 # This check only needs to be done here because all the roots
1578 1583 # will start being marked as descendants before the loop.
1579 1584 if n in roots:
1580 1585 # If n was a root, check if it's a 'real' root.
1581 1586 p = tuple(self.parents(n))
1582 1587 # If any of its parents are descendants, it's not a root.
1583 1588 if (p[0] in descendants) or (p[1] in descendants):
1584 1589 roots.remove(n)
1585 1590 else:
1586 1591 p = tuple(self.parents(n))
1587 1592 # A node is a descendant if either of its parents are
1588 1593 # descendants. (We seeded the descendants set with the roots
1589 1594 # up there, remember?)
1590 1595 if (p[0] in descendants) or (p[1] in descendants):
1591 1596 descendants.add(n)
1592 1597 isdescendant = True
1593 1598 if isdescendant and ((ancestors is None) or (n in ancestors)):
1594 1599 # Only include nodes that are both descendants and ancestors.
1595 1600 orderedout.append(n)
1596 1601 if (ancestors is not None) and (n in heads):
1597 1602 # We're trying to figure out which heads are reachable
1598 1603 # from roots.
1599 1604 # Mark this head as having been reached
1600 1605 heads[n] = True
1601 1606 elif ancestors is None:
1602 1607 # Otherwise, we're trying to discover the heads.
1603 1608 # Assume this is a head because if it isn't, the next step
1604 1609 # will eventually remove it.
1605 1610 heads[n] = True
1606 1611 # But, obviously its parents aren't.
1607 1612 for p in self.parents(n):
1608 1613 heads.pop(p, None)
1609 1614 heads = [head for head, flag in heads.iteritems() if flag]
1610 1615 roots = list(roots)
1611 1616 assert orderedout
1612 1617 assert roots
1613 1618 assert heads
1614 1619 return (orderedout, roots, heads)
1615 1620
1616 1621 def headrevs(self):
1617 1622 try:
1618 1623 return self.index.headrevs()
1619 1624 except AttributeError:
1620 1625 return self._headrevs()
1621 1626
1622 1627 def computephases(self, roots):
1623 1628 return self.index.computephasesmapsets(roots)
1624 1629
1625 1630 def _headrevs(self):
1626 1631 count = len(self)
1627 1632 if not count:
1628 1633 return [nullrev]
1629 1634 # we won't iterate over filtered revs, so nobody is a head at the start
1630 1635 ishead = [0] * (count + 1)
1631 1636 index = self.index
1632 1637 for r in self:
1633 1638 ishead[r] = 1 # I may be a head
1634 1639 e = index[r]
1635 1640 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1636 1641 return [r for r, val in enumerate(ishead) if val]
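    # A sketch of the pass above on a tiny (hypothetical) graph with edges
    # 0 <- 1 <- 2 and 0 <- 3: each rev is optimistically marked as a head,
    # and each rev clears its parents' slots as the loop advances:
    #
    #   ishead = [1, 1, 1, 1, ...]   # after marking each rev itself
    #   # rev 1 clears 0, rev 2 clears 1, rev 3 clears 0 again
    #   # -> [0, 0, 1, 1, ...], so the heads are revs 2 and 3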
1637 1642
1638 1643 def heads(self, start=None, stop=None):
1639 1644 """return the list of all nodes that have no children
1640 1645
1641 1646 if start is specified, only heads that are descendants of
1642 1647 start will be returned
1643 1648 if stop is specified, it will consider all the revs from stop
1644 1649 as if they had no children
1645 1650 """
1646 1651 if start is None and stop is None:
1647 1652 if not len(self):
1648 1653 return [nullid]
1649 1654 return [self.node(r) for r in self.headrevs()]
1650 1655
1651 1656 if start is None:
1652 1657 start = nullid
1653 1658 if stop is None:
1654 1659 stop = []
1655 1660 stoprevs = set([self.rev(n) for n in stop])
1656 1661 startrev = self.rev(start)
1657 1662 reachable = {startrev}
1658 1663 heads = {startrev}
1659 1664
1660 1665 parentrevs = self.parentrevs
1661 1666 for r in self.revs(start=startrev + 1):
1662 1667 for p in parentrevs(r):
1663 1668 if p in reachable:
1664 1669 if r not in stoprevs:
1665 1670 reachable.add(r)
1666 1671 heads.add(r)
1667 1672 if p in heads and p not in stoprevs:
1668 1673 heads.remove(p)
1669 1674
1670 1675 return [self.node(r) for r in heads]
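    # Usage sketch (hypothetical nodes a and b known to this revlog,
    # with rl an open revlog instance):
    #
    #   rl.heads()                    # every node without children
    #   rl.heads(start=a)             # heads that are descendants of a
    #   rl.heads(start=a, stop=[b])   # same, but treat b as childless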
1671 1676
1672 1677 def children(self, node):
1673 1678 """find the children of a given node"""
1674 1679 c = []
1675 1680 p = self.rev(node)
1676 1681 for r in self.revs(start=p + 1):
1677 1682 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1678 1683 if prevs:
1679 1684 for pr in prevs:
1680 1685 if pr == p:
1681 1686 c.append(self.node(r))
1682 1687 elif p == nullrev:
1683 1688 c.append(self.node(r))
1684 1689 return c
1685 1690
1686 1691 def commonancestorsheads(self, a, b):
1687 1692 """calculate all the heads of the common ancestors of nodes a and b"""
1688 1693 a, b = self.rev(a), self.rev(b)
1689 1694 ancs = self._commonancestorsheads(a, b)
1690 1695 return pycompat.maplist(self.node, ancs)
1691 1696
1692 1697 def _commonancestorsheads(self, *revs):
1693 1698 """calculate all the heads of the common ancestors of revs"""
1694 1699 try:
1695 1700 ancs = self.index.commonancestorsheads(*revs)
1696 1701 except (AttributeError, OverflowError): # C implementation failed
1697 1702 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1698 1703 return ancs
1699 1704
1700 1705 def isancestor(self, a, b):
1701 1706 """return True if node a is an ancestor of node b
1702 1707
1703 1708 A revision is considered an ancestor of itself."""
1704 1709 a, b = self.rev(a), self.rev(b)
1705 1710 return self.isancestorrev(a, b)
1706 1711
1707 1712 def descendant(self, a, b):
1708 1713 msg = 'revlog.descendant is deprecated, use revlog.isancestorrev'
1709 1714 util.nouideprecwarn(msg, '4.7')
1710 1715 return self.isancestorrev(a, b)
1711 1716
1712 1717 def isancestorrev(self, a, b):
1713 1718 """return True if revision a is an ancestor of revision b
1714 1719
1715 1720 A revision is considered an ancestor of itself.
1716 1721
1717 1722 The implementation of this is trivial but the use of
1718 1723 commonancestorsheads is not."""
1719 1724 if a == nullrev:
1720 1725 return True
1721 1726 elif a == b:
1722 1727 return True
1723 1728 elif a > b:
1724 1729 return False
1725 1730 return a in self._commonancestorsheads(a, b)
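    # Why this works, sketched: when a truly is an ancestor of b, a is
    # itself a head of the common ancestors of {a, b}, so membership in
    # _commonancestorsheads(a, b) decides the question. E.g. (hypothetical
    # revs):
    #
    #   rl.isancestorrev(2, 5)   # True iff rev 2 lies on some ancestry
    #                            # path leading to rev 5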
1726 1731
1727 1732 def ancestor(self, a, b):
1728 1733 """calculate the "best" common ancestor of nodes a and b"""
1729 1734
1730 1735 a, b = self.rev(a), self.rev(b)
1731 1736 try:
1732 1737 ancs = self.index.ancestors(a, b)
1733 1738 except (AttributeError, OverflowError):
1734 1739 ancs = ancestor.ancestors(self.parentrevs, a, b)
1735 1740 if ancs:
1736 1741 # choose a consistent winner when there's a tie
1737 1742 return min(map(self.node, ancs))
1738 1743 return nullid
1739 1744
1740 1745 def _match(self, id):
1741 1746 if isinstance(id, int):
1742 1747 # rev
1743 1748 return self.node(id)
1744 1749 if len(id) == 20:
1745 1750 # possibly a binary node
1746 1751 # odds of a binary node being all hex in ASCII are 1 in 10**25
1747 1752 try:
1748 1753 node = id
1749 1754 self.rev(node) # quick search the index
1750 1755 return node
1751 1756 except LookupError:
1752 1757 pass # may be partial hex id
1753 1758 try:
1754 1759 # str(rev)
1755 1760 rev = int(id)
1756 1761 if "%d" % rev != id:
1757 1762 raise ValueError
1758 1763 if rev < 0:
1759 1764 rev = len(self) + rev
1760 1765 if rev < 0 or rev >= len(self):
1761 1766 raise ValueError
1762 1767 return self.node(rev)
1763 1768 except (ValueError, OverflowError):
1764 1769 pass
1765 1770 if len(id) == 40:
1766 1771 try:
1767 1772 # a full hex nodeid?
1768 1773 node = bin(id)
1769 1774 self.rev(node)
1770 1775 return node
1771 1776 except (TypeError, LookupError):
1772 1777 pass
1773 1778
1774 1779 def _partialmatch(self, id):
1775 1780 # we don't care about wdirfilenodeids as they should always be full hashes
1776 1781 maybewdir = wdirhex.startswith(id)
1777 1782 try:
1778 1783 partial = self.index.partialmatch(id)
1779 1784 if partial and self.hasnode(partial):
1780 1785 if maybewdir:
1781 1786 # single 'ff...' match in radix tree, ambiguous with wdir
1782 1787 raise RevlogError
1783 1788 return partial
1784 1789 if maybewdir:
1785 1790 # no 'ff...' match in radix tree, wdir identified
1786 1791 raise error.WdirUnsupported
1787 1792 return None
1788 1793 except RevlogError:
1789 1794 # parsers.c radix tree lookup gave multiple matches
1790 1795 # fast path: for unfiltered changelog, radix tree is accurate
1791 1796 if not getattr(self, 'filteredrevs', None):
1792 1797 raise AmbiguousPrefixLookupError(id, self.indexfile,
1793 1798 _('ambiguous identifier'))
1794 1799 # fall through to slow path that filters hidden revisions
1795 1800 except (AttributeError, ValueError):
1796 1801 # we are pure python, or key was too short to search radix tree
1797 1802 pass
1798 1803
1799 1804 if id in self._pcache:
1800 1805 return self._pcache[id]
1801 1806
1802 1807 if len(id) <= 40:
1803 1808 try:
1804 1809 # hex(node)[:...]
1805 1810 l = len(id) // 2 # grab an even number of digits
1806 1811 prefix = bin(id[:l * 2])
1807 1812 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1808 1813 nl = [n for n in nl if hex(n).startswith(id) and
1809 1814 self.hasnode(n)]
1810 1815 if len(nl) > 0:
1811 1816 if len(nl) == 1 and not maybewdir:
1812 1817 self._pcache[id] = nl[0]
1813 1818 return nl[0]
1814 1819 raise AmbiguousPrefixLookupError(id, self.indexfile,
1815 1820 _('ambiguous identifier'))
1816 1821 if maybewdir:
1817 1822 raise error.WdirUnsupported
1818 1823 return None
1819 1824 except TypeError:
1820 1825 pass
1821 1826
1822 1827 def lookup(self, id):
1823 1828 """locate a node based on:
1824 1829 - revision number or str(revision number)
1825 1830 - nodeid or subset of hex nodeid
1826 1831 """
1827 1832 n = self._match(id)
1828 1833 if n is not None:
1829 1834 return n
1830 1835 n = self._partialmatch(id)
1831 1836 if n:
1832 1837 return n
1833 1838
1834 1839 raise LookupError(id, self.indexfile, _('no match found'))
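    # Usage sketch (hypothetical identifiers; each of these forms resolves,
    # with rl an open revlog instance):
    #
    #   rl.lookup(0)            # revision number
    #   rl.lookup('0')          # str(revision number)
    #   rl.lookup(binnode)      # 20-byte binary nodeid
    #   rl.lookup('1de3da')     # unambiguous hex nodeid prefix
    #
    # anything else raises LookupError.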
1835 1840
1836 1841 def shortest(self, node, minlength=1):
1837 1842 """Find the shortest unambiguous prefix that matches node."""
1838 1843 def isvalid(prefix):
1839 1844 try:
1840 1845 node = self._partialmatch(prefix)
1841 1846 except error.RevlogError:
1842 1847 return False
1843 1848 except error.WdirUnsupported:
1844 1849 # single 'ff...' match
1845 1850 return True
1846 1851 if node is None:
1847 1852 raise LookupError(node, self.indexfile, _('no node'))
1848 1853 return True
1849 1854
1850 1855 def maybewdir(prefix):
1851 1856 return all(c == 'f' for c in prefix)
1852 1857
1853 1858 hexnode = hex(node)
1854 1859
1855 1860 def disambiguate(hexnode, minlength):
1856 1861 """Disambiguate against wdirid."""
1857 1862 for length in range(minlength, 41):
1858 1863 prefix = hexnode[:length]
1859 1864 if not maybewdir(prefix):
1860 1865 return prefix
1861 1866
1862 1867 if not getattr(self, 'filteredrevs', None):
1863 1868 try:
1864 1869 length = max(self.index.shortest(node), minlength)
1865 1870 return disambiguate(hexnode, length)
1866 1871 except RevlogError:
1867 1872 if node != wdirid:
1868 1873 raise LookupError(node, self.indexfile, _('no node'))
1869 1874 except AttributeError:
1870 1875 # Fall through to pure code
1871 1876 pass
1872 1877
1873 1878 if node == wdirid:
1874 1879 for length in range(minlength, 41):
1875 1880 prefix = hexnode[:length]
1876 1881 if isvalid(prefix):
1877 1882 return prefix
1878 1883
1879 1884 for length in range(minlength, 41):
1880 1885 prefix = hexnode[:length]
1881 1886 if isvalid(prefix):
1882 1887 return disambiguate(hexnode, length)
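    # Usage sketch: for a (hypothetical) node whose hex form starts with
    # '1de3da', this tries '1', '1d', '1de', ... and returns the first
    # prefix that is unambiguous and cannot be mistaken for the all-'f'
    # working directory pseudo-id:
    #
    #   prefix = rl.shortest(node, minlength=1)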
1883 1888
1884 1889 def cmp(self, node, text):
1885 1890 """compare text with a given file revision
1886 1891
1887 1892 returns True if text is different from what is stored.
1888 1893 """
1889 1894 p1, p2 = self.parents(node)
1890 1895 return hash(text, p1, p2) != node
1891 1896
1892 1897 def _cachesegment(self, offset, data):
1893 1898 """Add a segment to the revlog cache.
1894 1899
1895 1900 Accepts an absolute offset and the data that is at that location.
1896 1901 """
1897 1902 o, d = self._chunkcache
1898 1903 # try to add to existing cache
1899 1904 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1900 1905 self._chunkcache = o, d + data
1901 1906 else:
1902 1907 self._chunkcache = offset, data
1903 1908
1904 1909 def _readsegment(self, offset, length, df=None):
1905 1910 """Load a segment of raw data from the revlog.
1906 1911
1907 1912 Accepts an absolute offset, length to read, and an optional existing
1908 1913 file handle to read from.
1909 1914
1910 1915 If an existing file handle is passed, it will be seeked and the
1911 1916 original seek position will NOT be restored.
1912 1917
1913 1918 Returns a str or buffer of raw byte data.
1914 1919 """
1915 1920 # Cache data both forward and backward around the requested
1916 1921 # data, in a fixed size window. This helps speed up operations
1917 1922 # involving reading the revlog backwards.
1918 1923 cachesize = self._chunkcachesize
1919 1924 realoffset = offset & ~(cachesize - 1)
1920 1925 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1921 1926 - realoffset)
1922 1927 with self._datareadfp(df) as df:
1923 1928 df.seek(realoffset)
1924 1929 d = df.read(reallength)
1925 1930 self._cachesegment(realoffset, d)
1926 1931 if offset != realoffset or reallength != length:
1927 1932 return util.buffer(d, offset - realoffset, length)
1928 1933 return d
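    # The window arithmetic above, sketched with (hypothetical) numbers:
    # for cachesize=65536, offset=70000 and length=100:
    #
    #   realoffset = 70000 & ~65535                      # -> 65536
    #   reallength = ((70100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so the 100 requested bytes are served from one aligned 64k window.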
1929 1934
1930 1935 def _getsegment(self, offset, length, df=None):
1931 1936 """Obtain a segment of raw data from the revlog.
1932 1937
1933 1938 Accepts an absolute offset, length of bytes to obtain, and an
1934 1939 optional file handle to the already-opened revlog. If the file
1935 1940 handle is used, its original seek position will not be preserved.
1936 1941
1937 1942 Requests for data may be returned from a cache.
1938 1943
1939 1944 Returns a str or a buffer instance of raw byte data.
1940 1945 """
1941 1946 o, d = self._chunkcache
1942 1947 l = len(d)
1943 1948
1944 1949 # is it in the cache?
1945 1950 cachestart = offset - o
1946 1951 cacheend = cachestart + length
1947 1952 if cachestart >= 0 and cacheend <= l:
1948 1953 if cachestart == 0 and cacheend == l:
1949 1954 return d # avoid a copy
1950 1955 return util.buffer(d, cachestart, cacheend - cachestart)
1951 1956
1952 1957 return self._readsegment(offset, length, df=df)
1953 1958
1954 1959 def _getsegmentforrevs(self, startrev, endrev, df=None):
1955 1960 """Obtain a segment of raw data corresponding to a range of revisions.
1956 1961
1957 1962 Accepts the start and end revisions and an optional already-open
1958 1963 file handle to be used for reading. If the file handle is read, its
1959 1964 seek position will not be preserved.
1960 1965
1961 1966 Requests for data may be satisfied by a cache.
1962 1967
1963 1968 Returns a 2-tuple of (offset, data) for the requested range of
1964 1969 revisions. Offset is the integer offset from the beginning of the
1965 1970 revlog and data is a str or buffer of the raw byte data.
1966 1971
1967 1972 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1968 1973 to determine where each revision's data begins and ends.
1969 1974 """
1970 1975 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1971 1976 # (functions are expensive).
1972 1977 index = self.index
1973 1978 istart = index[startrev]
1974 1979 start = int(istart[0] >> 16)
1975 1980 if startrev == endrev:
1976 1981 end = start + istart[1]
1977 1982 else:
1978 1983 iend = index[endrev]
1979 1984 end = int(iend[0] >> 16) + iend[1]
1980 1985
1981 1986 if self._inline:
1982 1987 start += (startrev + 1) * self._io.size
1983 1988 end += (endrev + 1) * self._io.size
1984 1989 length = end - start
1985 1990
1986 1991 return start, self._getsegment(start, length, df=df)
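    # Sketch of the inline adjustment above: in an inline revlog the data
    # for each rev is stored immediately after that rev's index entry, so
    # (illustrated with the 64-byte v1 entry size) the data for revs 3..4
    # begins at
    #
    #   start(3) + (3 + 1) * 64
    #
    # instead of at start(3) as it would with a separate data file.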
1987 1992
1988 1993 def _chunk(self, rev, df=None):
1989 1994 """Obtain a single decompressed chunk for a revision.
1990 1995
1991 1996 Accepts an integer revision and an optional already-open file handle
1992 1997 to be used for reading. If used, the seek position of the file will not
1993 1998 be preserved.
1994 1999
1995 2000 Returns a str holding uncompressed data for the requested revision.
1996 2001 """
1997 2002 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1998 2003
1999 2004 def _chunks(self, revs, df=None, targetsize=None):
2000 2005 """Obtain decompressed chunks for the specified revisions.
2001 2006
2002 2007 Accepts an iterable of numeric revisions that are assumed to be in
2003 2008 ascending order. Also accepts an optional already-open file handle
2004 2009 to be used for reading. If used, the seek position of the file will
2005 2010 not be preserved.
2006 2011
2007 2012 This function is similar to calling ``self._chunk()`` multiple times,
2008 2013 but is faster.
2009 2014
2010 2015 Returns a list with decompressed data for each requested revision.
2011 2016 """
2012 2017 if not revs:
2013 2018 return []
2014 2019 start = self.start
2015 2020 length = self.length
2016 2021 inline = self._inline
2017 2022 iosize = self._io.size
2018 2023 buffer = util.buffer
2019 2024
2020 2025 l = []
2021 2026 ladd = l.append
2022 2027
2023 2028 if not self._withsparseread:
2024 2029 slicedchunks = (revs,)
2025 2030 else:
2026 2031 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2027 2032
2028 2033 for revschunk in slicedchunks:
2029 2034 firstrev = revschunk[0]
2030 2035 # Skip trailing revisions with empty diff
2031 2036 for lastrev in revschunk[::-1]:
2032 2037 if length(lastrev) != 0:
2033 2038 break
2034 2039
2035 2040 try:
2036 2041 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2037 2042 except OverflowError:
2038 2043 # issue4215 - we can't cache a run of chunks greater than
2039 2044 # 2G on Windows
2040 2045 return [self._chunk(rev, df=df) for rev in revschunk]
2041 2046
2042 2047 decomp = self.decompress
2043 2048 for rev in revschunk:
2044 2049 chunkstart = start(rev)
2045 2050 if inline:
2046 2051 chunkstart += (rev + 1) * iosize
2047 2052 chunklength = length(rev)
2048 2053 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2049 2054
2050 2055 return l
2051 2056
2052 2057 def _chunkclear(self):
2053 2058 """Clear the raw chunk cache."""
2054 2059 self._chunkcache = (0, '')
2055 2060
2056 2061 def deltaparent(self, rev):
2057 2062 """return deltaparent of the given revision"""
2058 2063 base = self.index[rev][3]
2059 2064 if base == rev:
2060 2065 return nullrev
2061 2066 elif self._generaldelta:
2062 2067 return base
2063 2068 else:
2064 2069 return rev - 1
2065 2070
2066 2071 def revdiff(self, rev1, rev2):
2067 2072 """return or calculate a delta between two revisions
2068 2073
2069 2074 The delta calculated is in binary form and is intended to be written to
2070 2075 revlog data directly. So this function needs raw revision data.
2071 2076 """
2072 2077 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2073 2078 return bytes(self._chunk(rev2))
2074 2079
2075 2080 return mdiff.textdiff(self.revision(rev1, raw=True),
2076 2081 self.revision(rev2, raw=True))
2077 2082
2078 2083 def revision(self, nodeorrev, _df=None, raw=False):
2079 2084 """return an uncompressed revision of a given node or revision
2080 2085 number.
2081 2086
2082 2087 _df - an existing file handle to read from. (internal-only)
2083 2088 raw - an optional argument specifying if the revision data is to be
2084 2089 treated as raw data when applying flag transforms. 'raw' should be set
2085 2090 to True when generating changegroups or in debug commands.
2086 2091 """
2087 2092 if isinstance(nodeorrev, int):
2088 2093 rev = nodeorrev
2089 2094 node = self.node(rev)
2090 2095 else:
2091 2096 node = nodeorrev
2092 2097 rev = None
2093 2098
2094 2099 cachedrev = None
2095 2100 flags = None
2096 2101 rawtext = None
2097 2102 if node == nullid:
2098 2103 return ""
2099 2104 if self._cache:
2100 2105 if self._cache[0] == node:
2101 2106 # _cache only stores rawtext
2102 2107 if raw:
2103 2108 return self._cache[2]
2104 2109 # duplicated, but good for perf
2105 2110 if rev is None:
2106 2111 rev = self.rev(node)
2107 2112 if flags is None:
2108 2113 flags = self.flags(rev)
2109 2114 # no extra flags set, no flag processor runs, text = rawtext
2110 2115 if flags == REVIDX_DEFAULT_FLAGS:
2111 2116 return self._cache[2]
2112 2117 # rawtext is reusable. need to run flag processor
2113 2118 rawtext = self._cache[2]
2114 2119
2115 2120 cachedrev = self._cache[1]
2116 2121
2117 2122 # look up what we need to read
2118 2123 if rawtext is None:
2119 2124 if rev is None:
2120 2125 rev = self.rev(node)
2121 2126
2122 2127 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2123 2128 if stopped:
2124 2129 rawtext = self._cache[2]
2125 2130
2126 2131 # drop cache to save memory
2127 2132 self._cache = None
2128 2133
2129 2134 targetsize = None
2130 2135 rawsize = self.index[rev][2]
2131 2136 if 0 <= rawsize:
2132 2137 targetsize = 4 * rawsize
2133 2138
2134 2139 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2135 2140 if rawtext is None:
2136 2141 rawtext = bytes(bins[0])
2137 2142 bins = bins[1:]
2138 2143
2139 2144 rawtext = mdiff.patches(rawtext, bins)
2140 2145 self._cache = (node, rev, rawtext)
2141 2146
2142 2147 if flags is None:
2143 2148 if rev is None:
2144 2149 rev = self.rev(node)
2145 2150 flags = self.flags(rev)
2146 2151
2147 2152 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2148 2153 if validatehash:
2149 2154 self.checkhash(text, node, rev=rev)
2150 2155
2151 2156 return text
2152 2157
2153 2158 def hash(self, text, p1, p2):
2154 2159 """Compute a node hash.
2155 2160
2156 2161 Available as a function so that subclasses can replace the hash
2157 2162 as needed.
2158 2163 """
2159 2164 return hash(text, p1, p2)
2160 2165
2161 2166 def _processflags(self, text, flags, operation, raw=False):
2162 2167 """Inspect revision data flags and applies transforms defined by
2163 2168 registered flag processors.
2164 2169
2165 2170 ``text`` - the revision data to process
2166 2171 ``flags`` - the revision flags
2167 2172 ``operation`` - the operation being performed (read or write)
2168 2173 ``raw`` - an optional argument describing if the raw transform should be
2169 2174 applied.
2170 2175
2171 2176 This method processes the flags in the order (or reverse order if
2172 2177 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2173 2178 flag processors registered for present flags. The order of flags defined
2174 2179 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2175 2180
2176 2181 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2177 2182 processed text and ``validatehash`` is a bool indicating whether the
2178 2183 returned text should be checked for hash integrity.
2179 2184
2180 2185 Note: If the ``raw`` argument is set, it has precedence over the
2181 2186 operation and will only update the value of ``validatehash``.
2182 2187 """
2183 2188 # fast path: no flag processors will run
2184 2189 if flags == 0:
2185 2190 return text, True
2186 2191 if operation not in ('read', 'write'):
2187 2192 raise ProgrammingError(_("invalid '%s' operation") % operation)
2188 2193 # Check all flags are known.
2189 2194 if flags & ~REVIDX_KNOWN_FLAGS:
2190 2195 raise RevlogError(_("incompatible revision flag '%#x'") %
2191 2196 (flags & ~REVIDX_KNOWN_FLAGS))
2192 2197 validatehash = True
2193 2198 # Depending on the operation (read or write), the order might be
2194 2199 # reversed due to non-commutative transforms.
2195 2200 orderedflags = REVIDX_FLAGS_ORDER
2196 2201 if operation == 'write':
2197 2202 orderedflags = reversed(orderedflags)
2198 2203
2199 2204 for flag in orderedflags:
2200 2205 # If a flagprocessor has been registered for a known flag, apply the
2201 2206 # related operation transform and update result tuple.
2202 2207 if flag & flags:
2203 2208 vhash = True
2204 2209
2205 2210 if flag not in _flagprocessors:
2206 2211 message = _("missing processor for flag '%#x'") % (flag)
2207 2212 raise RevlogError(message)
2208 2213
2209 2214 processor = _flagprocessors[flag]
2210 2215 if processor is not None:
2211 2216 readtransform, writetransform, rawtransform = processor
2212 2217
2213 2218 if raw:
2214 2219 vhash = rawtransform(self, text)
2215 2220 elif operation == 'read':
2216 2221 text, vhash = readtransform(self, text)
2217 2222 else: # write operation
2218 2223 text, vhash = writetransform(self, text)
2219 2224 validatehash = validatehash and vhash
2220 2225
2221 2226 return text, validatehash
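    # Sketch of a processor triple as consumed above (decode/encode are
    # hypothetical helpers): each registered value is (read, write, raw),
    # where read/write return (newtext, validatehash) and raw only reports
    # whether the stored rawtext can be checked against the node hash:
    #
    #   def _read(rl, text):  return decode(text), True
    #   def _write(rl, text): return encode(text), True
    #   def _raw(rl, text):   return False
    #
    #   addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))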
2222 2227
2223 2228 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2224 2229 """Check node hash integrity.
2225 2230
2226 2231 Available as a function so that subclasses can extend hash mismatch
2227 2232 behaviors as needed.
2228 2233 """
2229 2234 try:
2230 2235 if p1 is None and p2 is None:
2231 2236 p1, p2 = self.parents(node)
2232 2237 if node != self.hash(text, p1, p2):
2233 2238 revornode = rev
2234 2239 if revornode is None:
2235 2240 revornode = templatefilters.short(hex(node))
2236 2241 raise RevlogError(_("integrity check failed on %s:%s")
2237 2242 % (self.indexfile, pycompat.bytestr(revornode)))
2238 2243 except RevlogError:
2239 2244 if self._censorable and _censoredtext(text):
2240 2245 raise error.CensoredNodeError(self.indexfile, node, text)
2241 2246 raise
2242 2247
2243 2248 def _enforceinlinesize(self, tr, fp=None):
2244 2249 """Check if the revlog is too big for inline and convert if so.
2245 2250
2246 2251 This should be called after revisions are added to the revlog. If the
2247 2252 revlog has grown too large to be an inline revlog, it will convert it
2248 2253 to use multiple index and data files.
2249 2254 """
2250 2255 tiprev = len(self) - 1
2251 2256 if (not self._inline or
2252 2257 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2253 2258 return
2254 2259
2255 2260 trinfo = tr.find(self.indexfile)
2256 2261 if trinfo is None:
2257 2262 raise RevlogError(_("%s not found in the transaction")
2258 2263 % self.indexfile)
2259 2264
2260 2265 trindex = trinfo[2]
2261 2266 if trindex is not None:
2262 2267 dataoff = self.start(trindex)
2263 2268 else:
2264 2269 # revlog was stripped at start of transaction, use all leftover data
2265 2270 trindex = len(self) - 1
2266 2271 dataoff = self.end(tiprev)
2267 2272
2268 2273 tr.add(self.datafile, dataoff)
2269 2274
2270 2275 if fp:
2271 2276 fp.flush()
2272 2277 fp.close()
2273 2278
2274 2279 with self._datafp('w') as df:
2275 2280 for r in self:
2276 2281 df.write(self._getsegmentforrevs(r, r)[1])
2277 2282
2278 2283 with self._indexfp('w') as fp:
2279 2284 self.version &= ~FLAG_INLINE_DATA
2280 2285 self._inline = False
2281 2286 io = self._io
2282 2287 for i in self:
2283 2288 e = io.packentry(self.index[i], self.node, self.version, i)
2284 2289 fp.write(e)
2285 2290
2286 2291 # the temp file replaces the real index when we exit the context
2287 2292 # manager
2288 2293
2289 2294 tr.replace(self.indexfile, trindex * self._io.size)
2290 2295 self._chunkclear()
2291 2296
2292 2297 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2293 2298 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2294 2299 """add a revision to the log
2295 2300
2296 2301 text - the revision data to add
2297 2302 transaction - the transaction object used for rollback
2298 2303 link - the linkrev data to add
2299 2304 p1, p2 - the parent nodeids of the revision
2300 2305 cachedelta - an optional precomputed delta
2301 2306 node - nodeid of revision; typically node is not specified, and it is
2302 2307 computed by default as hash(text, p1, p2); however, subclasses might
2303 2308 use a different hashing method (and override checkhash() in that case)
2304 2309 flags - the known flags to set on the revision
2305 2310 deltacomputer - an optional _deltacomputer instance shared between
2306 2311 multiple calls
2307 2312 """
2308 2313 if link == nullrev:
2309 2314 raise RevlogError(_("attempted to add linkrev -1 to %s")
2310 2315 % self.indexfile)
2311 2316
2312 2317 if flags:
2313 2318 node = node or self.hash(text, p1, p2)
2314 2319
2315 2320 rawtext, validatehash = self._processflags(text, flags, 'write')
2316 2321
2317 2322 # If the flag processor modifies the revision data, ignore any provided
2318 2323 # cachedelta.
2319 2324 if rawtext != text:
2320 2325 cachedelta = None
2321 2326
2322 2327 if len(rawtext) > _maxentrysize:
2323 2328 raise RevlogError(
2324 2329 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2325 2330 % (self.indexfile, len(rawtext)))
2326 2331
2327 2332 node = node or self.hash(rawtext, p1, p2)
2328 2333 if node in self.nodemap:
2329 2334 return node
2330 2335
2331 2336 if validatehash:
2332 2337 self.checkhash(rawtext, node, p1=p1, p2=p2)
2333 2338
2334 2339 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2335 2340 flags, cachedelta=cachedelta,
2336 2341 deltacomputer=deltacomputer)
2337 2342
2338 2343 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2339 2344 cachedelta=None, deltacomputer=None):
2340 2345 """add a raw revision with known flags, node and parents
2341 2346 useful when reusing a revision not stored in this revlog (e.g. received
2342 2347 over the wire, or read from an external bundle).
2343 2348 """
2344 2349 dfh = None
2345 2350 if not self._inline:
2346 2351 dfh = self._datafp("a+")
2347 2352 ifh = self._indexfp("a+")
2348 2353 try:
2349 2354 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2350 2355 flags, cachedelta, ifh, dfh,
2351 2356 deltacomputer=deltacomputer)
2352 2357 finally:
2353 2358 if dfh:
2354 2359 dfh.close()
2355 2360 ifh.close()
2356 2361
2357 2362 def compress(self, data):
2358 2363 """Generate a possibly-compressed representation of data."""
2359 2364 if not data:
2360 2365 return '', data
2361 2366
2362 2367 compressed = self._compressor.compress(data)
2363 2368
2364 2369 if compressed:
2365 2370 # The revlog compressor added the header in the returned data.
2366 2371 return '', compressed
2367 2372
2368 2373 if data[0:1] == '\0':
2369 2374 return '', data
2370 2375 return 'u', data
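    # The return convention above, sketched: callers concatenate the two
    # parts, so a chunk lands on disk in one of three shapes:
    #
    #   ('', compressed)   # compressor output carries its own header
    #   ('', data)         # data already starting with '\0', kept as-is
    #   ('u', data)        # 'u' tags data stored uncompressed
    #
    # decompress() below dispatches on that first byte.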
2371 2376
2372 2377 def decompress(self, data):
2373 2378 """Decompress a revlog chunk.
2374 2379
2375 2380 The chunk is expected to begin with a header identifying the
2376 2381 format type so it can be routed to an appropriate decompressor.
2377 2382 """
2378 2383 if not data:
2379 2384 return data
2380 2385
2381 2386 # Revlogs are read much more frequently than they are written and many
2382 2387 # chunks only take microseconds to decompress, so performance is
2383 2388 # important here.
2384 2389 #
2385 2390 # We can make a few assumptions about revlogs:
2386 2391 #
2387 2392 # 1) the majority of chunks will be compressed (as opposed to inline
2388 2393 # raw data).
2389 2394 # 2) decompressing *any* data will likely be at least 10x slower than
2390 2395 # returning raw inline data.
2391 2396 # 3) we want to prioritize common and officially supported compression
2392 2397 # engines
2393 2398 #
2394 2399 # It follows that we want to optimize for "decompress compressed data
2395 2400 # when encoded with common and officially supported compression engines"
2396 2401 # case over "raw data" and "data encoded by less common or non-official
2397 2402 # compression engines." That is why we have the inline lookup first
2398 2403 # followed by the compengines lookup.
2399 2404 #
2400 2405 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2401 2406 # compressed chunks. And this matters for changelog and manifest reads.
2402 2407 t = data[0:1]
2403 2408
2404 2409 if t == 'x':
2405 2410 try:
2406 2411 return _zlibdecompress(data)
2407 2412 except zlib.error as e:
2408 2413 raise RevlogError(_('revlog decompress error: %s') %
2409 2414 stringutil.forcebytestr(e))
2410 2415 # '\0' is more common than 'u' so it goes first.
2411 2416 elif t == '\0':
2412 2417 return data
2413 2418 elif t == 'u':
2414 2419 return util.buffer(data, 1)
2415 2420
2416 2421 try:
2417 2422 compressor = self._decompressors[t]
2418 2423 except KeyError:
2419 2424 try:
2420 2425 engine = util.compengines.forrevlogheader(t)
2421 2426 compressor = engine.revlogcompressor()
2422 2427 self._decompressors[t] = compressor
2423 2428 except KeyError:
2424 2429 raise RevlogError(_('unknown compression type %r') % t)
2425 2430
2426 2431 return compressor.decompress(data)
2427 2432
2428 2433 def _isgooddeltainfo(self, deltainfo, revinfo):
2429 2434 """Returns True if the given delta is good. Good means that it is within
2430 2435 the disk span, disk size, and chain length bounds that we know to be
2431 2436 performant."""
2432 2437 if deltainfo is None:
2433 2438 return False
2434 2439
2435 2440 # - 'deltainfo.distance' is the distance from the base revision --
2436 2441 # bounding it limits the amount of I/O we need to do.
2437 2442 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2438 2443 # deltas we need to apply -- bounding it limits the amount of CPU
2439 2444 # we consume.
2440 2445
2441 2446 if self._sparserevlog:
2442 2447 # As sparse-read will be used, we can consider that the distance,
2443 2448 # instead of being the span of the whole chunk,
2444 2449 # is the span of the largest read chunk
2445 2450 base = deltainfo.base
2446 2451
2447 2452 if base != nullrev:
2448 2453 deltachain = self._deltachain(base)[0]
2449 2454 else:
2450 2455 deltachain = []
2451 2456
2452 2457 chunks = _slicechunk(self, deltachain, deltainfo)
2453 2458 distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
2454 2459 else:
2455 2460 distance = deltainfo.distance
2456 2461
2457 2462 textlen = revinfo.textlen
2458 2463 defaultmax = textlen * 4
2459 2464 maxdist = self._maxdeltachainspan
2460 2465 if not maxdist:
2461 2466 maxdist = distance # ensure the conditional pass
2462 2467 maxdist = max(maxdist, defaultmax)
2463 2468 if self._sparserevlog and maxdist < self._srmingapsize:
2464 2469 # In multiple places, we ignore irrelevant data ranges below a
2465 2470 # certain size. We also apply this tradeoff here and relax the span
2466 2471 # constraint for small enough content.
2467 2472 maxdist = self._srmingapsize
2468 2473 if (distance > maxdist or deltainfo.deltalen > textlen or
2469 2474 deltainfo.compresseddeltalen > textlen * 2 or
2470 2475 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2471 2476 return False
2472 2477
2473 2478 return True
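    # The bound above with (hypothetical) numbers, for a 1000-byte
    # revision: a candidate delta is rejected when the on-disk span to
    # read exceeds max(maxdeltachainspan, 4 * 1000) bytes, when the delta
    # itself is larger than 1000 bytes, when the summed deltas to apply
    # exceed 2 * 1000 bytes, or when the chain outgrows maxchainlen; an
    # unset maxdeltachainspan effectively disables the span check.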
2474 2479
2475 2480 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2476 2481 cachedelta, ifh, dfh, alwayscache=False,
2477 2482 deltacomputer=None):
2478 2483 """internal function to add revisions to the log
2479 2484
2480 2485 see addrevision for argument descriptions.
2481 2486
2482 2487 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2483 2488
2484 2489 if "deltacomputer" is not provided or None, a default _deltacomputer
2485 2490 instance will be used.
2486 2491
2487 2492 invariants:
2488 2493 - rawtext is optional (can be None); if not set, cachedelta must be set.
2489 2494 if both are set, they must correspond to each other.
2490 2495 """
2491 2496 if node == nullid:
2492 2497 raise RevlogError(_("%s: attempt to add null revision") %
2493 2498 (self.indexfile))
2494 2499 if node == wdirid or node in wdirfilenodeids:
2495 2500 raise RevlogError(_("%s: attempt to add wdir revision") %
2496 2501 (self.indexfile))
2497 2502
2498 2503 if self._inline:
2499 2504 fh = ifh
2500 2505 else:
2501 2506 fh = dfh
2502 2507
2503 2508 btext = [rawtext]
2504 2509
2505 2510 curr = len(self)
2506 2511 prev = curr - 1
2507 2512 offset = self.end(prev)
2508 2513 p1r, p2r = self.rev(p1), self.rev(p2)
2509 2514
2510 2515 # full versions are inserted when the needed deltas
2511 2516 # become comparable to the uncompressed text
2512 2517 if rawtext is None:
2513 2518 # need rawtext size, before changed by flag processors, which is
2514 2519 # the non-raw size. use revlog explicitly to avoid filelog's extra
2515 2520 # logic that might remove metadata size.
2516 2521 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2517 2522 cachedelta[1])
2518 2523 else:
2519 2524 textlen = len(rawtext)
2520 2525
2521 2526 if deltacomputer is None:
2522 2527 deltacomputer = _deltacomputer(self)
2523 2528
2524 2529 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2525 2530
2526 2531 # no delta for flag processor revision (see "candelta" for why)
2527 2532 # not calling candelta since only one revision needs test, also to
2528 2533 # avoid overhead fetching flags again.
2529 2534 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2530 2535 deltainfo = None
2531 2536 else:
2532 2537 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2533 2538
2534 2539 if deltainfo is not None:
2535 2540 base = deltainfo.base
2536 2541 chainbase = deltainfo.chainbase
2537 2542 data = deltainfo.data
2538 2543 l = deltainfo.deltalen
2539 2544 else:
2540 2545 rawtext = deltacomputer.buildtext(revinfo, fh)
2541 2546 data = self.compress(rawtext)
2542 2547 l = len(data[1]) + len(data[0])
2543 2548 base = chainbase = curr
2544 2549
2545 2550 e = (offset_type(offset, flags), l, textlen,
2546 2551 base, link, p1r, p2r, node)
2547 2552 self.index.insert(-1, e)
2548 2553 self.nodemap[node] = curr
2549 2554
2550 2555 entry = self._io.packentry(e, self.node, self.version, curr)
2551 2556 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2552 2557
2553 2558 if alwayscache and rawtext is None:
2554 2559 rawtext = deltacomputer.buildtext(revinfo, fh)
2555 2560
2556 2561 if type(rawtext) == bytes: # only accept immutable objects
2557 2562 self._cache = (node, curr, rawtext)
2558 2563 self._chainbasecache[curr] = chainbase
2559 2564 return node
2560 2565
2561 2566 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2562 2567 # Files opened in a+ mode have inconsistent behavior on various
2563 2568 # platforms. Windows requires that a file positioning call be made
2564 2569 # when the file handle transitions between reads and writes. See
2565 2570 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2566 2571 # platforms, Python or the platform itself can be buggy. Some versions
2567 2572 # of Solaris have been observed to not append at the end of the file
2568 2573 # if the file was seeked to before the end. See issue4943 for more.
2569 2574 #
2570 2575 # We work around this issue by inserting a seek() before writing.
2571 2576 # Note: This is likely not necessary on Python 3.
2572 2577 ifh.seek(0, os.SEEK_END)
2573 2578 if dfh:
2574 2579 dfh.seek(0, os.SEEK_END)
2575 2580
2576 2581 curr = len(self) - 1
2577 2582 if not self._inline:
2578 2583 transaction.add(self.datafile, offset)
2579 2584 transaction.add(self.indexfile, curr * len(entry))
2580 2585 if data[0]:
2581 2586 dfh.write(data[0])
2582 2587 dfh.write(data[1])
2583 2588 ifh.write(entry)
2584 2589 else:
2585 2590 offset += curr * self._io.size
2586 2591 transaction.add(self.indexfile, offset, curr)
2587 2592 ifh.write(entry)
2588 2593 ifh.write(data[0])
2589 2594 ifh.write(data[1])
2590 2595 self._enforceinlinesize(transaction, ifh)
2591 2596
2592 2597 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2593 2598 """
2594 2599 add a delta group
2595 2600
2596 2601 given a set of deltas, add them to the revision log. the
2597 2602 first delta is against its parent, which should be in our
2598 2603 log, the rest are against the previous delta.
2599 2604
2600 2605 If ``addrevisioncb`` is defined, it will be called with arguments of
2601 2606 this revlog and the node that was added.
2602 2607 """
2603 2608
2604 2609 nodes = []
2605 2610
2606 2611 r = len(self)
2607 2612 end = 0
2608 2613 if r:
2609 2614 end = self.end(r - 1)
2610 2615 ifh = self._indexfp("a+")
2611 2616 isize = r * self._io.size
2612 2617 if self._inline:
2613 2618 transaction.add(self.indexfile, end + isize, r)
2614 2619 dfh = None
2615 2620 else:
2616 2621 transaction.add(self.indexfile, isize, r)
2617 2622 transaction.add(self.datafile, end)
2618 2623 dfh = self._datafp("a+")
2619 2624 def flush():
2620 2625 if dfh:
2621 2626 dfh.flush()
2622 2627 ifh.flush()
2623 2628 try:
2624 2629 deltacomputer = _deltacomputer(self)
2625 2630 # loop through our set of deltas
2626 2631 for data in deltas:
2627 2632 node, p1, p2, linknode, deltabase, delta, flags = data
2628 2633 link = linkmapper(linknode)
2629 2634 flags = flags or REVIDX_DEFAULT_FLAGS
2630 2635
2631 2636 nodes.append(node)
2632 2637
2633 2638 if node in self.nodemap:
2634 2639 # this can happen if two branches make the same change
2635 2640 continue
2636 2641
2637 2642 for p in (p1, p2):
2638 2643 if p not in self.nodemap:
2639 2644 raise LookupError(p, self.indexfile,
2640 2645 _('unknown parent'))
2641 2646
2642 2647 if deltabase not in self.nodemap:
2643 2648 raise LookupError(deltabase, self.indexfile,
2644 2649 _('unknown delta base'))
2645 2650
2646 2651 baserev = self.rev(deltabase)
2647 2652
2648 2653 if baserev != nullrev and self.iscensored(baserev):
2649 2654 # if base is censored, delta must be full replacement in a
2650 2655 # single patch operation
2651 2656 hlen = struct.calcsize(">lll")
2652 2657 oldlen = self.rawsize(baserev)
2653 2658 newlen = len(delta) - hlen
2654 2659 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2655 2660 raise error.CensoredBaseError(self.indexfile,
2656 2661 self.node(baserev))
2657 2662
2658 2663 if not flags and self._peek_iscensored(baserev, delta, flush):
2659 2664 flags |= REVIDX_ISCENSORED
2660 2665
2661 2666 # We assume consumers of addrevisioncb will want to retrieve
2662 2667 # the added revision, which will require a call to
2663 2668 # revision(). revision() will fast path if there is a cache
2664 2669 # hit. So, we tell _addrevision() to always cache in this case.
2665 2670 # We're only using addgroup() in the context of changegroup
2666 2671 # generation so the revision data can always be handled as raw
2667 2672 # by the flagprocessor.
2668 2673 self._addrevision(node, None, transaction, link,
2669 2674 p1, p2, flags, (baserev, delta),
2670 2675 ifh, dfh,
2671 2676 alwayscache=bool(addrevisioncb),
2672 2677 deltacomputer=deltacomputer)
2673 2678
2674 2679 if addrevisioncb:
2675 2680 addrevisioncb(self, node)
2676 2681
2677 2682 if not dfh and not self._inline:
2678 2683 # addrevision switched from inline to conventional
2679 2684 # reopen the index
2680 2685 ifh.close()
2681 2686 dfh = self._datafp("a+")
2682 2687 ifh = self._indexfp("a+")
2683 2688 finally:
2684 2689 if dfh:
2685 2690 dfh.close()
2686 2691 ifh.close()
2687 2692
2688 2693 return nodes
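    # Shape of one element of ``deltas`` as unpacked above (a sketch;
    # values are hypothetical):
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # deltabase names an already-known node the binary delta applies
    # against, and linknode is mapped to a linkrev via linkmapper.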
2689 2694
2690 2695 def iscensored(self, rev):
2691 2696 """Check if a file revision is censored."""
2692 2697 if not self._censorable:
2693 2698 return False
2694 2699
2695 2700 return self.flags(rev) & REVIDX_ISCENSORED
2696 2701
2697 2702 def _peek_iscensored(self, baserev, delta, flush):
2698 2703 """Quickly check if a delta produces a censored revision."""
2699 2704 if not self._censorable:
2700 2705 return False
2701 2706
2702 2707 # Fragile heuristic: unless new file meta keys are added alphabetically
2703 2708 # preceding "censored", all censored revisions are prefixed by
2704 2709 # "\1\ncensored:". A delta producing such a censored revision must be a
2705 2710 # full-replacement delta, so we inspect the first and only patch in the
2706 2711 # delta for this prefix.
2707 2712 hlen = struct.calcsize(">lll")
2708 2713 if len(delta) <= hlen:
2709 2714 return False
2710 2715
2711 2716 oldlen = self.rawsize(baserev)
2712 2717 newlen = len(delta) - hlen
2713 2718 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2714 2719 return False
2715 2720
2716 2721 add = "\1\ncensored:"
2717 2722 addlen = len(add)
2718 2723 return newlen >= addlen and delta[hlen:hlen + addlen] == add
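    # Sketch of the header test above: a binary patch hunk opens with
    # three big-endian 32-bit integers (start, end, new length), hence
    #
    #   hlen = struct.calcsize(">lll")   # == 12
    #
    # and a full-replacement delta consists of exactly one hunk whose
    # header equals mdiff.replacediffheader(oldlen, newlen).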
2719 2724
2720 2725 def getstrippoint(self, minlink):
2721 2726 """find the minimum rev that must be stripped to strip the linkrev
2722 2727
2723 2728 Returns a tuple containing the minimum rev and a set of all revs that
2724 2729 have linkrevs that will be broken by this strip.
2725 2730 """
2726 2731 brokenrevs = set()
2727 2732 strippoint = len(self)
2728 2733
2729 2734 heads = {}
2730 2735 futurelargelinkrevs = set()
2731 2736 for head in self.headrevs():
2732 2737 headlinkrev = self.linkrev(head)
2733 2738 heads[head] = headlinkrev
2734 2739 if headlinkrev >= minlink:
2735 2740 futurelargelinkrevs.add(headlinkrev)
2736 2741
2737 2742 # This algorithm involves walking down the rev graph, starting at the
2738 2743 # heads. Since the revs are topologically sorted according to linkrev,
2739 2744 # once all head linkrevs are below the minlink, we know there are
2740 2745 # no more revs that could have a linkrev greater than minlink.
2741 2746 # So we can stop walking.
2742 2747 while futurelargelinkrevs:
2743 2748 strippoint -= 1
2744 2749 linkrev = heads.pop(strippoint)
2745 2750
2746 2751 if linkrev < minlink:
2747 2752 brokenrevs.add(strippoint)
2748 2753 else:
2749 2754 futurelargelinkrevs.remove(linkrev)
2750 2755
2751 2756 for p in self.parentrevs(strippoint):
2752 2757 if p != nullrev:
2753 2758 plinkrev = self.linkrev(p)
2754 2759 heads[p] = plinkrev
2755 2760 if plinkrev >= minlink:
2756 2761 futurelargelinkrevs.add(plinkrev)
2757 2762
2758 2763 return strippoint, brokenrevs
2759 2764
2760 2765 def strip(self, minlink, transaction):
2761 2766 """truncate the revlog on the first revision with a linkrev >= minlink
2762 2767
2763 2768 This function is called when we're stripping revision minlink and
2764 2769 its descendants from the repository.
2765 2770
2766 2771 We have to remove all revisions with linkrev >= minlink, because
2767 2772 the equivalent changelog revisions will be renumbered after the
2768 2773 strip.
2769 2774
2770 2775 So we truncate the revlog on the first of these revisions, and
2771 2776 trust that the caller has saved the revisions that shouldn't be
2772 2777 removed and that it'll re-add them after this truncation.
2773 2778 """
2774 2779 if len(self) == 0:
2775 2780 return
2776 2781
2777 2782 rev, _ = self.getstrippoint(minlink)
2778 2783 if rev == len(self):
2779 2784 return
2780 2785
2781 2786 # first truncate the files on disk
2782 2787 end = self.start(rev)
2783 2788 if not self._inline:
2784 2789 transaction.add(self.datafile, end)
2785 2790 end = rev * self._io.size
2786 2791 else:
2787 2792 end += rev * self._io.size
2788 2793
2789 2794 transaction.add(self.indexfile, end)
2790 2795
2791 2796 # then reset internal state in memory to forget those revisions
2792 2797 self._cache = None
2793 2798 self._chaininfocache = {}
2794 2799 self._chunkclear()
2795 2800 for x in pycompat.xrange(rev, len(self)):
2796 2801 del self.nodemap[self.node(x)]
2797 2802
2798 2803 del self.index[rev:-1]
2799 2804 self._nodepos = None
2800 2805
2801 2806 def checksize(self):
2802 2807 expected = 0
2803 2808 if len(self):
2804 2809 expected = max(0, self.end(len(self) - 1))
2805 2810
2806 2811 try:
2807 2812 with self._datafp() as f:
2808 2813 f.seek(0, 2)
2809 2814 actual = f.tell()
2810 2815 dd = actual - expected
2811 2816 except IOError as inst:
2812 2817 if inst.errno != errno.ENOENT:
2813 2818 raise
2814 2819 dd = 0
2815 2820
2816 2821 try:
2817 2822 f = self.opener(self.indexfile)
2818 2823 f.seek(0, 2)
2819 2824 actual = f.tell()
2820 2825 f.close()
2821 2826 s = self._io.size
2822 2827 i = max(0, actual // s)
2823 2828 di = actual - (i * s)
2824 2829 if self._inline:
2825 2830 databytes = 0
2826 2831 for r in self:
2827 2832 databytes += max(0, self.length(r))
2828 2833 dd = 0
2829 2834 di = actual - len(self) * s - databytes
2830 2835 except IOError as inst:
2831 2836 if inst.errno != errno.ENOENT:
2832 2837 raise
2833 2838 di = 0
2834 2839
2835 2840 return (dd, di)
2836 2841
2837 2842 def files(self):
2838 2843 res = [self.indexfile]
2839 2844 if not self._inline:
2840 2845 res.append(self.datafile)
2841 2846 return res
2842 2847
2843 2848 DELTAREUSEALWAYS = 'always'
2844 2849 DELTAREUSESAMEREVS = 'samerevs'
2845 2850 DELTAREUSENEVER = 'never'
2846 2851
2847 2852 DELTAREUSEFULLADD = 'fulladd'
2848 2853
2849 2854 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2850 2855
2851 2856 def clone(self, tr, destrevlog, addrevisioncb=None,
2852 2857 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2853 2858 """Copy this revlog to another, possibly with format changes.
2854 2859
2855 2860 The destination revlog will contain the same revisions and nodes.
2856 2861 However, it may not be bit-for-bit identical due to e.g. delta encoding
2857 2862 differences.
2858 2863
2859 2864 The ``deltareuse`` argument controls how deltas from the existing revlog
2860 2865 are preserved in the destination revlog. The argument can have the
2861 2866 following values:
2862 2867
2863 2868 DELTAREUSEALWAYS
2864 2869 Deltas will always be reused (if possible), even if the destination
2865 2870 revlog would not select the same revisions for the delta. This is the
2866 2871 fastest mode of operation.
2867 2872 DELTAREUSESAMEREVS
2868 2873 Deltas will be reused if the destination revlog would pick the same
2869 2874 revisions for the delta. This mode strikes a balance between speed
2870 2875 and optimization.
2871 2876 DELTAREUSENEVER
2872 2877 Deltas will never be reused. This is the slowest mode of execution.
2873 2878 This mode can be used to recompute deltas (e.g. if the diff/delta
2874 2879 algorithm changes).
2875 2880
2876 2881 Delta computation can be slow, so the choice of delta reuse policy can
2877 2882 significantly affect run time.
2878 2883
2879 2884 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2880 2885 two extremes. Deltas will be reused if they are appropriate. But if the
2881 2886 delta could choose a better revision, it will do so. This means if you
2882 2887 are converting a non-generaldelta revlog to a generaldelta revlog,
2883 2888 deltas will be recomputed if the delta's parent isn't a parent of the
2884 2889 revision.
2885 2890
2886 2891 In addition to the delta policy, the ``deltabothparents`` argument
2887 2892 controls whether to compute deltas against both parents for merges.
2888 2893 If it is None, the destination revlog's existing setting is kept.
2889 2894 """
2890 2895 if deltareuse not in self.DELTAREUSEALL:
2891 2896 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2892 2897
2893 2898 if len(destrevlog):
2894 2899 raise ValueError(_('destination revlog is not empty'))
2895 2900
2896 2901 if getattr(self, 'filteredrevs', None):
2897 2902 raise ValueError(_('source revlog has filtered revisions'))
2898 2903 if getattr(destrevlog, 'filteredrevs', None):
2899 2904 raise ValueError(_('destination revlog has filtered revisions'))
2900 2905
2901 2906 # lazydeltabase controls whether to reuse a cached delta, if possible.
2902 2907 oldlazydeltabase = destrevlog._lazydeltabase
2903 2908 oldamd = destrevlog._deltabothparents
2904 2909
2905 2910 try:
2906 2911 if deltareuse == self.DELTAREUSEALWAYS:
2907 2912 destrevlog._lazydeltabase = True
2908 2913 elif deltareuse == self.DELTAREUSESAMEREVS:
2909 2914 destrevlog._lazydeltabase = False
2910 2915
2911 2916 destrevlog._deltabothparents = deltabothparents or oldamd
2912 2917
2913 2918 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2914 2919 self.DELTAREUSESAMEREVS)
2915 2920
2916 2921 deltacomputer = _deltacomputer(destrevlog)
2917 2922 index = self.index
2918 2923 for rev in self:
2919 2924 entry = index[rev]
2920 2925
2921 2926 # Some classes override linkrev to take filtered revs into
2922 2927 # account. Use raw entry from index.
2923 2928 flags = entry[0] & 0xffff
2924 2929 linkrev = entry[4]
2925 2930 p1 = index[entry[5]][7]
2926 2931 p2 = index[entry[6]][7]
2927 2932 node = entry[7]
2928 2933
2929 2934 # (Possibly) reuse the delta from the revlog if allowed and
2930 2935 # the revlog chunk is a delta.
2931 2936 cachedelta = None
2932 2937 rawtext = None
2933 2938 if populatecachedelta:
2934 2939 dp = self.deltaparent(rev)
2935 2940 if dp != nullrev:
2936 2941 cachedelta = (dp, bytes(self._chunk(rev)))
2937 2942
2938 2943 if not cachedelta:
2939 2944 rawtext = self.revision(rev, raw=True)
2940 2945
2941 2946
2942 2947 if deltareuse == self.DELTAREUSEFULLADD:
2943 2948 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2944 2949 cachedelta=cachedelta,
2945 2950 node=node, flags=flags,
2946 2951 deltacomputer=deltacomputer)
2947 2952 else:
2948 2953 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2949 2954 checkambig=False)
2950 2955 dfh = None
2951 2956 if not destrevlog._inline:
2952 2957 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2953 2958 try:
2954 2959 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2955 2960 p2, flags, cachedelta, ifh, dfh,
2956 2961 deltacomputer=deltacomputer)
2957 2962 finally:
2958 2963 if dfh:
2959 2964 dfh.close()
2960 2965 ifh.close()
2961 2966
2962 2967 if addrevisioncb:
2963 2968 addrevisioncb(self, rev, node)
2964 2969 finally:
2965 2970 destrevlog._lazydeltabase = oldlazydeltabase
2966 2971 destrevlog._deltabothparents = oldamd
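    # Usage sketch for clone() (hypothetical setup: src and dst are revlog
    # instances, tr an open transaction, and dst is still empty):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)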