revlog: fix pure version of _partialmatch() to include nullid...
Martin von Zweigbergk
r39227:5517d62c default
@@ -1,3073 +1,3076 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 nullhex,
30 31 nullid,
31 32 nullrev,
32 33 wdirfilenodeids,
33 34 wdirhex,
34 35 wdirid,
35 36 wdirrev,
36 37 )
37 38 from .i18n import _
38 39 from .thirdparty import (
39 40 attr,
40 41 )
41 42 from . import (
42 43 ancestor,
43 44 error,
44 45 mdiff,
45 46 policy,
46 47 pycompat,
47 48 templatefilters,
48 49 util,
49 50 )
50 51 from .utils import (
51 52 stringutil,
52 53 )
53 54
54 55 parsers = policy.importmod(r'parsers')
55 56
56 57 # Aliased for performance.
57 58 _zlibdecompress = zlib.decompress
58 59
59 60 # revlog header flags
60 61 REVLOGV0 = 0
61 62 REVLOGV1 = 1
62 63 # Dummy value until file format is finalized.
63 64 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 65 REVLOGV2 = 0xDEAD
65 66 FLAG_INLINE_DATA = (1 << 16)
66 67 FLAG_GENERALDELTA = (1 << 17)
67 68 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 69 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 70 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 71 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 72 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 73
73 74 # revlog index flags
74 75 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 76 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 77 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 78 REVIDX_DEFAULT_FLAGS = 0
78 79 # stable order in which flags need to be processed and their processors applied
79 80 REVIDX_FLAGS_ORDER = [
80 81 REVIDX_ISCENSORED,
81 82 REVIDX_ELLIPSIS,
82 83 REVIDX_EXTSTORED,
83 84 ]
84 85 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 86 # bitmask for flags that could cause rawdata content change
86 87 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 88
88 89 # max size of revlog with inline data
89 90 _maxinline = 131072
90 91 _chunksize = 1048576
91 92
92 93 RevlogError = error.RevlogError
93 94 LookupError = error.LookupError
94 95 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 96 CensoredNodeError = error.CensoredNodeError
96 97 ProgrammingError = error.ProgrammingError
97 98
98 99 # Store flag processors (cf. 'addflagprocessor()' to register)
99 100 _flagprocessors = {
100 101 REVIDX_ISCENSORED: None,
101 102 }
102 103
103 104 _mdre = re.compile('\1\n')
104 105 def parsemeta(text):
105 106 """return (metadatadict, metadatasize)"""
106 107 # text can be buffer, so we can't use .startswith or .index
107 108 if text[:2] != '\1\n':
108 109 return None, None
109 110 s = _mdre.search(text, 2).start()
110 111 mtext = text[2:s]
111 112 meta = {}
112 113 for l in mtext.splitlines():
113 114 k, v = l.split(": ", 1)
114 115 meta[k] = v
115 116 return meta, (s + 2)
116 117
117 118 def packmeta(meta, text):
118 119 keys = sorted(meta)
119 120 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 121 return "\1\n%s\1\n%s" % (metatext, text)
121 122
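# A minimal round-trip sketch of the two helpers above (illustrative
# only; bytes literals as elsewhere in this Python 2 oriented file):
#
#   blob = packmeta({'copy': 'a.txt'}, 'file content')
#   meta, size = parsemeta(blob)
#   # meta == {'copy': 'a.txt'} and blob[size:] == 'file content'
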
122 123 def _censoredtext(text):
123 124 m, offs = parsemeta(text)
124 125 return m and "censored" in m
125 126
126 127 def addflagprocessor(flag, processor):
127 128 """Register a flag processor on a revision data flag.
128 129
129 130 Invariant:
130 131 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 132 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 133 - Only one flag processor can be registered on a specific flag.
133 134 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 135 following signatures:
135 136 - (read) f(self, rawtext) -> text, bool
136 137 - (write) f(self, text) -> rawtext, bool
137 138 - (raw) f(self, rawtext) -> bool
138 139 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 140 directly visible to the user.
140 141 The boolean returned by these transforms is used to determine whether
141 142 the returned text can be used for hash integrity checking. For example,
142 143 if "write" returns False, then "text" is used to generate hash. If
143 144 "write" returns True, that basically means "rawtext" returned by "write"
144 145 should be used to generate hash. Usually, "write" and "read" return
145 146 different booleans. And "raw" returns the same boolean as "write".
146 147
147 148 Note: The 'raw' transform is used for changegroup generation and in some
148 149 debug commands. In this case the transform only indicates whether the
149 150 contents can be used for hash integrity checks.
150 151 """
151 152 if not flag & REVIDX_KNOWN_FLAGS:
152 153 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 154 raise ProgrammingError(msg)
154 155 if flag not in REVIDX_FLAGS_ORDER:
155 156 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 157 raise ProgrammingError(msg)
157 158 if flag in _flagprocessors:
158 159 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 160 raise error.Abort(msg)
160 161 _flagprocessors[flag] = processor
161 162
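# A minimal sketch of a conforming processor tuple, following the
# (read, write, raw) signatures documented above; the functions here
# are hypothetical and not registered by this module:
#
#   def _noopread(self, rawtext):
#       return rawtext, True   # text == rawtext, usable for hash checks
#   def _noopwrite(self, text):
#       return text, True
#   def _noopraw(self, rawtext):
#       return True            # same boolean as _noopwrite
#   addflagprocessor(REVIDX_EXTSTORED, (_noopread, _noopwrite, _noopraw))
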
162 163 def getoffset(q):
163 164 return int(q >> 16)
164 165
165 166 def gettype(q):
166 167 return int(q & 0xFFFF)
167 168
168 169 def offset_type(offset, type):
169 170 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 171 raise ValueError('unknown revlog index flags')
171 172 return int(int(offset) << 16 | type)
172 173
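# getoffset() and gettype() invert the packing done by offset_type():
# the byte offset lives in the high bits, the 16 flag bits in the low
# ones. Illustrative check:
#
#   q = offset_type(1024, REVIDX_ISCENSORED)
#   assert getoffset(q) == 1024 and gettype(q) == REVIDX_ISCENSORED
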
173 174 _nullhash = hashlib.sha1(nullid)
174 175
175 176 def hash(text, p1, p2):
176 177 """generate a hash from the given text and its parent hashes
177 178
178 179 This hash combines both the current file contents and its history
179 180 in a manner that makes it easy to distinguish nodes with the same
180 181 content in the revision graph.
181 182 """
182 183 # As of now, if one of the parent nodes is null, p2 is null
183 184 if p2 == nullid:
184 185 # deep copy of a hash is faster than creating one
185 186 s = _nullhash.copy()
186 187 s.update(p1)
187 188 else:
188 189 # none of the parent nodes are nullid
189 190 if p1 < p2:
190 191 a = p1
191 192 b = p2
192 193 else:
193 194 a = p2
194 195 b = p1
195 196 s = hashlib.sha1(a)
196 197 s.update(b)
197 198 s.update(text)
198 199 return s.digest()
199 200
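# Illustrative consequence of the special-casing above: for a root
# revision (both parents null) the node id reduces to
#
#   hashlib.sha1(nullid + nullid + text).digest()
#
# which is exactly what the precomputed _nullhash shortcut computes.
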
200 201 class _testrevlog(object):
201 202 """minimalist fake revlog to use in doctests"""
202 203
203 204 def __init__(self, data, density=0.5, mingap=0):
204 205 """data is an list of revision payload boundaries"""
205 206 self._data = data
206 207 self._srdensitythreshold = density
207 208 self._srmingapsize = mingap
208 209
209 210 def start(self, rev):
210 211 if rev == 0:
211 212 return 0
212 213 return self._data[rev - 1]
213 214
214 215 def end(self, rev):
215 216 return self._data[rev]
216 217
217 218 def length(self, rev):
218 219 return self.end(rev) - self.start(rev)
219 220
220 221 def __len__(self):
221 222 return len(self._data)
222 223
223 224 def _trimchunk(revlog, revs, startidx, endidx=None):
224 225 """returns revs[startidx:endidx] without empty trailing revs
225 226
226 227 Doctest Setup
227 228 >>> revlog = _testrevlog([
228 229 ... 5, #0
229 230 ... 10, #1
230 231 ... 12, #2
231 232 ... 12, #3 (empty)
232 233 ... 17, #4
233 234 ... 21, #5
234 235 ... 21, #6 (empty)
235 236 ... ])
236 237
237 238 Contiguous cases:
238 239 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 240 [0, 1, 2, 3, 4, 5]
240 241 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 242 [0, 1, 2, 3, 4]
242 243 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 244 [0, 1, 2]
244 245 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 246 [2]
246 247 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 248 [3, 4, 5]
248 249 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 250 [3, 4]
250 251
251 252 Discontiguous cases:
252 253 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 254 [1, 3, 5]
254 255 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 256 [1]
256 257 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 258 [3, 5]
258 259 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 260 [3, 5]
260 261 """
261 262 length = revlog.length
262 263
263 264 if endidx is None:
264 265 endidx = len(revs)
265 266
266 267 # If we have a non-empty delta candidate, there is nothing to trim
267 268 if revs[endidx - 1] < len(revlog):
268 269 # Trim empty revs at the end, except the very first revision of a chain
269 270 while (endidx > 1
270 271 and endidx > startidx
271 272 and length(revs[endidx - 1]) == 0):
272 273 endidx -= 1
273 274
274 275 return revs[startidx:endidx]
275 276
276 277 def _segmentspan(revlog, revs, deltainfo=None):
277 278 """Get the byte span of a segment of revisions
278 279
279 280 revs is a sorted array of revision numbers
280 281
281 282 >>> revlog = _testrevlog([
282 283 ... 5, #0
283 284 ... 10, #1
284 285 ... 12, #2
285 286 ... 12, #3 (empty)
286 287 ... 17, #4
287 288 ... ])
288 289
289 290 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
290 291 17
291 292 >>> _segmentspan(revlog, [0, 4])
292 293 17
293 294 >>> _segmentspan(revlog, [3, 4])
294 295 5
295 296 >>> _segmentspan(revlog, [1, 2, 3,])
296 297 7
297 298 >>> _segmentspan(revlog, [1, 3])
298 299 7
299 300 """
300 301 if not revs:
301 302 return 0
302 303 if deltainfo is not None and len(revlog) <= revs[-1]:
303 304 if len(revs) == 1:
304 305 return deltainfo.deltalen
305 306 offset = revlog.end(len(revlog) - 1)
306 307 end = deltainfo.deltalen + offset
307 308 else:
308 309 end = revlog.end(revs[-1])
309 310 return end - revlog.start(revs[0])
310 311
311 312 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
312 313 """slice revs to reduce the amount of unrelated data to be read from disk.
313 314
314 315 ``revs`` is sliced into groups that should be read in one go.
315 316 Assume that revs are sorted.
316 317
317 318 The initial chunk is sliced until the overall density (payload/chunks-span
318 319 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
319 320 `revlog._srmingapsize` is skipped.
320 321
321 322 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
322 323 For consistency with other slicing choices, this limit won't go lower than
323 324 `revlog._srmingapsize`.
324 325
325 326 If individual revision chunks are larger than this limit, they will still
326 327 be yielded individually.
327 328
328 329 >>> revlog = _testrevlog([
329 330 ... 5, #00 (5)
330 331 ... 10, #01 (5)
331 332 ... 12, #02 (2)
332 333 ... 12, #03 (empty)
333 334 ... 27, #04 (15)
334 335 ... 31, #05 (4)
335 336 ... 31, #06 (empty)
336 337 ... 42, #07 (11)
337 338 ... 47, #08 (5)
338 339 ... 47, #09 (empty)
339 340 ... 48, #10 (1)
340 341 ... 51, #11 (3)
341 342 ... 74, #12 (23)
342 343 ... 85, #13 (11)
343 344 ... 86, #14 (1)
344 345 ... 91, #15 (5)
345 346 ... ])
346 347
347 348 >>> list(_slicechunk(revlog, list(range(16))))
348 349 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
349 350 >>> list(_slicechunk(revlog, [0, 15]))
350 351 [[0], [15]]
351 352 >>> list(_slicechunk(revlog, [0, 11, 15]))
352 353 [[0], [11], [15]]
353 354 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
354 355 [[0], [11, 13, 15]]
355 356 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
356 357 [[1, 2], [5, 8, 10, 11], [14]]
357 358
358 359 Slicing with a maximum chunk size
359 360 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
360 361 [[0], [11], [13], [15]]
361 362 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
362 363 [[0], [11], [13, 15]]
363 364 """
364 365 if targetsize is not None:
365 366 targetsize = max(targetsize, revlog._srmingapsize)
366 367 # targetsize should not be specified when evaluating delta candidates:
367 368 # * targetsize is used to ensure we stay within specification when reading,
368 369 # * deltainfo is used to pick a good delta chain when writing.
369 370 if not (deltainfo is None or targetsize is None):
370 371 msg = 'cannot use `targetsize` with a `deltainfo`'
371 372 raise error.ProgrammingError(msg)
372 373 for chunk in _slicechunktodensity(revlog, revs,
373 374 deltainfo,
374 375 revlog._srdensitythreshold,
375 376 revlog._srmingapsize):
376 377 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
377 378 yield subchunk
378 379
379 380 def _slicechunktosize(revlog, revs, targetsize=None):
380 381 """slice revs to match the target size
381 382
382 383 This is intended to be used on chunks that density slicing selected
383 384 but that are still too large compared to the read guarantee of revlog.
384 385 This might happen when "minimal gap size" interrupted the slicing or
385 386 when chains are built in a way that creates large blocks next to each other.
386 387
387 388 >>> revlog = _testrevlog([
388 389 ... 3, #0 (3)
389 390 ... 5, #1 (2)
390 391 ... 6, #2 (1)
391 392 ... 8, #3 (2)
392 393 ... 8, #4 (empty)
393 394 ... 11, #5 (3)
394 395 ... 12, #6 (1)
395 396 ... 13, #7 (1)
396 397 ... 14, #8 (1)
397 398 ... ])
398 399
399 400 Cases where chunk is already small enough
400 401 >>> list(_slicechunktosize(revlog, [0], 3))
401 402 [[0]]
402 403 >>> list(_slicechunktosize(revlog, [6, 7], 3))
403 404 [[6, 7]]
404 405 >>> list(_slicechunktosize(revlog, [0], None))
405 406 [[0]]
406 407 >>> list(_slicechunktosize(revlog, [6, 7], None))
407 408 [[6, 7]]
408 409
409 410 Cases where we need actual slicing
410 411 >>> list(_slicechunktosize(revlog, [0, 1], 3))
411 412 [[0], [1]]
412 413 >>> list(_slicechunktosize(revlog, [1, 3], 3))
413 414 [[1], [3]]
414 415 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
415 416 [[1, 2], [3]]
416 417 >>> list(_slicechunktosize(revlog, [3, 5], 3))
417 418 [[3], [5]]
418 419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
419 420 [[3], [5]]
420 421 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
421 422 [[5], [6, 7, 8]]
422 423 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
423 424 [[0], [1, 2], [3], [5], [6, 7, 8]]
424 425
425 426 Cases with too large individual chunks (must return valid chunks)
426 427 >>> list(_slicechunktosize(revlog, [0, 1], 2))
427 428 [[0], [1]]
428 429 >>> list(_slicechunktosize(revlog, [1, 3], 1))
429 430 [[1], [3]]
430 431 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
431 432 [[3], [5]]
432 433 """
433 434 assert targetsize is None or 0 <= targetsize
434 435 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
435 436 yield revs
436 437 return
437 438
438 439 startrevidx = 0
439 440 startdata = revlog.start(revs[0])
440 441 endrevidx = 0
441 442 iterrevs = enumerate(revs)
442 443 next(iterrevs) # skip first rev.
443 444 for idx, r in iterrevs:
444 445 span = revlog.end(r) - startdata
445 446 if span <= targetsize:
446 447 endrevidx = idx
447 448 else:
448 449 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
449 450 if chunk:
450 451 yield chunk
451 452 startrevidx = idx
452 453 startdata = revlog.start(r)
453 454 endrevidx = idx
454 455 yield _trimchunk(revlog, revs, startrevidx)
455 456
456 457 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
457 458 mingapsize=0):
458 459 """slice revs to reduce the amount of unrelated data to be read from disk.
459 460
460 461 ``revs`` is sliced into groups that should be read in one go.
461 462 Assume that revs are sorted.
462 463
463 464 ``deltainfo`` is a _deltainfo instance of a revision that we would append
464 465 to the top of the revlog.
465 466
466 467 The initial chunk is sliced until the overall density (payload/chunks-span
467 468 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
468 469 skipped.
469 470
470 471 >>> revlog = _testrevlog([
471 472 ... 5, #00 (5)
472 473 ... 10, #01 (5)
473 474 ... 12, #02 (2)
474 475 ... 12, #03 (empty)
475 476 ... 27, #04 (15)
476 477 ... 31, #05 (4)
477 478 ... 31, #06 (empty)
478 479 ... 42, #07 (11)
479 480 ... 47, #08 (5)
480 481 ... 47, #09 (empty)
481 482 ... 48, #10 (1)
482 483 ... 51, #11 (3)
483 484 ... 74, #12 (23)
484 485 ... 85, #13 (11)
485 486 ... 86, #14 (1)
486 487 ... 91, #15 (5)
487 488 ... ])
488 489
489 490 >>> list(_slicechunktodensity(revlog, list(range(16))))
490 491 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
491 492 >>> list(_slicechunktodensity(revlog, [0, 15]))
492 493 [[0], [15]]
493 494 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
494 495 [[0], [11], [15]]
495 496 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
496 497 [[0], [11, 13, 15]]
497 498 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
498 499 [[1, 2], [5, 8, 10, 11], [14]]
499 500 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
500 501 ... mingapsize=20))
501 502 [[1, 2, 3, 5, 8, 10, 11], [14]]
502 503 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
503 504 ... targetdensity=0.95))
504 505 [[1, 2], [5], [8, 10, 11], [14]]
505 506 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
506 507 ... targetdensity=0.95, mingapsize=12))
507 508 [[1, 2], [5, 8, 10, 11], [14]]
508 509 """
509 510 start = revlog.start
510 511 length = revlog.length
511 512
512 513 if len(revs) <= 1:
513 514 yield revs
514 515 return
515 516
516 517 nextrev = len(revlog)
517 518 nextoffset = revlog.end(nextrev - 1)
518 519
519 520 if deltainfo is None:
520 521 deltachainspan = _segmentspan(revlog, revs)
521 522 chainpayload = sum(length(r) for r in revs)
522 523 else:
523 524 deltachainspan = deltainfo.distance
524 525 chainpayload = deltainfo.compresseddeltalen
525 526
526 527 if deltachainspan < mingapsize:
527 528 yield revs
528 529 return
529 530
530 531 readdata = deltachainspan
531 532
532 533 if deltachainspan:
533 534 density = chainpayload / float(deltachainspan)
534 535 else:
535 536 density = 1.0
536 537
537 538 if density >= targetdensity:
538 539 yield revs
539 540 return
540 541
541 542 if deltainfo is not None and deltainfo.deltalen:
542 543 revs = list(revs)
543 544 revs.append(nextrev)
544 545
545 546 # Store the gaps in a heap to have them sorted by decreasing size
546 547 gapsheap = []
547 548 heapq.heapify(gapsheap)
548 549 prevend = None
549 550 for i, rev in enumerate(revs):
550 551 if rev < nextrev:
551 552 revstart = start(rev)
552 553 revlen = length(rev)
553 554 else:
554 555 revstart = nextoffset
555 556 revlen = deltainfo.deltalen
556 557
557 558 # Skip empty revisions to form larger holes
558 559 if revlen == 0:
559 560 continue
560 561
561 562 if prevend is not None:
562 563 gapsize = revstart - prevend
563 564 # only consider holes that are large enough
564 565 if gapsize > mingapsize:
565 566 heapq.heappush(gapsheap, (-gapsize, i))
566 567
567 568 prevend = revstart + revlen
568 569
569 570 # Collect the indices of the largest holes until the density is acceptable
570 571 indicesheap = []
571 572 heapq.heapify(indicesheap)
572 573 while gapsheap and density < targetdensity:
573 574 oppgapsize, gapidx = heapq.heappop(gapsheap)
574 575
575 576 heapq.heappush(indicesheap, gapidx)
576 577
577 578 # the gap sizes are stored as negatives to be sorted decreasingly
578 579 # by the heap
579 580 readdata -= (-oppgapsize)
580 581 if readdata > 0:
581 582 density = chainpayload / float(readdata)
582 583 else:
583 584 density = 1.0
584 585
585 586 # Cut the revs at collected indices
586 587 previdx = 0
587 588 while indicesheap:
588 589 idx = heapq.heappop(indicesheap)
589 590
590 591 chunk = _trimchunk(revlog, revs, previdx, idx)
591 592 if chunk:
592 593 yield chunk
593 594
594 595 previdx = idx
595 596
596 597 chunk = _trimchunk(revlog, revs, previdx)
597 598 if chunk:
598 599 yield chunk
599 600
600 601 @attr.s(slots=True, frozen=True)
601 602 class _deltainfo(object):
602 603 distance = attr.ib()
603 604 deltalen = attr.ib()
604 605 data = attr.ib()
605 606 base = attr.ib()
606 607 chainbase = attr.ib()
607 608 chainlen = attr.ib()
608 609 compresseddeltalen = attr.ib()
609 610 snapshotdepth = attr.ib()
610 611
611 612 class _deltacomputer(object):
612 613 def __init__(self, revlog):
613 614 self.revlog = revlog
614 615
615 616 def _getcandidaterevs(self, p1, p2, cachedelta):
616 617 """
617 618 Provides revisions that are interesting to diff against,
618 619 grouped by level of easiness.
619 620 """
620 621 revlog = self.revlog
621 622 gdelta = revlog._generaldelta
622 623 curr = len(revlog)
623 624 prev = curr - 1
624 625 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
625 626
626 627 # should we try to build a delta?
627 628 if prev != nullrev and revlog.storedeltachains:
628 629 tested = set()
629 630 # This condition is true most of the time when processing
630 631 # changegroup data into a generaldelta repo. The only time it
631 632 # isn't true is if this is the first revision in a delta chain
632 633 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
633 634 if cachedelta and gdelta and revlog._lazydeltabase:
634 635 # Assume what we received from the server is a good choice
635 636 # the delta builder will reuse the cache
636 637 yield (cachedelta[0],)
637 638 tested.add(cachedelta[0])
638 639
639 640 if gdelta:
640 641 # exclude the base we already tested lazily, if any
641 642 parents = [p for p in (p1r, p2r)
642 643 if p != nullrev and p not in tested]
643 644
644 645 if not revlog._deltabothparents and len(parents) == 2:
645 646 parents.sort()
646 647 # To minimize the chance of having to build a fulltext,
647 648 # pick first whichever parent is closest to us (max rev)
648 649 yield (parents[1],)
649 650 # then the other one (min rev) if the first did not fit
650 651 yield (parents[0],)
651 652 tested.update(parents)
652 653 elif len(parents) > 0:
653 654 # Test all parents (1 or 2), and keep the best candidate
654 655 yield parents
655 656 tested.update(parents)
656 657
657 658 if prev not in tested:
658 659 # other approaches failed; try against prev to hopefully save us a
659 660 # fulltext.
660 661 yield (prev,)
661 662 tested.add(prev)
662 663
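# Hedged summary of the candidate groups yielded above for a
# generaldelta repo: 1) the cached delta base from the incoming
# changegroup (when lazydeltabase applies), 2) the parents -- together
# when _deltabothparents is set, otherwise the higher rev before the
# lower one -- and 3) the previous revision. Each group is only tried
# if the earlier ones produced no acceptable delta.
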
663 664 def buildtext(self, revinfo, fh):
664 665 """Builds a fulltext version of a revision
665 666
666 667 revinfo: _revisioninfo instance that contains all needed info
667 668 fh: file handle to either the .i or the .d revlog file,
668 669 depending on whether it is inlined or not
669 670 """
670 671 btext = revinfo.btext
671 672 if btext[0] is not None:
672 673 return btext[0]
673 674
674 675 revlog = self.revlog
675 676 cachedelta = revinfo.cachedelta
676 677 flags = revinfo.flags
677 678 node = revinfo.node
678 679
679 680 baserev = cachedelta[0]
680 681 delta = cachedelta[1]
681 682 # special case deltas which replace entire base; no need to decode
682 683 # base revision. this neatly avoids censored bases, which throw when
683 684 # they're decoded.
684 685 hlen = struct.calcsize(">lll")
685 686 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
686 687 len(delta) - hlen):
687 688 btext[0] = delta[hlen:]
688 689 else:
689 690 # deltabase is rawtext before changed by flag processors, which is
690 691 # equivalent to non-raw text
691 692 basetext = revlog.revision(baserev, _df=fh, raw=False)
692 693 btext[0] = mdiff.patch(basetext, delta)
693 694
694 695 try:
695 696 res = revlog._processflags(btext[0], flags, 'read', raw=True)
696 697 btext[0], validatehash = res
697 698 if validatehash:
698 699 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
699 700 if flags & REVIDX_ISCENSORED:
700 701 raise RevlogError(_('node %s is not censored') % node)
701 702 except CensoredNodeError:
702 703 # must pass the censored index flag to add censored revisions
703 704 if not flags & REVIDX_ISCENSORED:
704 705 raise
705 706 return btext[0]
706 707
707 708 def _builddeltadiff(self, base, revinfo, fh):
708 709 revlog = self.revlog
709 710 t = self.buildtext(revinfo, fh)
710 711 if revlog.iscensored(base):
711 712 # deltas based on a censored revision must replace the
712 713 # full content in one patch, so delta works everywhere
713 714 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
714 715 delta = header + t
715 716 else:
716 717 ptext = revlog.revision(base, _df=fh, raw=True)
717 718 delta = mdiff.textdiff(ptext, t)
718 719
719 720 return delta
720 721
721 722 def _builddeltainfo(self, revinfo, base, fh):
722 723 # can we use the cached delta?
723 724 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
724 725 delta = revinfo.cachedelta[1]
725 726 else:
726 727 delta = self._builddeltadiff(base, revinfo, fh)
727 728 revlog = self.revlog
728 729 header, data = revlog.compress(delta)
729 730 deltalen = len(header) + len(data)
730 731 chainbase = revlog.chainbase(base)
731 732 offset = revlog.end(len(revlog) - 1)
732 733 dist = deltalen + offset - revlog.start(chainbase)
733 734 if revlog._generaldelta:
734 735 deltabase = base
735 736 else:
736 737 deltabase = chainbase
737 738 chainlen, compresseddeltalen = revlog._chaininfo(base)
738 739 chainlen += 1
739 740 compresseddeltalen += deltalen
740 741
741 742 revlog = self.revlog
742 743 snapshotdepth = None
743 744 if deltabase == nullrev:
744 745 snapshotdepth = 0
745 746 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
746 747 # A delta chain should always be one full snapshot,
747 748 # zero or more semi-snapshots, and zero or more deltas
748 749 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
749 750 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
750 751 snapshotdepth = len(revlog._deltachain(deltabase)[0])
751 752
752 753 return _deltainfo(dist, deltalen, (header, data), deltabase,
753 754 chainbase, chainlen, compresseddeltalen,
754 755 snapshotdepth)
755 756
756 757 def finddeltainfo(self, revinfo, fh):
757 758 """Find an acceptable delta against a candidate revision
758 759
759 760 revinfo: information about the revision (instance of _revisioninfo)
760 761 fh: file handle to either the .i or the .d revlog file,
761 762 depending on whether it is inlined or not
762 763
763 764 Returns the first acceptable candidate revision, as ordered by
764 765 _getcandidaterevs
765 766 """
766 767 if not revinfo.textlen:
767 768 return None # empty files do not need a delta
768 769
769 770 cachedelta = revinfo.cachedelta
770 771 p1 = revinfo.p1
771 772 p2 = revinfo.p2
772 773 revlog = self.revlog
773 774
774 775 deltalength = self.revlog.length
775 776 deltaparent = self.revlog.deltaparent
776 777
777 778 deltainfo = None
778 779 deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
779 780 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
780 781 # filter out delta bases that will never produce a good delta
781 782 candidaterevs = [r for r in candidaterevs
782 783 if self.revlog.length(r) <= deltas_limit]
783 784 nominateddeltas = []
784 785 for candidaterev in candidaterevs:
785 786 # skip over empty deltas (no need to include them in a chain)
786 787 while candidaterev != nullrev and not deltalength(candidaterev):
787 788 candidaterev = deltaparent(candidaterev)
788 789 # no need to try a delta against nullid, this will be handled
789 790 # by fulltext later.
790 791 if candidaterev == nullrev:
791 792 continue
792 793 # no delta for rawtext-changing revs (see "candelta" for why)
793 794 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
794 795 continue
795 796 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
796 797 if revlog._isgooddeltainfo(candidatedelta, revinfo):
797 798 nominateddeltas.append(candidatedelta)
798 799 if nominateddeltas:
799 800 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
800 801 break
801 802
802 803 return deltainfo
803 804
804 805 @attr.s(slots=True, frozen=True)
805 806 class _revisioninfo(object):
806 807 """Information about a revision that allows building its fulltext
807 808 node: expected hash of the revision
808 809 p1, p2: parent revs of the revision
809 810 btext: built text cache consisting of a one-element list
810 811 cachedelta: (baserev, uncompressed_delta) or None
811 812 flags: flags associated to the revision storage
812 813
813 814 One of btext[0] or cachedelta must be set.
814 815 """
815 816 node = attr.ib()
816 817 p1 = attr.ib()
817 818 p2 = attr.ib()
818 819 btext = attr.ib()
819 820 textlen = attr.ib()
820 821 cachedelta = attr.ib()
821 822 flags = attr.ib()
822 823
823 824 # index v0:
824 825 # 4 bytes: offset
825 826 # 4 bytes: compressed length
826 827 # 4 bytes: base rev
827 828 # 4 bytes: link rev
828 829 # 20 bytes: parent 1 nodeid
829 830 # 20 bytes: parent 2 nodeid
830 831 # 20 bytes: nodeid
831 832 indexformatv0 = struct.Struct(">4l20s20s20s")
832 833 indexformatv0_pack = indexformatv0.pack
833 834 indexformatv0_unpack = indexformatv0.unpack
834 835
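# With the layout above, each v0 index entry is a fixed 76 bytes
# (4 * 4-byte big-endian ints + 3 * 20-byte nodeids); illustrative:
#   assert indexformatv0.size == 76
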
835 836 class revlogoldindex(list):
836 837 def __getitem__(self, i):
837 838 if i == -1:
838 839 return (0, 0, 0, -1, -1, -1, -1, nullid)
839 840 return list.__getitem__(self, i)
840 841
841 842 # maximum <delta-chain-data>/<revision-text-length> ratio
842 843 LIMIT_DELTA2TEXT = 2
843 844
844 845 class revlogoldio(object):
845 846 def __init__(self):
846 847 self.size = indexformatv0.size
847 848
848 849 def parseindex(self, data, inline):
849 850 s = self.size
850 851 index = []
851 852 nodemap = {nullid: nullrev}
852 853 n = off = 0
853 854 l = len(data)
854 855 while off + s <= l:
855 856 cur = data[off:off + s]
856 857 off += s
857 858 e = indexformatv0_unpack(cur)
858 859 # transform to revlogv1 format
859 860 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
860 861 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
861 862 index.append(e2)
862 863 nodemap[e[6]] = n
863 864 n += 1
864 865
865 866 return revlogoldindex(index), nodemap, None
866 867
867 868 def packentry(self, entry, node, version, rev):
868 869 if gettype(entry[0]):
869 870 raise RevlogError(_('index entry flags need revlog version 1'))
870 871 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
871 872 node(entry[5]), node(entry[6]), entry[7])
872 873 return indexformatv0_pack(*e2)
873 874
874 875 # index ng:
875 876 # 6 bytes: offset
876 877 # 2 bytes: flags
877 878 # 4 bytes: compressed length
878 879 # 4 bytes: uncompressed length
879 880 # 4 bytes: base rev
880 881 # 4 bytes: link rev
881 882 # 4 bytes: parent 1 rev
882 883 # 4 bytes: parent 2 rev
883 884 # 32 bytes: nodeid
884 885 indexformatng = struct.Struct(">Qiiiiii20s12x")
885 886 indexformatng_pack = indexformatng.pack
886 887 versionformat = struct.Struct(">I")
887 888 versionformat_pack = versionformat.pack
888 889 versionformat_unpack = versionformat.unpack
889 890
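# With the layout above, each "ng" index entry is a fixed 64 bytes
# (8 + 6 * 4 + 20 bytes of payload, plus 12 bytes of padding);
# illustrative:
#   assert indexformatng.size == 64
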
890 891 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
891 892 # signed integer)
892 893 _maxentrysize = 0x7fffffff
893 894
894 895 class revlogio(object):
895 896 def __init__(self):
896 897 self.size = indexformatng.size
897 898
898 899 def parseindex(self, data, inline):
899 900 # call the C implementation to parse the index data
900 901 index, cache = parsers.parse_index2(data, inline)
901 902 return index, getattr(index, 'nodemap', None), cache
902 903
903 904 def packentry(self, entry, node, version, rev):
904 905 p = indexformatng_pack(*entry)
905 906 if rev == 0:
906 907 p = versionformat_pack(version) + p[4:]
907 908 return p
908 909
909 910 class revlog(object):
910 911 """
911 912 the underlying revision storage object
912 913
913 914 A revlog consists of two parts, an index and the revision data.
914 915
915 916 The index is a file with a fixed record size containing
916 917 information on each revision, including its nodeid (hash), the
917 918 nodeids of its parents, the position and offset of its data within
918 919 the data file, and the revision it's based on. Finally, each entry
919 920 contains a linkrev entry that can serve as a pointer to external
920 921 data.
921 922
922 923 The revision data itself is a linear collection of data chunks.
923 924 Each chunk represents a revision and is usually represented as a
924 925 delta against the previous chunk. To bound lookup time, runs of
925 926 deltas are limited to about 2 times the length of the original
926 927 version data. This makes retrieval of a version proportional to
927 928 its size, or O(1) relative to the number of revisions.
928 929
929 930 Both pieces of the revlog are written to in an append-only
930 931 fashion, which means we never need to rewrite a file to insert or
931 932 remove data, and can use some simple techniques to avoid the need
932 933 for locking while reading.
933 934
934 935 If checkambig, indexfile is opened with checkambig=True at
935 936 writing, to avoid file stat ambiguity.
936 937
937 938 If mmaplargeindex is True, and an mmapindexthreshold is set, the
938 939 index will be mmapped rather than read if it is larger than the
939 940 configured threshold.
940 941
941 942 If censorable is True, the revlog can have censored revisions.
942 943 """
943 944 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
944 945 mmaplargeindex=False, censorable=False):
945 946 """
946 947 create a revlog object
947 948
948 949 opener is a function that abstracts the file opening operation
949 950 and can be used to implement COW semantics or the like.
950 951 """
951 952 self.indexfile = indexfile
952 953 self.datafile = datafile or (indexfile[:-2] + ".d")
953 954 self.opener = opener
954 955 # When True, indexfile is opened with checkambig=True at writing, to
955 956 # avoid file stat ambiguity.
956 957 self._checkambig = checkambig
957 958 self._censorable = censorable
958 959 # 3-tuple of (node, rev, text) for a raw revision.
959 960 self._cache = None
960 961 # Maps rev to chain base rev.
961 962 self._chainbasecache = util.lrucachedict(100)
962 963 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
963 964 self._chunkcache = (0, '')
964 965 # How much data to read and cache into the raw revlog data cache.
965 966 self._chunkcachesize = 65536
966 967 self._maxchainlen = None
967 968 self._deltabothparents = True
968 969 self.index = []
969 970 # Mapping of partial identifiers to full nodes.
970 971 self._pcache = {}
971 972 # Mapping of revision integer to full node.
972 973 self._nodecache = {nullid: nullrev}
973 974 self._nodepos = None
974 975 self._compengine = 'zlib'
975 976 self._maxdeltachainspan = -1
976 977 self._withsparseread = False
977 978 self._sparserevlog = False
978 979 self._srdensitythreshold = 0.50
979 980 self._srmingapsize = 262144
980 981
981 982 mmapindexthreshold = None
982 983 v = REVLOG_DEFAULT_VERSION
983 984 opts = getattr(opener, 'options', None)
984 985 if opts is not None:
985 986 if 'revlogv2' in opts:
986 987 # version 2 revlogs always use generaldelta.
987 988 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
988 989 elif 'revlogv1' in opts:
989 990 if 'generaldelta' in opts:
990 991 v |= FLAG_GENERALDELTA
991 992 else:
992 993 v = 0
993 994 if 'chunkcachesize' in opts:
994 995 self._chunkcachesize = opts['chunkcachesize']
995 996 if 'maxchainlen' in opts:
996 997 self._maxchainlen = opts['maxchainlen']
997 998 if 'deltabothparents' in opts:
998 999 self._deltabothparents = opts['deltabothparents']
999 1000 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
1000 1001 if 'compengine' in opts:
1001 1002 self._compengine = opts['compengine']
1002 1003 if 'maxdeltachainspan' in opts:
1003 1004 self._maxdeltachainspan = opts['maxdeltachainspan']
1004 1005 if mmaplargeindex and 'mmapindexthreshold' in opts:
1005 1006 mmapindexthreshold = opts['mmapindexthreshold']
1006 1007 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1007 1008 withsparseread = bool(opts.get('with-sparse-read', False))
1008 1009 # sparse-revlog forces sparse-read
1009 1010 self._withsparseread = self._sparserevlog or withsparseread
1010 1011 if 'sparse-read-density-threshold' in opts:
1011 1012 self._srdensitythreshold = opts['sparse-read-density-threshold']
1012 1013 if 'sparse-read-min-gap-size' in opts:
1013 1014 self._srmingapsize = opts['sparse-read-min-gap-size']
1014 1015
1015 1016 if self._chunkcachesize <= 0:
1016 1017 raise RevlogError(_('revlog chunk cache size %r is not greater '
1017 1018 'than 0') % self._chunkcachesize)
1018 1019 elif self._chunkcachesize & (self._chunkcachesize - 1):
1019 1020 raise RevlogError(_('revlog chunk cache size %r is not a power '
1020 1021 'of 2') % self._chunkcachesize)
1021 1022
1022 1023 indexdata = ''
1023 1024 self._initempty = True
1024 1025 try:
1025 1026 with self._indexfp() as f:
1026 1027 if (mmapindexthreshold is not None and
1027 1028 self.opener.fstat(f).st_size >= mmapindexthreshold):
1028 1029 indexdata = util.buffer(util.mmapread(f))
1029 1030 else:
1030 1031 indexdata = f.read()
1031 1032 if len(indexdata) > 0:
1032 1033 v = versionformat_unpack(indexdata[:4])[0]
1033 1034 self._initempty = False
1034 1035 except IOError as inst:
1035 1036 if inst.errno != errno.ENOENT:
1036 1037 raise
1037 1038
1038 1039 self.version = v
1039 1040 self._inline = v & FLAG_INLINE_DATA
1040 1041 self._generaldelta = v & FLAG_GENERALDELTA
1041 1042 flags = v & ~0xFFFF
1042 1043 fmt = v & 0xFFFF
1043 1044 if fmt == REVLOGV0:
1044 1045 if flags:
1045 1046 raise RevlogError(_('unknown flags (%#04x) in version %d '
1046 1047 'revlog %s') %
1047 1048 (flags >> 16, fmt, self.indexfile))
1048 1049 elif fmt == REVLOGV1:
1049 1050 if flags & ~REVLOGV1_FLAGS:
1050 1051 raise RevlogError(_('unknown flags (%#04x) in version %d '
1051 1052 'revlog %s') %
1052 1053 (flags >> 16, fmt, self.indexfile))
1053 1054 elif fmt == REVLOGV2:
1054 1055 if flags & ~REVLOGV2_FLAGS:
1055 1056 raise RevlogError(_('unknown flags (%#04x) in version %d '
1056 1057 'revlog %s') %
1057 1058 (flags >> 16, fmt, self.indexfile))
1058 1059 else:
1059 1060 raise RevlogError(_('unknown version (%d) in revlog %s') %
1060 1061 (fmt, self.indexfile))
1061 1062
1062 1063 self.storedeltachains = True
1063 1064
1064 1065 self._io = revlogio()
1065 1066 if self.version == REVLOGV0:
1066 1067 self._io = revlogoldio()
1067 1068 try:
1068 1069 d = self._io.parseindex(indexdata, self._inline)
1069 1070 except (ValueError, IndexError):
1070 1071 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1071 1072 self.index, nodemap, self._chunkcache = d
1072 1073 if nodemap is not None:
1073 1074 self.nodemap = self._nodecache = nodemap
1074 1075 if not self._chunkcache:
1075 1076 self._chunkclear()
1076 1077 # revnum -> (chain-length, sum-delta-length)
1077 1078 self._chaininfocache = {}
1078 1079 # revlog header -> revlog compressor
1079 1080 self._decompressors = {}
1080 1081
1081 1082 @util.propertycache
1082 1083 def _compressor(self):
1083 1084 return util.compengines[self._compengine].revlogcompressor()
1084 1085
1085 1086 def _indexfp(self, mode='r'):
1086 1087 """file object for the revlog's index file"""
1087 1088 args = {r'mode': mode}
1088 1089 if mode != 'r':
1089 1090 args[r'checkambig'] = self._checkambig
1090 1091 if mode == 'w':
1091 1092 args[r'atomictemp'] = True
1092 1093 return self.opener(self.indexfile, **args)
1093 1094
1094 1095 def _datafp(self, mode='r'):
1095 1096 """file object for the revlog's data file"""
1096 1097 return self.opener(self.datafile, mode=mode)
1097 1098
1098 1099 @contextlib.contextmanager
1099 1100 def _datareadfp(self, existingfp=None):
1100 1101 """file object suitable to read data"""
1101 1102 if existingfp is not None:
1102 1103 yield existingfp
1103 1104 else:
1104 1105 if self._inline:
1105 1106 func = self._indexfp
1106 1107 else:
1107 1108 func = self._datafp
1108 1109 with func() as fp:
1109 1110 yield fp
1110 1111
1111 1112 def tip(self):
1112 1113 return self.node(len(self.index) - 1)
1113 1114 def __contains__(self, rev):
1114 1115 return 0 <= rev < len(self)
1115 1116 def __len__(self):
1116 1117 return len(self.index)
1117 1118 def __iter__(self):
1118 1119 return iter(pycompat.xrange(len(self)))
1119 1120 def revs(self, start=0, stop=None):
1120 1121 """iterate over all rev in this revlog (from start to stop)"""
1121 1122 step = 1
1122 1123 length = len(self)
1123 1124 if stop is not None:
1124 1125 if start > stop:
1125 1126 step = -1
1126 1127 stop += step
1127 1128 if stop > length:
1128 1129 stop = length
1129 1130 else:
1130 1131 stop = length
1131 1132 return pycompat.xrange(start, stop, step)
1132 1133
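# Note on revs() semantics above (illustrative): an explicit stop is
# inclusive and the range may run backwards, e.g. revs(5, 2) iterates
# 5, 4, 3, 2, while revs() alone iterates 0 .. len(self) - 1.
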
1133 1134 @util.propertycache
1134 1135 def nodemap(self):
1135 1136 if self.index:
1136 1137 # populate mapping down to the initial node
1137 1138 node0 = self.index[0][7] # get around changelog filtering
1138 1139 self.rev(node0)
1139 1140 return self._nodecache
1140 1141
1141 1142 def hasnode(self, node):
1142 1143 try:
1143 1144 self.rev(node)
1144 1145 return True
1145 1146 except KeyError:
1146 1147 return False
1147 1148
1148 1149 def candelta(self, baserev, rev):
1149 1150 """whether two revisions (baserev, rev) can be delta-ed or not"""
1150 1151 # Disable delta if either rev requires a content-changing flag
1151 1152 # processor (ex. LFS). This is because such flag processor can alter
1152 1153 # the rawtext content that the delta will be based on, and two clients
1153 1154 # could have a same revlog node with different flags (i.e. different
1154 1155 # rawtext contents) and the delta could be incompatible.
1155 1156 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1156 1157 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1157 1158 return False
1158 1159 return True
1159 1160
1160 1161 def clearcaches(self):
1161 1162 self._cache = None
1162 1163 self._chainbasecache.clear()
1163 1164 self._chunkcache = (0, '')
1164 1165 self._pcache = {}
1165 1166
1166 1167 try:
1167 1168 self._nodecache.clearcaches()
1168 1169 except AttributeError:
1169 1170 self._nodecache = {nullid: nullrev}
1170 1171 self._nodepos = None
1171 1172
1172 1173 def rev(self, node):
1173 1174 try:
1174 1175 return self._nodecache[node]
1175 1176 except TypeError:
1176 1177 raise
1177 1178 except RevlogError:
1178 1179 # parsers.c radix tree lookup failed
1179 1180 if node == wdirid or node in wdirfilenodeids:
1180 1181 raise error.WdirUnsupported
1181 1182 raise LookupError(node, self.indexfile, _('no node'))
1182 1183 except KeyError:
1183 1184 # pure python cache lookup failed
1184 1185 n = self._nodecache
1185 1186 i = self.index
1186 1187 p = self._nodepos
1187 1188 if p is None:
1188 1189 p = len(i) - 1
1189 1190 else:
1190 1191 assert p < len(i)
1191 1192 for r in pycompat.xrange(p, -1, -1):
1192 1193 v = i[r][7]
1193 1194 n[v] = r
1194 1195 if v == node:
1195 1196 self._nodepos = r - 1
1196 1197 return r
1197 1198 if node == wdirid or node in wdirfilenodeids:
1198 1199 raise error.WdirUnsupported
1199 1200 raise LookupError(node, self.indexfile, _('no node'))
1200 1201
1201 1202 # Accessors for index entries.
1202 1203
1203 1204 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1204 1205 # are flags.
1205 1206 def start(self, rev):
1206 1207 return int(self.index[rev][0] >> 16)
1207 1208
1208 1209 def flags(self, rev):
1209 1210 return self.index[rev][0] & 0xFFFF
1210 1211
1211 1212 def length(self, rev):
1212 1213 return self.index[rev][1]
1213 1214
1214 1215 def rawsize(self, rev):
1215 1216 """return the length of the uncompressed text for a given revision"""
1216 1217 l = self.index[rev][2]
1217 1218 if l >= 0:
1218 1219 return l
1219 1220
1220 1221 t = self.revision(rev, raw=True)
1221 1222 return len(t)
1222 1223
1223 1224 def size(self, rev):
1224 1225 """length of non-raw text (processed by a "read" flag processor)"""
1225 1226 # fast path: if no "read" flag processor could change the content,
1226 1227 # size is rawsize. note: ELLIPSIS is known to not change the content.
1227 1228 flags = self.flags(rev)
1228 1229 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1229 1230 return self.rawsize(rev)
1230 1231
1231 1232 return len(self.revision(rev, raw=False))
1232 1233
1233 1234 def chainbase(self, rev):
1234 1235 base = self._chainbasecache.get(rev)
1235 1236 if base is not None:
1236 1237 return base
1237 1238
1238 1239 index = self.index
1239 1240 iterrev = rev
1240 1241 base = index[iterrev][3]
1241 1242 while base != iterrev:
1242 1243 iterrev = base
1243 1244 base = index[iterrev][3]
1244 1245
1245 1246 self._chainbasecache[rev] = base
1246 1247 return base
1247 1248
1248 1249 def linkrev(self, rev):
1249 1250 return self.index[rev][4]
1250 1251
1251 1252 def parentrevs(self, rev):
1252 1253 try:
1253 1254 entry = self.index[rev]
1254 1255 except IndexError:
1255 1256 if rev == wdirrev:
1256 1257 raise error.WdirUnsupported
1257 1258 raise
1258 1259
1259 1260 return entry[5], entry[6]
1260 1261
1261 1262 def node(self, rev):
1262 1263 try:
1263 1264 return self.index[rev][7]
1264 1265 except IndexError:
1265 1266 if rev == wdirrev:
1266 1267 raise error.WdirUnsupported
1267 1268 raise
1268 1269
1269 1270 # Derived from index values.
1270 1271
1271 1272 def end(self, rev):
1272 1273 return self.start(rev) + self.length(rev)
1273 1274
1274 1275 def parents(self, node):
1275 1276 i = self.index
1276 1277 d = i[self.rev(node)]
1277 1278 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1278 1279
1279 1280 def chainlen(self, rev):
1280 1281 return self._chaininfo(rev)[0]
1281 1282
1282 1283 def _chaininfo(self, rev):
1283 1284 chaininfocache = self._chaininfocache
1284 1285 if rev in chaininfocache:
1285 1286 return chaininfocache[rev]
1286 1287 index = self.index
1287 1288 generaldelta = self._generaldelta
1288 1289 iterrev = rev
1289 1290 e = index[iterrev]
1290 1291 clen = 0
1291 1292 compresseddeltalen = 0
1292 1293 while iterrev != e[3]:
1293 1294 clen += 1
1294 1295 compresseddeltalen += e[1]
1295 1296 if generaldelta:
1296 1297 iterrev = e[3]
1297 1298 else:
1298 1299 iterrev -= 1
1299 1300 if iterrev in chaininfocache:
1300 1301 t = chaininfocache[iterrev]
1301 1302 clen += t[0]
1302 1303 compresseddeltalen += t[1]
1303 1304 break
1304 1305 e = index[iterrev]
1305 1306 else:
1306 1307 # Add text length of base since decompressing that also takes
1307 1308 # work. For cache hits the length is already included.
1308 1309 compresseddeltalen += e[1]
1309 1310 r = (clen, compresseddeltalen)
1310 1311 chaininfocache[rev] = r
1311 1312 return r
1312 1313
1313 1314 def _deltachain(self, rev, stoprev=None):
1314 1315 """Obtain the delta chain for a revision.
1315 1316
1316 1317 ``stoprev`` specifies a revision to stop at. If not specified, we
1317 1318 stop at the base of the chain.
1318 1319
1319 1320 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1320 1321 revs in ascending order and ``stopped`` is a bool indicating whether
1321 1322 ``stoprev`` was hit.
1322 1323 """
1323 1324 # Try C implementation.
1324 1325 try:
1325 1326 return self.index.deltachain(rev, stoprev, self._generaldelta)
1326 1327 except AttributeError:
1327 1328 pass
1328 1329
1329 1330 chain = []
1330 1331
1331 1332 # Alias to prevent attribute lookup in tight loop.
1332 1333 index = self.index
1333 1334 generaldelta = self._generaldelta
1334 1335
1335 1336 iterrev = rev
1336 1337 e = index[iterrev]
1337 1338 while iterrev != e[3] and iterrev != stoprev:
1338 1339 chain.append(iterrev)
1339 1340 if generaldelta:
1340 1341 iterrev = e[3]
1341 1342 else:
1342 1343 iterrev -= 1
1343 1344 e = index[iterrev]
1344 1345
1345 1346 if iterrev == stoprev:
1346 1347 stopped = True
1347 1348 else:
1348 1349 chain.append(iterrev)
1349 1350 stopped = False
1350 1351
1351 1352 chain.reverse()
1352 1353 return chain, stopped
1353 1354
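# Illustrative example, assuming a generaldelta revlog where rev 0 is
# a full snapshot and revs 1 and 2 each delta against their parent:
#   _deltachain(2)            -> ([0, 1, 2], False)
#   _deltachain(2, stoprev=1) -> ([2], True)
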
1354 1355 def ancestors(self, revs, stoprev=0, inclusive=False):
1355 1356 """Generate the ancestors of 'revs' in reverse topological order.
1356 1357 Does not generate revs lower than stoprev.
1357 1358
1358 1359 See the documentation for ancestor.lazyancestors for more details."""
1359 1360
1360 1361 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1361 1362 inclusive=inclusive)
1362 1363
1363 1364 def descendants(self, revs):
1364 1365 """Generate the descendants of 'revs' in revision order.
1365 1366
1366 1367 Yield a sequence of revision numbers starting with a child of
1367 1368 some rev in revs, i.e., each revision is *not* considered a
1368 1369 descendant of itself. Results are ordered by revision number (a
1369 1370 topological sort)."""
1370 1371 first = min(revs)
1371 1372 if first == nullrev:
1372 1373 for i in self:
1373 1374 yield i
1374 1375 return
1375 1376
1376 1377 seen = set(revs)
1377 1378 for i in self.revs(start=first + 1):
1378 1379 for x in self.parentrevs(i):
1379 1380 if x != nullrev and x in seen:
1380 1381 seen.add(i)
1381 1382 yield i
1382 1383 break
1383 1384
1384 1385 def findcommonmissing(self, common=None, heads=None):
1385 1386 """Return a tuple of the ancestors of common and the ancestors of heads
1386 1387 that are not ancestors of common. In revset terminology, we return the
1387 1388 tuple:
1388 1389
1389 1390 ::common, (::heads) - (::common)
1390 1391
1391 1392 The list is sorted by revision number, meaning it is
1392 1393 topologically sorted.
1393 1394
1394 1395 'heads' and 'common' are both lists of node IDs. If heads is
1395 1396 not supplied, uses all of the revlog's heads. If common is not
1396 1397 supplied, uses nullid."""
1397 1398 if common is None:
1398 1399 common = [nullid]
1399 1400 if heads is None:
1400 1401 heads = self.heads()
1401 1402
1402 1403 common = [self.rev(n) for n in common]
1403 1404 heads = [self.rev(n) for n in heads]
1404 1405
1405 1406 # we want the ancestors, but inclusive
1406 1407 class lazyset(object):
1407 1408 def __init__(self, lazyvalues):
1408 1409 self.addedvalues = set()
1409 1410 self.lazyvalues = lazyvalues
1410 1411
1411 1412 def __contains__(self, value):
1412 1413 return value in self.addedvalues or value in self.lazyvalues
1413 1414
1414 1415 def __iter__(self):
1415 1416 added = self.addedvalues
1416 1417 for r in added:
1417 1418 yield r
1418 1419 for r in self.lazyvalues:
1419 1420 if r not in added:
1420 1421 yield r
1421 1422
1422 1423 def add(self, value):
1423 1424 self.addedvalues.add(value)
1424 1425
1425 1426 def update(self, values):
1426 1427 self.addedvalues.update(values)
1427 1428
1428 1429 has = lazyset(self.ancestors(common))
1429 1430 has.add(nullrev)
1430 1431 has.update(common)
1431 1432
1432 1433 # take all ancestors from heads that aren't in has
1433 1434 missing = set()
1434 1435 visit = collections.deque(r for r in heads if r not in has)
1435 1436 while visit:
1436 1437 r = visit.popleft()
1437 1438 if r in missing:
1438 1439 continue
1439 1440 else:
1440 1441 missing.add(r)
1441 1442 for p in self.parentrevs(r):
1442 1443 if p not in has:
1443 1444 visit.append(p)
1444 1445 missing = list(missing)
1445 1446 missing.sort()
1446 1447 return has, [self.node(miss) for miss in missing]
1447 1448
1448 1449 def incrementalmissingrevs(self, common=None):
1449 1450 """Return an object that can be used to incrementally compute the
1450 1451 revision numbers of the ancestors of arbitrary sets that are not
1451 1452 ancestors of common. This is an ancestor.incrementalmissingancestors
1452 1453 object.
1453 1454
1454 1455 'common' is a list of revision numbers. If common is not supplied, uses
1455 1456 nullrev.
1456 1457 """
1457 1458 if common is None:
1458 1459 common = [nullrev]
1459 1460
1460 1461 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1461 1462
1462 1463 def findmissingrevs(self, common=None, heads=None):
1463 1464 """Return the revision numbers of the ancestors of heads that
1464 1465 are not ancestors of common.
1465 1466
1466 1467 More specifically, return a list of revision numbers corresponding to
1467 1468 nodes N such that every N satisfies the following constraints:
1468 1469
1469 1470 1. N is an ancestor of some node in 'heads'
1470 1471 2. N is not an ancestor of any node in 'common'
1471 1472
1472 1473 The list is sorted by revision number, meaning it is
1473 1474 topologically sorted.
1474 1475
1475 1476 'heads' and 'common' are both lists of revision numbers. If heads is
1476 1477 not supplied, uses all of the revlog's heads. If common is not
1477 1478 supplied, uses nullid."""
1478 1479 if common is None:
1479 1480 common = [nullrev]
1480 1481 if heads is None:
1481 1482 heads = self.headrevs()
1482 1483
1483 1484 inc = self.incrementalmissingrevs(common=common)
1484 1485 return inc.missingancestors(heads)
1485 1486
1486 1487 def findmissing(self, common=None, heads=None):
1487 1488 """Return the ancestors of heads that are not ancestors of common.
1488 1489
1489 1490 More specifically, return a list of nodes N such that every N
1490 1491 satisfies the following constraints:
1491 1492
1492 1493 1. N is an ancestor of some node in 'heads'
1493 1494 2. N is not an ancestor of any node in 'common'
1494 1495
1495 1496 The list is sorted by revision number, meaning it is
1496 1497 topologically sorted.
1497 1498
1498 1499 'heads' and 'common' are both lists of node IDs. If heads is
1499 1500 not supplied, uses all of the revlog's heads. If common is not
1500 1501 supplied, uses nullid."""
1501 1502 if common is None:
1502 1503 common = [nullid]
1503 1504 if heads is None:
1504 1505 heads = self.heads()
1505 1506
1506 1507 common = [self.rev(n) for n in common]
1507 1508 heads = [self.rev(n) for n in heads]
1508 1509
1509 1510 inc = self.incrementalmissingrevs(common=common)
1510 1511 return [self.node(r) for r in inc.missingancestors(heads)]
1511 1512
1512 1513 def nodesbetween(self, roots=None, heads=None):
1513 1514 """Return a topological path from 'roots' to 'heads'.
1514 1515
1515 1516 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1516 1517 topologically sorted list of all nodes N that satisfy both of
1517 1518 these constraints:
1518 1519
1519 1520 1. N is a descendant of some node in 'roots'
1520 1521 2. N is an ancestor of some node in 'heads'
1521 1522
1522 1523 Every node is considered to be both a descendant and an ancestor
1523 1524 of itself, so every reachable node in 'roots' and 'heads' will be
1524 1525 included in 'nodes'.
1525 1526
1526 1527 'outroots' is the list of reachable nodes in 'roots', i.e., the
1527 1528 subset of 'roots' that is returned in 'nodes'. Likewise,
1528 1529 'outheads' is the subset of 'heads' that is also in 'nodes'.
1529 1530
1530 1531 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1531 1532 unspecified, uses nullid as the only root. If 'heads' is
1532 1533 unspecified, uses list of all of the revlog's heads."""
1533 1534 nonodes = ([], [], [])
1534 1535 if roots is not None:
1535 1536 roots = list(roots)
1536 1537 if not roots:
1537 1538 return nonodes
1538 1539 lowestrev = min([self.rev(n) for n in roots])
1539 1540 else:
1540 1541 roots = [nullid] # Everybody's a descendant of nullid
1541 1542 lowestrev = nullrev
1542 1543 if (lowestrev == nullrev) and (heads is None):
1543 1544 # We want _all_ the nodes!
1544 1545 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1545 1546 if heads is None:
1546 1547 # All nodes are ancestors, so the latest ancestor is the last
1547 1548 # node.
1548 1549 highestrev = len(self) - 1
1549 1550 # Set ancestors to None to signal that every node is an ancestor.
1550 1551 ancestors = None
1551 1552 # Set heads to an empty dictionary for later discovery of heads
1552 1553 heads = {}
1553 1554 else:
1554 1555 heads = list(heads)
1555 1556 if not heads:
1556 1557 return nonodes
1557 1558 ancestors = set()
1558 1559 # Turn heads into a dictionary so we can remove 'fake' heads.
1559 1560 # Also, later we will be using it to filter out the heads we can't
1560 1561 # find from roots.
1561 1562 heads = dict.fromkeys(heads, False)
1562 1563 # Start at the top and keep marking parents until we're done.
1563 1564 nodestotag = set(heads)
1564 1565 # Remember where the top was so we can use it as a limit later.
1565 1566 highestrev = max([self.rev(n) for n in nodestotag])
1566 1567 while nodestotag:
1567 1568 # grab a node to tag
1568 1569 n = nodestotag.pop()
1569 1570 # Never tag nullid
1570 1571 if n == nullid:
1571 1572 continue
1572 1573 # A node's revision number represents its place in a
1573 1574 # topologically sorted list of nodes.
1574 1575 r = self.rev(n)
1575 1576 if r >= lowestrev:
1576 1577 if n not in ancestors:
1577 1578 # If we are possibly a descendant of one of the roots
1578 1579 # and we haven't already been marked as an ancestor
1579 1580 ancestors.add(n) # Mark as ancestor
1580 1581 # Add non-nullid parents to list of nodes to tag.
1581 1582 nodestotag.update([p for p in self.parents(n) if
1582 1583 p != nullid])
1583 1584 elif n in heads: # We've seen it before, is it a fake head?
1584 1585                 # So it is; real heads should not be the ancestors of
1585 1586 # any other heads.
1586 1587 heads.pop(n)
1587 1588 if not ancestors:
1588 1589 return nonodes
1589 1590 # Now that we have our set of ancestors, we want to remove any
1590 1591 # roots that are not ancestors.
1591 1592
1592 1593 # If one of the roots was nullid, everything is included anyway.
1593 1594 if lowestrev > nullrev:
1594 1595             # But, since it wasn't, let's recompute the lowest rev to not
1595 1596 # include roots that aren't ancestors.
1596 1597
1597 1598 # Filter out roots that aren't ancestors of heads
1598 1599 roots = [root for root in roots if root in ancestors]
1599 1600 # Recompute the lowest revision
1600 1601 if roots:
1601 1602 lowestrev = min([self.rev(root) for root in roots])
1602 1603 else:
1603 1604 # No more roots? Return empty list
1604 1605 return nonodes
1605 1606 else:
1606 1607 # We are descending from nullid, and don't need to care about
1607 1608 # any other roots.
1608 1609 lowestrev = nullrev
1609 1610 roots = [nullid]
1610 1611 # Transform our roots list into a set.
1611 1612 descendants = set(roots)
1612 1613 # Also, keep the original roots so we can filter out roots that aren't
1613 1614 # 'real' roots (i.e. are descended from other roots).
1614 1615 roots = descendants.copy()
1615 1616 # Our topologically sorted list of output nodes.
1616 1617 orderedout = []
1617 1618 # Don't start at nullid since we don't want nullid in our output list,
1618 1619 # and if nullid shows up in descendants, empty parents will look like
1619 1620 # they're descendants.
1620 1621 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1621 1622 n = self.node(r)
1622 1623 isdescendant = False
1623 1624 if lowestrev == nullrev: # Everybody is a descendant of nullid
1624 1625 isdescendant = True
1625 1626 elif n in descendants:
1626 1627 # n is already a descendant
1627 1628 isdescendant = True
1628 1629 # This check only needs to be done here because all the roots
1629 1630                 # will start being marked as descendants before the loop.
1630 1631 if n in roots:
1631 1632 # If n was a root, check if it's a 'real' root.
1632 1633 p = tuple(self.parents(n))
1633 1634 # If any of its parents are descendants, it's not a root.
1634 1635 if (p[0] in descendants) or (p[1] in descendants):
1635 1636 roots.remove(n)
1636 1637 else:
1637 1638 p = tuple(self.parents(n))
1638 1639             # A node is a descendant if either of its parents is a
1639 1640             # descendant. (We seeded the descendants set with the roots
1640 1641 # up there, remember?)
1641 1642 if (p[0] in descendants) or (p[1] in descendants):
1642 1643 descendants.add(n)
1643 1644 isdescendant = True
1644 1645 if isdescendant and ((ancestors is None) or (n in ancestors)):
1645 1646 # Only include nodes that are both descendants and ancestors.
1646 1647 orderedout.append(n)
1647 1648 if (ancestors is not None) and (n in heads):
1648 1649 # We're trying to figure out which heads are reachable
1649 1650 # from roots.
1650 1651 # Mark this head as having been reached
1651 1652 heads[n] = True
1652 1653 elif ancestors is None:
1653 1654 # Otherwise, we're trying to discover the heads.
1654 1655 # Assume this is a head because if it isn't, the next step
1655 1656 # will eventually remove it.
1656 1657 heads[n] = True
1657 1658 # But, obviously its parents aren't.
1658 1659 for p in self.parents(n):
1659 1660 heads.pop(p, None)
1660 1661 heads = [head for head, flag in heads.iteritems() if flag]
1661 1662 roots = list(roots)
1662 1663 assert orderedout
1663 1664 assert roots
1664 1665 assert heads
1665 1666 return (orderedout, roots, heads)
1666 1667
1667 1668 def headrevs(self):
1668 1669 try:
1669 1670 return self.index.headrevs()
1670 1671 except AttributeError:
1671 1672 return self._headrevs()
1672 1673
1673 1674 def computephases(self, roots):
1674 1675 return self.index.computephasesmapsets(roots)
1675 1676
1676 1677 def _headrevs(self):
1677 1678 count = len(self)
1678 1679 if not count:
1679 1680 return [nullrev]
1680 1681         # we won't iterate over filtered revs, so nobody is a head at the start
1681 1682 ishead = [0] * (count + 1)
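        # The extra slot absorbs writes for nullrev parents: e[5]/e[6] are -1
        # for root revisions, and ishead[-1] aliases this sentinel entry, so
        # a real revision's entry is never clobbered.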
1682 1683 index = self.index
1683 1684 for r in self:
1684 1685             ishead[r] = 1 # I may be a head
1685 1686 e = index[r]
1686 1687             ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1687 1688 return [r for r, val in enumerate(ishead) if val]
1688 1689
1689 1690 def heads(self, start=None, stop=None):
1690 1691 """return the list of all nodes that have no children
1691 1692
1692 1693 if start is specified, only heads that are descendants of
1693 1694 start will be returned
1694 1695 if stop is specified, it will consider all the revs from stop
1695 1696 as if they had no children
1696 1697 """
1697 1698 if start is None and stop is None:
1698 1699 if not len(self):
1699 1700 return [nullid]
1700 1701 return [self.node(r) for r in self.headrevs()]
1701 1702
1702 1703 if start is None:
1703 1704 start = nullid
1704 1705 if stop is None:
1705 1706 stop = []
1706 1707 stoprevs = set([self.rev(n) for n in stop])
1707 1708 startrev = self.rev(start)
1708 1709 reachable = {startrev}
1709 1710 heads = {startrev}
1710 1711
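        # Single forward sweep: a rev becomes reachable (and a candidate
        # head) once one of its parents is reachable, at which point that
        # parent stops being a head. One pass suffices because parents
        # always have smaller revision numbers than their children.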
1711 1712 parentrevs = self.parentrevs
1712 1713 for r in self.revs(start=startrev + 1):
1713 1714 for p in parentrevs(r):
1714 1715 if p in reachable:
1715 1716 if r not in stoprevs:
1716 1717 reachable.add(r)
1717 1718 heads.add(r)
1718 1719 if p in heads and p not in stoprevs:
1719 1720 heads.remove(p)
1720 1721
1721 1722 return [self.node(r) for r in heads]
1722 1723
1723 1724 def children(self, node):
1724 1725 """find the children of a given node"""
1725 1726 c = []
1726 1727 p = self.rev(node)
1727 1728 for r in self.revs(start=p + 1):
1728 1729 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1729 1730 if prevs:
1730 1731 for pr in prevs:
1731 1732 if pr == p:
1732 1733 c.append(self.node(r))
1733 1734 elif p == nullrev:
1734 1735 c.append(self.node(r))
1735 1736 return c
1736 1737
1737 1738 def commonancestorsheads(self, a, b):
1738 1739 """calculate all the heads of the common ancestors of nodes a and b"""
1739 1740 a, b = self.rev(a), self.rev(b)
1740 1741 ancs = self._commonancestorsheads(a, b)
1741 1742 return pycompat.maplist(self.node, ancs)
1742 1743
1743 1744 def _commonancestorsheads(self, *revs):
1744 1745 """calculate all the heads of the common ancestors of revs"""
1745 1746 try:
1746 1747 ancs = self.index.commonancestorsheads(*revs)
1747 1748 except (AttributeError, OverflowError): # C implementation failed
1748 1749 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1749 1750 return ancs
1750 1751
1751 1752 def isancestor(self, a, b):
1752 1753 """return True if node a is an ancestor of node b
1753 1754
1754 1755 A revision is considered an ancestor of itself."""
1755 1756 a, b = self.rev(a), self.rev(b)
1756 1757 return self.isancestorrev(a, b)
1757 1758
1758 1759 def isancestorrev(self, a, b):
1759 1760 """return True if revision a is an ancestor of revision b
1760 1761
1761 1762 A revision is considered an ancestor of itself.
1762 1763
1763 1764 The implementation of this is trivial but the use of
1764 1765 commonancestorsheads is not."""
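        # a is an ancestor of b exactly when a itself is one of the heads of
        # the common ancestors of a and b; the comparisons below only
        # short-circuit the obvious cases.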
1765 1766 if a == nullrev:
1766 1767 return True
1767 1768 elif a == b:
1768 1769 return True
1769 1770 elif a > b:
1770 1771 return False
1771 1772 return a in self._commonancestorsheads(a, b)
1772 1773
1773 1774 def ancestor(self, a, b):
1774 1775 """calculate the "best" common ancestor of nodes a and b"""
1775 1776
1776 1777 a, b = self.rev(a), self.rev(b)
1777 1778 try:
1778 1779 ancs = self.index.ancestors(a, b)
1779 1780 except (AttributeError, OverflowError):
1780 1781 ancs = ancestor.ancestors(self.parentrevs, a, b)
1781 1782 if ancs:
1782 1783 # choose a consistent winner when there's a tie
1783 1784 return min(map(self.node, ancs))
1784 1785 return nullid
1785 1786
1786 1787 def _match(self, id):
1787 1788 if isinstance(id, int):
1788 1789 # rev
1789 1790 return self.node(id)
1790 1791 if len(id) == 20:
1791 1792 # possibly a binary node
1792 1793 # odds of a binary node being all hex in ASCII are 1 in 10**25
1793 1794 try:
1794 1795 node = id
1795 1796 self.rev(node) # quick search the index
1796 1797 return node
1797 1798 except LookupError:
1798 1799 pass # may be partial hex id
1799 1800 try:
1800 1801 # str(rev)
1801 1802 rev = int(id)
1802 1803 if "%d" % rev != id:
1803 1804 raise ValueError
1804 1805 if rev < 0:
1805 1806 rev = len(self) + rev
1806 1807 if rev < 0 or rev >= len(self):
1807 1808 raise ValueError
1808 1809 return self.node(rev)
1809 1810 except (ValueError, OverflowError):
1810 1811 pass
1811 1812 if len(id) == 40:
1812 1813 try:
1813 1814 # a full hex nodeid?
1814 1815 node = bin(id)
1815 1816 self.rev(node)
1816 1817 return node
1817 1818 except (TypeError, LookupError):
1818 1819 pass
1819 1820
1820 1821 def _partialmatch(self, id):
1821 1822         # we don't care about wdirfilenodeids as they should always be full hashes
1822 1823 maybewdir = wdirhex.startswith(id)
1823 1824 try:
1824 1825 partial = self.index.partialmatch(id)
1825 1826 if partial and self.hasnode(partial):
1826 1827 if maybewdir:
1827 1828 # single 'ff...' match in radix tree, ambiguous with wdir
1828 1829 raise RevlogError
1829 1830 return partial
1830 1831 if maybewdir:
1831 1832 # no 'ff...' match in radix tree, wdir identified
1832 1833 raise error.WdirUnsupported
1833 1834 return None
1834 1835 except RevlogError:
1835 1836 # parsers.c radix tree lookup gave multiple matches
1836 1837 # fast path: for unfiltered changelog, radix tree is accurate
1837 1838 if not getattr(self, 'filteredrevs', None):
1838 1839 raise AmbiguousPrefixLookupError(id, self.indexfile,
1839 1840 _('ambiguous identifier'))
1840 1841 # fall through to slow path that filters hidden revisions
1841 1842 except (AttributeError, ValueError):
1842 1843 # we are pure python, or key was too short to search radix tree
1843 1844 pass
1844 1845
1845 1846 if id in self._pcache:
1846 1847 return self._pcache[id]
1847 1848
1848 1849 if len(id) <= 40:
1849 1850 try:
1850 1851 # hex(node)[:...]
1851 1852 l = len(id) // 2 # grab an even number of digits
1852 1853 prefix = bin(id[:l * 2])
1853 1854 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1854 1855 nl = [n for n in nl if hex(n).startswith(id) and
1855 1856 self.hasnode(n)]
1857 if nullhex.startswith(id):
1858 nl.append(nullid)
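                # e.g. a prefix like '000' now resolves to the null revision
                # even though nullid never appears in the index entries
                # scanned above.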
1856 1859 if len(nl) > 0:
1857 1860 if len(nl) == 1 and not maybewdir:
1858 1861 self._pcache[id] = nl[0]
1859 1862 return nl[0]
1860 1863 raise AmbiguousPrefixLookupError(id, self.indexfile,
1861 1864 _('ambiguous identifier'))
1862 1865 if maybewdir:
1863 1866 raise error.WdirUnsupported
1864 1867 return None
1865 1868 except TypeError:
1866 1869 pass
1867 1870
1868 1871 def lookup(self, id):
1869 1872 """locate a node based on:
1870 1873 - revision number or str(revision number)
1871 1874 - nodeid or subset of hex nodeid
1872 1875 """
1873 1876 n = self._match(id)
1874 1877 if n is not None:
1875 1878 return n
1876 1879 n = self._partialmatch(id)
1877 1880 if n:
1878 1881 return n
1879 1882
1880 1883 raise LookupError(id, self.indexfile, _('no match found'))
1881 1884
1882 1885 def shortest(self, node, minlength=1):
1883 1886 """Find the shortest unambiguous prefix that matches node."""
1884 1887 def isvalid(prefix):
1885 1888 try:
1886 1889 node = self._partialmatch(prefix)
1887 1890 except error.RevlogError:
1888 1891 return False
1889 1892 except error.WdirUnsupported:
1890 1893 # single 'ff...' match
1891 1894 return True
1892 1895 if node is None:
1893 1896 raise LookupError(node, self.indexfile, _('no node'))
1894 1897 return True
1895 1898
1896 1899 def maybewdir(prefix):
1897 1900 return all(c == 'f' for c in prefix)
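        # e.g. 'fff' could be an abbreviation of the all-'f' virtual
        # working-directory id, so such a prefix is never unambiguous.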
1898 1901
1899 1902 hexnode = hex(node)
1900 1903
1901 1904 def disambiguate(hexnode, minlength):
1902 1905 """Disambiguate against wdirid."""
1903 1906 for length in range(minlength, 41):
1904 1907 prefix = hexnode[:length]
1905 1908 if not maybewdir(prefix):
1906 1909 return prefix
1907 1910
1908 1911 if not getattr(self, 'filteredrevs', None):
1909 1912 try:
1910 1913 length = max(self.index.shortest(node), minlength)
1911 1914 return disambiguate(hexnode, length)
1912 1915 except RevlogError:
1913 1916 if node != wdirid:
1914 1917 raise LookupError(node, self.indexfile, _('no node'))
1915 1918 except AttributeError:
1916 1919 # Fall through to pure code
1917 1920 pass
1918 1921
1919 1922 if node == wdirid:
1920 1923 for length in range(minlength, 41):
1921 1924 prefix = hexnode[:length]
1922 1925 if isvalid(prefix):
1923 1926 return prefix
1924 1927
1925 1928 for length in range(minlength, 41):
1926 1929 prefix = hexnode[:length]
1927 1930 if isvalid(prefix):
1928 1931 return disambiguate(hexnode, length)
1929 1932
1930 1933 def cmp(self, node, text):
1931 1934 """compare text with a given file revision
1932 1935
1933 1936         returns True if text is different from what is stored.
1934 1937 """
1935 1938 p1, p2 = self.parents(node)
1936 1939 return hash(text, p1, p2) != node
1937 1940
1938 1941 def _cachesegment(self, offset, data):
1939 1942 """Add a segment to the revlog cache.
1940 1943
1941 1944 Accepts an absolute offset and the data that is at that location.
1942 1945 """
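        # A segment contiguous with the cached window extends it in place
        # (up to _chunksize); anything else simply replaces the cache,
        # keeping this a single-window cache.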
1943 1946 o, d = self._chunkcache
1944 1947 # try to add to existing cache
1945 1948 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1946 1949 self._chunkcache = o, d + data
1947 1950 else:
1948 1951 self._chunkcache = offset, data
1949 1952
1950 1953 def _readsegment(self, offset, length, df=None):
1951 1954 """Load a segment of raw data from the revlog.
1952 1955
1953 1956 Accepts an absolute offset, length to read, and an optional existing
1954 1957 file handle to read from.
1955 1958
1956 1959 If an existing file handle is passed, it will be seeked and the
1957 1960 original seek position will NOT be restored.
1958 1961
1959 1962 Returns a str or buffer of raw byte data.
1960 1963 """
1961 1964 # Cache data both forward and backward around the requested
1962 1965 # data, in a fixed size window. This helps speed up operations
1963 1966 # involving reading the revlog backwards.
1964 1967 cachesize = self._chunkcachesize
1965 1968 realoffset = offset & ~(cachesize - 1)
1966 1969 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1967 1970 - realoffset)
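        # Illustrative arithmetic (assuming cachesize = 65536): a request
        # for offset=70000, length=100 rounds realoffset down to 65536 and
        # reads reallength = 65536 bytes, so neighboring requests in either
        # direction hit the cache.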
1968 1971 with self._datareadfp(df) as df:
1969 1972 df.seek(realoffset)
1970 1973 d = df.read(reallength)
1971 1974 self._cachesegment(realoffset, d)
1972 1975 if offset != realoffset or reallength != length:
1973 1976 return util.buffer(d, offset - realoffset, length)
1974 1977 return d
1975 1978
1976 1979 def _getsegment(self, offset, length, df=None):
1977 1980 """Obtain a segment of raw data from the revlog.
1978 1981
1979 1982 Accepts an absolute offset, length of bytes to obtain, and an
1980 1983 optional file handle to the already-opened revlog. If the file
1981 1984         handle is used, its original seek position will not be preserved.
1982 1985
1983 1986 Requests for data may be returned from a cache.
1984 1987
1985 1988 Returns a str or a buffer instance of raw byte data.
1986 1989 """
1987 1990 o, d = self._chunkcache
1988 1991 l = len(d)
1989 1992
1990 1993 # is it in the cache?
1991 1994 cachestart = offset - o
1992 1995 cacheend = cachestart + length
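        # Example (assumed values): with a cache at o=4096 holding 8192
        # bytes, a request for offset=5000, length=1000 yields
        # cachestart=904 and cacheend=1904, which lies entirely inside the
        # cached buffer, so no file read is needed.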
1993 1996 if cachestart >= 0 and cacheend <= l:
1994 1997 if cachestart == 0 and cacheend == l:
1995 1998 return d # avoid a copy
1996 1999 return util.buffer(d, cachestart, cacheend - cachestart)
1997 2000
1998 2001 return self._readsegment(offset, length, df=df)
1999 2002
2000 2003 def _getsegmentforrevs(self, startrev, endrev, df=None):
2001 2004 """Obtain a segment of raw data corresponding to a range of revisions.
2002 2005
2003 2006 Accepts the start and end revisions and an optional already-open
2004 2007 file handle to be used for reading. If the file handle is read, its
2005 2008 seek position will not be preserved.
2006 2009
2007 2010 Requests for data may be satisfied by a cache.
2008 2011
2009 2012 Returns a 2-tuple of (offset, data) for the requested range of
2010 2013 revisions. Offset is the integer offset from the beginning of the
2011 2014 revlog and data is a str or buffer of the raw byte data.
2012 2015
2013 2016 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2014 2017 to determine where each revision's data begins and ends.
2015 2018 """
2016 2019 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2017 2020 # (functions are expensive).
2018 2021 index = self.index
2019 2022 istart = index[startrev]
2020 2023 start = int(istart[0] >> 16)
2021 2024 if startrev == endrev:
2022 2025 end = start + istart[1]
2023 2026 else:
2024 2027 iend = index[endrev]
2025 2028 end = int(iend[0] >> 16) + iend[1]
2026 2029
2027 2030 if self._inline:
2028 2031 start += (startrev + 1) * self._io.size
2029 2032 end += (endrev + 1) * self._io.size
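            # In an inline revlog, index entries are interleaved with the
            # data, so revision r's data is preceded by r + 1 index entries;
            # hence the (rev + 1) * self._io.size adjustments above.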
2030 2033 length = end - start
2031 2034
2032 2035 return start, self._getsegment(start, length, df=df)
2033 2036
2034 2037 def _chunk(self, rev, df=None):
2035 2038 """Obtain a single decompressed chunk for a revision.
2036 2039
2037 2040 Accepts an integer revision and an optional already-open file handle
2038 2041 to be used for reading. If used, the seek position of the file will not
2039 2042 be preserved.
2040 2043
2041 2044 Returns a str holding uncompressed data for the requested revision.
2042 2045 """
2043 2046 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2044 2047
2045 2048 def _chunks(self, revs, df=None, targetsize=None):
2046 2049 """Obtain decompressed chunks for the specified revisions.
2047 2050
2048 2051 Accepts an iterable of numeric revisions that are assumed to be in
2049 2052 ascending order. Also accepts an optional already-open file handle
2050 2053 to be used for reading. If used, the seek position of the file will
2051 2054 not be preserved.
2052 2055
2053 2056 This function is similar to calling ``self._chunk()`` multiple times,
2054 2057 but is faster.
2055 2058
2056 2059 Returns a list with decompressed data for each requested revision.
2057 2060 """
2058 2061 if not revs:
2059 2062 return []
2060 2063 start = self.start
2061 2064 length = self.length
2062 2065 inline = self._inline
2063 2066 iosize = self._io.size
2064 2067 buffer = util.buffer
2065 2068
2066 2069 l = []
2067 2070 ladd = l.append
2068 2071
2069 2072 if not self._withsparseread:
2070 2073 slicedchunks = (revs,)
2071 2074 else:
2072 2075 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2073 2076
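        # With sparse-read enabled, the revision run is sliced into densely
        # packed groups so each group is fetched with one contiguous read
        # and large gaps between revisions are skipped entirely.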
2074 2077 for revschunk in slicedchunks:
2075 2078 firstrev = revschunk[0]
2076 2079 # Skip trailing revisions with empty diff
2077 2080 for lastrev in revschunk[::-1]:
2078 2081 if length(lastrev) != 0:
2079 2082 break
2080 2083
2081 2084 try:
2082 2085 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2083 2086 except OverflowError:
2084 2087 # issue4215 - we can't cache a run of chunks greater than
2085 2088 # 2G on Windows
2086 2089 return [self._chunk(rev, df=df) for rev in revschunk]
2087 2090
2088 2091 decomp = self.decompress
2089 2092 for rev in revschunk:
2090 2093 chunkstart = start(rev)
2091 2094 if inline:
2092 2095 chunkstart += (rev + 1) * iosize
2093 2096 chunklength = length(rev)
2094 2097 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2095 2098
2096 2099 return l
2097 2100
2098 2101 def _chunkclear(self):
2099 2102 """Clear the raw chunk cache."""
2100 2103 self._chunkcache = (0, '')
2101 2104
2102 2105 def deltaparent(self, rev):
2103 2106 """return deltaparent of the given revision"""
2104 2107 base = self.index[rev][3]
2105 2108 if base == rev:
2106 2109 return nullrev
2107 2110 elif self._generaldelta:
2108 2111 return base
2109 2112 else:
2110 2113 return rev - 1
2111 2114
2112 2115 def issnapshot(self, rev):
2113 2116 """tells whether rev is a snapshot
2114 2117 """
2115 2118 if rev == nullrev:
2116 2119 return True
2117 2120 deltap = self.deltaparent(rev)
2118 2121 if deltap == nullrev:
2119 2122 return True
2120 2123 p1, p2 = self.parentrevs(rev)
2121 2124 if deltap in (p1, p2):
2122 2125 return False
2123 2126 return self.issnapshot(deltap)
2124 2127
2125 2128 def snapshotdepth(self, rev):
2126 2129         """number of snapshots in the chain before this one"""
2127 2130 if not self.issnapshot(rev):
2128 2131             raise ProgrammingError('revision %d not a snapshot' % rev)
2129 2132 return len(self._deltachain(rev)[0]) - 1
2130 2133
2131 2134 def revdiff(self, rev1, rev2):
2132 2135 """return or calculate a delta between two revisions
2133 2136
2134 2137 The delta calculated is in binary form and is intended to be written to
2135 2138 revlog data directly. So this function needs raw revision data.
2136 2139 """
2137 2140 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2138 2141 return bytes(self._chunk(rev2))
2139 2142
2140 2143 return mdiff.textdiff(self.revision(rev1, raw=True),
2141 2144 self.revision(rev2, raw=True))
2142 2145
2143 2146 def revision(self, nodeorrev, _df=None, raw=False):
2144 2147 """return an uncompressed revision of a given node or revision
2145 2148 number.
2146 2149
2147 2150 _df - an existing file handle to read from. (internal-only)
2148 2151 raw - an optional argument specifying if the revision data is to be
2149 2152 treated as raw data when applying flag transforms. 'raw' should be set
2150 2153 to True when generating changegroups or in debug commands.
2151 2154 """
2152 2155 if isinstance(nodeorrev, int):
2153 2156 rev = nodeorrev
2154 2157 node = self.node(rev)
2155 2158 else:
2156 2159 node = nodeorrev
2157 2160 rev = None
2158 2161
2159 2162 cachedrev = None
2160 2163 flags = None
2161 2164 rawtext = None
2162 2165 if node == nullid:
2163 2166 return ""
2164 2167 if self._cache:
2165 2168 if self._cache[0] == node:
2166 2169 # _cache only stores rawtext
2167 2170 if raw:
2168 2171 return self._cache[2]
2169 2172 # duplicated, but good for perf
2170 2173 if rev is None:
2171 2174 rev = self.rev(node)
2172 2175 if flags is None:
2173 2176 flags = self.flags(rev)
2174 2177 # no extra flags set, no flag processor runs, text = rawtext
2175 2178 if flags == REVIDX_DEFAULT_FLAGS:
2176 2179 return self._cache[2]
2177 2180 # rawtext is reusable. need to run flag processor
2178 2181 rawtext = self._cache[2]
2179 2182
2180 2183 cachedrev = self._cache[1]
2181 2184
2182 2185 # look up what we need to read
2183 2186 if rawtext is None:
2184 2187 if rev is None:
2185 2188 rev = self.rev(node)
2186 2189
2187 2190 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2188 2191 if stopped:
2189 2192 rawtext = self._cache[2]
2190 2193
2191 2194 # drop cache to save memory
2192 2195 self._cache = None
2193 2196
2194 2197 targetsize = None
2195 2198 rawsize = self.index[rev][2]
2196 2199 if 0 <= rawsize:
2197 2200 targetsize = 4 * rawsize
2198 2201
2199 2202 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2200 2203 if rawtext is None:
2201 2204 rawtext = bytes(bins[0])
2202 2205 bins = bins[1:]
2203 2206
2204 2207 rawtext = mdiff.patches(rawtext, bins)
2205 2208 self._cache = (node, rev, rawtext)
2206 2209
2207 2210 if flags is None:
2208 2211 if rev is None:
2209 2212 rev = self.rev(node)
2210 2213 flags = self.flags(rev)
2211 2214
2212 2215 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2213 2216 if validatehash:
2214 2217 self.checkhash(text, node, rev=rev)
2215 2218
2216 2219 return text
2217 2220
2218 2221 def hash(self, text, p1, p2):
2219 2222 """Compute a node hash.
2220 2223
2221 2224 Available as a function so that subclasses can replace the hash
2222 2225 as needed.
2223 2226 """
2224 2227 return hash(text, p1, p2)
2225 2228
2226 2229 def _processflags(self, text, flags, operation, raw=False):
2227 2230         """Inspect revision data flags and apply transforms defined by
2228 2231 registered flag processors.
2229 2232
2230 2233 ``text`` - the revision data to process
2231 2234 ``flags`` - the revision flags
2232 2235 ``operation`` - the operation being performed (read or write)
2233 2236 ``raw`` - an optional argument describing if the raw transform should be
2234 2237 applied.
2235 2238
2236 2239 This method processes the flags in the order (or reverse order if
2237 2240 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2238 2241 flag processors registered for present flags. The order of flags defined
2239 2242         in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
2240 2243
2241 2244 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2242 2245 processed text and ``validatehash`` is a bool indicating whether the
2243 2246 returned text should be checked for hash integrity.
2244 2247
2245 2248 Note: If the ``raw`` argument is set, it has precedence over the
2246 2249 operation and will only update the value of ``validatehash``.
2247 2250 """
2248 2251 # fast path: no flag processors will run
2249 2252 if flags == 0:
2250 2253 return text, True
2251 2254         if operation not in ('read', 'write'):
2252 2255             raise ProgrammingError(_("invalid '%s' operation") % operation)
2253 2256 # Check all flags are known.
2254 2257 if flags & ~REVIDX_KNOWN_FLAGS:
2255 2258 raise RevlogError(_("incompatible revision flag '%#x'") %
2256 2259 (flags & ~REVIDX_KNOWN_FLAGS))
2257 2260 validatehash = True
2258 2261 # Depending on the operation (read or write), the order might be
2259 2262 # reversed due to non-commutative transforms.
2260 2263 orderedflags = REVIDX_FLAGS_ORDER
2261 2264 if operation == 'write':
2262 2265 orderedflags = reversed(orderedflags)
2263 2266
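        # e.g. if reading applies transforms A then B, writing must apply
        # the inverse transforms in the opposite order (B then A) so that a
        # write followed by a read reproduces the original text.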
2264 2267 for flag in orderedflags:
2265 2268 # If a flagprocessor has been registered for a known flag, apply the
2266 2269 # related operation transform and update result tuple.
2267 2270 if flag & flags:
2268 2271 vhash = True
2269 2272
2270 2273 if flag not in _flagprocessors:
2271 2274 message = _("missing processor for flag '%#x'") % (flag)
2272 2275 raise RevlogError(message)
2273 2276
2274 2277 processor = _flagprocessors[flag]
2275 2278 if processor is not None:
2276 2279 readtransform, writetransform, rawtransform = processor
2277 2280
2278 2281 if raw:
2279 2282 vhash = rawtransform(self, text)
2280 2283 elif operation == 'read':
2281 2284 text, vhash = readtransform(self, text)
2282 2285 else: # write operation
2283 2286 text, vhash = writetransform(self, text)
2284 2287 validatehash = validatehash and vhash
2285 2288
2286 2289 return text, validatehash
2287 2290
2288 2291 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2289 2292 """Check node hash integrity.
2290 2293
2291 2294 Available as a function so that subclasses can extend hash mismatch
2292 2295 behaviors as needed.
2293 2296 """
2294 2297 try:
2295 2298 if p1 is None and p2 is None:
2296 2299 p1, p2 = self.parents(node)
2297 2300 if node != self.hash(text, p1, p2):
2298 2301 revornode = rev
2299 2302 if revornode is None:
2300 2303 revornode = templatefilters.short(hex(node))
2301 2304 raise RevlogError(_("integrity check failed on %s:%s")
2302 2305 % (self.indexfile, pycompat.bytestr(revornode)))
2303 2306 except RevlogError:
2304 2307 if self._censorable and _censoredtext(text):
2305 2308 raise error.CensoredNodeError(self.indexfile, node, text)
2306 2309 raise
2307 2310
2308 2311 def _enforceinlinesize(self, tr, fp=None):
2309 2312 """Check if the revlog is too big for inline and convert if so.
2310 2313
2311 2314 This should be called after revisions are added to the revlog. If the
2312 2315 revlog has grown too large to be an inline revlog, it will convert it
2313 2316 to use multiple index and data files.
2314 2317 """
2315 2318 tiprev = len(self) - 1
2316 2319 if (not self._inline or
2317 2320 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2318 2321 return
2319 2322
2320 2323 trinfo = tr.find(self.indexfile)
2321 2324 if trinfo is None:
2322 2325 raise RevlogError(_("%s not found in the transaction")
2323 2326 % self.indexfile)
2324 2327
2325 2328 trindex = trinfo[2]
2326 2329 if trindex is not None:
2327 2330 dataoff = self.start(trindex)
2328 2331 else:
2329 2332 # revlog was stripped at start of transaction, use all leftover data
2330 2333 trindex = len(self) - 1
2331 2334 dataoff = self.end(tiprev)
2332 2335
2333 2336 tr.add(self.datafile, dataoff)
2334 2337
2335 2338 if fp:
2336 2339 fp.flush()
2337 2340 fp.close()
2338 2341
2339 2342 with self._datafp('w') as df:
2340 2343 for r in self:
2341 2344 df.write(self._getsegmentforrevs(r, r)[1])
2342 2345
2343 2346 with self._indexfp('w') as fp:
2344 2347 self.version &= ~FLAG_INLINE_DATA
2345 2348 self._inline = False
2346 2349 io = self._io
2347 2350 for i in self:
2348 2351 e = io.packentry(self.index[i], self.node, self.version, i)
2349 2352 fp.write(e)
2350 2353
2351 2354         # the temp file replaces the real index when we exit the context
2352 2355 # manager
2353 2356
2354 2357 tr.replace(self.indexfile, trindex * self._io.size)
2355 2358 self._chunkclear()
2356 2359
2357 2360 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2358 2361 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2359 2362 """add a revision to the log
2360 2363
2361 2364 text - the revision data to add
2362 2365 transaction - the transaction object used for rollback
2363 2366 link - the linkrev data to add
2364 2367 p1, p2 - the parent nodeids of the revision
2365 2368 cachedelta - an optional precomputed delta
2366 2369 node - nodeid of revision; typically node is not specified, and it is
2367 2370 computed by default as hash(text, p1, p2), however subclasses might
2368 2371 use different hashing method (and override checkhash() in such case)
2369 2372 flags - the known flags to set on the revision
2370 2373 deltacomputer - an optional _deltacomputer instance shared between
2371 2374 multiple calls
2372 2375 """
2373 2376 if link == nullrev:
2374 2377 raise RevlogError(_("attempted to add linkrev -1 to %s")
2375 2378 % self.indexfile)
2376 2379
2377 2380 if flags:
2378 2381 node = node or self.hash(text, p1, p2)
2379 2382
2380 2383 rawtext, validatehash = self._processflags(text, flags, 'write')
2381 2384
2382 2385 # If the flag processor modifies the revision data, ignore any provided
2383 2386 # cachedelta.
2384 2387 if rawtext != text:
2385 2388 cachedelta = None
2386 2389
2387 2390 if len(rawtext) > _maxentrysize:
2388 2391 raise RevlogError(
2389 2392 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2390 2393 % (self.indexfile, len(rawtext)))
2391 2394
2392 2395 node = node or self.hash(rawtext, p1, p2)
2393 2396 if node in self.nodemap:
2394 2397 return node
2395 2398
2396 2399 if validatehash:
2397 2400 self.checkhash(rawtext, node, p1=p1, p2=p2)
2398 2401
2399 2402 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2400 2403 flags, cachedelta=cachedelta,
2401 2404 deltacomputer=deltacomputer)
2402 2405
2403 2406 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2404 2407 cachedelta=None, deltacomputer=None):
2405 2408 """add a raw revision with known flags, node and parents
2406 2409 useful when reusing a revision not stored in this revlog (ex: received
2407 2410         over the wire, or read from an external bundle).
2408 2411 """
2409 2412 dfh = None
2410 2413 if not self._inline:
2411 2414 dfh = self._datafp("a+")
2412 2415 ifh = self._indexfp("a+")
2413 2416 try:
2414 2417 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2415 2418 flags, cachedelta, ifh, dfh,
2416 2419 deltacomputer=deltacomputer)
2417 2420 finally:
2418 2421 if dfh:
2419 2422 dfh.close()
2420 2423 ifh.close()
2421 2424
2422 2425 def compress(self, data):
2423 2426 """Generate a possibly-compressed representation of data."""
2424 2427 if not data:
2425 2428 return '', data
2426 2429
2427 2430 compressed = self._compressor.compress(data)
2428 2431
2429 2432 if compressed:
2430 2433 # The revlog compressor added the header in the returned data.
2431 2434 return '', compressed
2432 2435
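        # Incompressible data is stored raw: chunks beginning with '\0' are
        # self-identifying, and anything else gets a 'u' (uncompressed)
        # header so decompress() can route it.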
2433 2436 if data[0:1] == '\0':
2434 2437 return '', data
2435 2438 return 'u', data
2436 2439
2437 2440 def decompress(self, data):
2438 2441 """Decompress a revlog chunk.
2439 2442
2440 2443 The chunk is expected to begin with a header identifying the
2441 2444 format type so it can be routed to an appropriate decompressor.
2442 2445 """
2443 2446 if not data:
2444 2447 return data
2445 2448
2446 2449 # Revlogs are read much more frequently than they are written and many
2447 2450 # chunks only take microseconds to decompress, so performance is
2448 2451 # important here.
2449 2452 #
2450 2453 # We can make a few assumptions about revlogs:
2451 2454 #
2452 2455 # 1) the majority of chunks will be compressed (as opposed to inline
2453 2456 # raw data).
2454 2457         # 2) decompressing *any* data will likely be at least 10x slower than
2455 2458 # returning raw inline data.
2456 2459 # 3) we want to prioritize common and officially supported compression
2457 2460 # engines
2458 2461 #
2459 2462 # It follows that we want to optimize for "decompress compressed data
2460 2463 # when encoded with common and officially supported compression engines"
2461 2464 # case over "raw data" and "data encoded by less common or non-official
2462 2465 # compression engines." That is why we have the inline lookup first
2463 2466 # followed by the compengines lookup.
2464 2467 #
2465 2468 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2466 2469 # compressed chunks. And this matters for changelog and manifest reads.
2467 2470 t = data[0:1]
2468 2471
2469 2472 if t == 'x':
2470 2473 try:
2471 2474 return _zlibdecompress(data)
2472 2475 except zlib.error as e:
2473 2476 raise RevlogError(_('revlog decompress error: %s') %
2474 2477 stringutil.forcebytestr(e))
2475 2478 # '\0' is more common than 'u' so it goes first.
2476 2479 elif t == '\0':
2477 2480 return data
2478 2481 elif t == 'u':
2479 2482 return util.buffer(data, 1)
2480 2483
2481 2484 try:
2482 2485 compressor = self._decompressors[t]
2483 2486 except KeyError:
2484 2487 try:
2485 2488 engine = util.compengines.forrevlogheader(t)
2486 2489 compressor = engine.revlogcompressor()
2487 2490 self._decompressors[t] = compressor
2488 2491 except KeyError:
2489 2492 raise RevlogError(_('unknown compression type %r') % t)
2490 2493
2491 2494 return compressor.decompress(data)
2492 2495
2493 2496 def _isgooddeltainfo(self, deltainfo, revinfo):
2494 2497 """Returns True if the given delta is good. Good means that it is within
2495 2498 the disk span, disk size, and chain length bounds that we know to be
2496 2499 performant."""
2497 2500 if deltainfo is None:
2498 2501 return False
2499 2502
2500 2503 # - 'deltainfo.distance' is the distance from the base revision --
2501 2504 # bounding it limits the amount of I/O we need to do.
2502 2505 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2503 2506 # deltas we need to apply -- bounding it limits the amount of CPU
2504 2507 # we consume.
2505 2508
2506 2509 if self._sparserevlog:
2507 2510 # As sparse-read will be used, we can consider that the distance,
2508 2511 # instead of being the span of the whole chunk,
2509 2512 # is the span of the largest read chunk
2510 2513 base = deltainfo.base
2511 2514
2512 2515 if base != nullrev:
2513 2516 deltachain = self._deltachain(base)[0]
2514 2517 else:
2515 2518 deltachain = []
2516 2519
2517 2520 # search for the first non-snapshot revision
2518 2521 for idx, r in enumerate(deltachain):
2519 2522 if not self.issnapshot(r):
2520 2523 break
2521 2524 deltachain = deltachain[idx:]
2522 2525 chunks = _slicechunk(self, deltachain, deltainfo)
2523 2526 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2524 2527 distance = max(all_span)
2525 2528 else:
2526 2529 distance = deltainfo.distance
2527 2530
2528 2531 textlen = revinfo.textlen
2529 2532 defaultmax = textlen * 4
2530 2533 maxdist = self._maxdeltachainspan
2531 2534 if not maxdist:
2532 2535 maxdist = distance # ensure the conditional pass
2533 2536 maxdist = max(maxdist, defaultmax)
2534 2537 if self._sparserevlog and maxdist < self._srmingapsize:
2535 2538             # In multiple places, we ignore irrelevant data ranges below a
2536 2539             # certain size. We also apply this tradeoff here and relax the span
2537 2540             # constraint for small enough content.
2538 2541 maxdist = self._srmingapsize
2539 2542
2540 2543 # Bad delta from read span:
2541 2544 #
2542 2545 # If the span of data read is larger than the maximum allowed.
2543 2546 if maxdist < distance:
2544 2547 return False
2545 2548
2546 2549 # Bad delta from new delta size:
2547 2550 #
2548 2551 # If the delta size is larger than the target text, storing the
2549 2552 # delta will be inefficient.
2550 2553 if textlen < deltainfo.deltalen:
2551 2554 return False
2552 2555
2553 2556 # Bad delta from cumulated payload size:
2554 2557 #
2555 2558         # If the sum of the deltas gets larger than K * the target text length.
2556 2559 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2557 2560 return False
2558 2561
2559 2562 # Bad delta from chain length:
2560 2563 #
2561 2564         # If the number of deltas in the chain gets too high.
2562 2565 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2563 2566 return False
2564 2567
2565 2568 # bad delta from intermediate snapshot size limit
2566 2569 #
2567 2570         # If an intermediate snapshot size is higher than the limit. The
2568 2571         # limit exists to prevent an endless chain of intermediate deltas
2569 2572         # from being created.
2570 2573 if (deltainfo.snapshotdepth is not None and
2571 2574 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2572 2575 return False
2573 2576
2574 2577 # bad delta if new intermediate snapshot is larger than the previous
2575 2578 # snapshot
2576 2579 if (deltainfo.snapshotdepth
2577 2580 and self.length(deltainfo.base) < deltainfo.deltalen):
2578 2581 return False
2579 2582
2580 2583 return True
2581 2584
2582 2585 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2583 2586 cachedelta, ifh, dfh, alwayscache=False,
2584 2587 deltacomputer=None):
2585 2588 """internal function to add revisions to the log
2586 2589
2587 2590 see addrevision for argument descriptions.
2588 2591
2589 2592 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2590 2593
2591 2594 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2592 2595 be used.
2593 2596
2594 2597 invariants:
2595 2598 - rawtext is optional (can be None); if not set, cachedelta must be set.
2596 2599 if both are set, they must correspond to each other.
2597 2600 """
2598 2601 if node == nullid:
2599 2602 raise RevlogError(_("%s: attempt to add null revision") %
2600 2603 (self.indexfile))
2601 2604 if node == wdirid or node in wdirfilenodeids:
2602 2605 raise RevlogError(_("%s: attempt to add wdir revision") %
2603 2606 (self.indexfile))
2604 2607
2605 2608 if self._inline:
2606 2609 fh = ifh
2607 2610 else:
2608 2611 fh = dfh
2609 2612
2610 2613 btext = [rawtext]
2611 2614
2612 2615 curr = len(self)
2613 2616 prev = curr - 1
2614 2617 offset = self.end(prev)
2615 2618 p1r, p2r = self.rev(p1), self.rev(p2)
2616 2619
2617 2620 # full versions are inserted when the needed deltas
2618 2621 # become comparable to the uncompressed text
2619 2622 if rawtext is None:
2620 2623             # we need the rawtext size before it was changed by flag
2621 2624             # processors, i.e. the non-raw size. use revlog explicitly to
2622 2625             # avoid filelog's extra logic that might remove metadata size.
2623 2626 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2624 2627 cachedelta[1])
2625 2628 else:
2626 2629 textlen = len(rawtext)
2627 2630
2628 2631 if deltacomputer is None:
2629 2632 deltacomputer = _deltacomputer(self)
2630 2633
2631 2634 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2632 2635
2633 2636 # no delta for flag processor revision (see "candelta" for why)
2634 2637 # not calling candelta since only one revision needs test, also to
2635 2638 # avoid overhead fetching flags again.
2636 2639 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2637 2640 deltainfo = None
2638 2641 else:
2639 2642 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2640 2643
2641 2644 if deltainfo is not None:
2642 2645 base = deltainfo.base
2643 2646 chainbase = deltainfo.chainbase
2644 2647 data = deltainfo.data
2645 2648 l = deltainfo.deltalen
2646 2649 else:
2647 2650 rawtext = deltacomputer.buildtext(revinfo, fh)
2648 2651 data = self.compress(rawtext)
2649 2652 l = len(data[1]) + len(data[0])
2650 2653 base = chainbase = curr
2651 2654
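        # offset_type packs the file offset into the high bits and the
        # 16-bit flags into the low bits of the first index field, which is
        # why readers unpack it with "entry[0] >> 16" and "entry[0] & 0xffff".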
2652 2655 e = (offset_type(offset, flags), l, textlen,
2653 2656 base, link, p1r, p2r, node)
2654 2657 self.index.append(e)
2655 2658 self.nodemap[node] = curr
2656 2659
2657 2660 entry = self._io.packentry(e, self.node, self.version, curr)
2658 2661 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2659 2662
2660 2663 if alwayscache and rawtext is None:
2661 2664             rawtext = deltacomputer.buildtext(revinfo, fh)
2662 2665
2663 2666 if type(rawtext) == bytes: # only accept immutable objects
2664 2667 self._cache = (node, curr, rawtext)
2665 2668 self._chainbasecache[curr] = chainbase
2666 2669 return node
2667 2670
2668 2671 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2669 2672 # Files opened in a+ mode have inconsistent behavior on various
2670 2673 # platforms. Windows requires that a file positioning call be made
2671 2674 # when the file handle transitions between reads and writes. See
2672 2675 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2673 2676 # platforms, Python or the platform itself can be buggy. Some versions
2674 2677 # of Solaris have been observed to not append at the end of the file
2675 2678 # if the file was seeked to before the end. See issue4943 for more.
2676 2679 #
2677 2680 # We work around this issue by inserting a seek() before writing.
2678 2681 # Note: This is likely not necessary on Python 3.
2679 2682 ifh.seek(0, os.SEEK_END)
2680 2683 if dfh:
2681 2684 dfh.seek(0, os.SEEK_END)
2682 2685
2683 2686 curr = len(self) - 1
2684 2687 if not self._inline:
2685 2688 transaction.add(self.datafile, offset)
2686 2689 transaction.add(self.indexfile, curr * len(entry))
2687 2690 if data[0]:
2688 2691 dfh.write(data[0])
2689 2692 dfh.write(data[1])
2690 2693 ifh.write(entry)
2691 2694 else:
2692 2695 offset += curr * self._io.size
2693 2696 transaction.add(self.indexfile, offset, curr)
2694 2697 ifh.write(entry)
2695 2698 ifh.write(data[0])
2696 2699 ifh.write(data[1])
2697 2700 self._enforceinlinesize(transaction, ifh)
2698 2701
2699 2702 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2700 2703 """
2701 2704 add a delta group
2702 2705
2703 2706 given a set of deltas, add them to the revision log. the
2704 2707 first delta is against its parent, which should be in our
2705 2708 log, the rest are against the previous delta.
2706 2709
2707 2710 If ``addrevisioncb`` is defined, it will be called with arguments of
2708 2711 this revlog and the node that was added.
2709 2712 """
2710 2713
2711 2714 nodes = []
2712 2715
2713 2716 r = len(self)
2714 2717 end = 0
2715 2718 if r:
2716 2719 end = self.end(r - 1)
2717 2720 ifh = self._indexfp("a+")
2718 2721 isize = r * self._io.size
2719 2722 if self._inline:
2720 2723 transaction.add(self.indexfile, end + isize, r)
2721 2724 dfh = None
2722 2725 else:
2723 2726 transaction.add(self.indexfile, isize, r)
2724 2727 transaction.add(self.datafile, end)
2725 2728 dfh = self._datafp("a+")
2726 2729 def flush():
2727 2730 if dfh:
2728 2731 dfh.flush()
2729 2732 ifh.flush()
2730 2733 try:
2731 2734 deltacomputer = _deltacomputer(self)
2732 2735 # loop through our set of deltas
2733 2736 for data in deltas:
2734 2737 node, p1, p2, linknode, deltabase, delta, flags = data
2735 2738 link = linkmapper(linknode)
2736 2739 flags = flags or REVIDX_DEFAULT_FLAGS
2737 2740
2738 2741 nodes.append(node)
2739 2742
2740 2743 if node in self.nodemap:
2741 2744 # this can happen if two branches make the same change
2742 2745 continue
2743 2746
2744 2747 for p in (p1, p2):
2745 2748 if p not in self.nodemap:
2746 2749 raise LookupError(p, self.indexfile,
2747 2750 _('unknown parent'))
2748 2751
2749 2752 if deltabase not in self.nodemap:
2750 2753 raise LookupError(deltabase, self.indexfile,
2751 2754 _('unknown delta base'))
2752 2755
2753 2756 baserev = self.rev(deltabase)
2754 2757
2755 2758 if baserev != nullrev and self.iscensored(baserev):
2756 2759 # if base is censored, delta must be full replacement in a
2757 2760 # single patch operation
2758 2761 hlen = struct.calcsize(">lll")
2759 2762 oldlen = self.rawsize(baserev)
2760 2763 newlen = len(delta) - hlen
2761 2764 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2762 2765 raise error.CensoredBaseError(self.indexfile,
2763 2766 self.node(baserev))
2764 2767
2765 2768 if not flags and self._peek_iscensored(baserev, delta, flush):
2766 2769 flags |= REVIDX_ISCENSORED
2767 2770
2768 2771 # We assume consumers of addrevisioncb will want to retrieve
2769 2772 # the added revision, which will require a call to
2770 2773 # revision(). revision() will fast path if there is a cache
2771 2774 # hit. So, we tell _addrevision() to always cache in this case.
2772 2775 # We're only using addgroup() in the context of changegroup
2773 2776 # generation so the revision data can always be handled as raw
2774 2777 # by the flagprocessor.
2775 2778 self._addrevision(node, None, transaction, link,
2776 2779 p1, p2, flags, (baserev, delta),
2777 2780 ifh, dfh,
2778 2781 alwayscache=bool(addrevisioncb),
2779 2782 deltacomputer=deltacomputer)
2780 2783
2781 2784 if addrevisioncb:
2782 2785 addrevisioncb(self, node)
2783 2786
2784 2787 if not dfh and not self._inline:
2785 2788 # addrevision switched from inline to conventional
2786 2789 # reopen the index
2787 2790 ifh.close()
2788 2791 dfh = self._datafp("a+")
2789 2792 ifh = self._indexfp("a+")
2790 2793 finally:
2791 2794 if dfh:
2792 2795 dfh.close()
2793 2796 ifh.close()
2794 2797
2795 2798 return nodes
2796 2799
2797 2800 def iscensored(self, rev):
2798 2801 """Check if a file revision is censored."""
2799 2802 if not self._censorable:
2800 2803 return False
2801 2804
2802 2805 return self.flags(rev) & REVIDX_ISCENSORED
2803 2806
2804 2807 def _peek_iscensored(self, baserev, delta, flush):
2805 2808 """Quickly check if a delta produces a censored revision."""
2806 2809 if not self._censorable:
2807 2810 return False
2808 2811
2809 2812 # Fragile heuristic: unless new file meta keys are added alphabetically
2810 2813 # preceding "censored", all censored revisions are prefixed by
2811 2814 # "\1\ncensored:". A delta producing such a censored revision must be a
2812 2815 # full-replacement delta, so we inspect the first and only patch in the
2813 2816 # delta for this prefix.
2814 2817 hlen = struct.calcsize(">lll")
2815 2818 if len(delta) <= hlen:
2816 2819 return False
2817 2820
2818 2821 oldlen = self.rawsize(baserev)
2819 2822 newlen = len(delta) - hlen
2820 2823 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2821 2824 return False
2822 2825
2823 2826 add = "\1\ncensored:"
2824 2827 addlen = len(add)
2825 2828 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2826 2829
2827 2830 def getstrippoint(self, minlink):
2828 2831 """find the minimum rev that must be stripped to strip the linkrev
2829 2832
2830 2833 Returns a tuple containing the minimum rev and a set of all revs that
2831 2834 have linkrevs that will be broken by this strip.
2832 2835 """
2833 2836 brokenrevs = set()
2834 2837 strippoint = len(self)
2835 2838
2836 2839 heads = {}
2837 2840 futurelargelinkrevs = set()
2838 2841 for head in self.headrevs():
2839 2842 headlinkrev = self.linkrev(head)
2840 2843 heads[head] = headlinkrev
2841 2844 if headlinkrev >= minlink:
2842 2845 futurelargelinkrevs.add(headlinkrev)
2843 2846
2844 2847 # This algorithm involves walking down the rev graph, starting at the
2845 2848 # heads. Since the revs are topologically sorted according to linkrev,
2846 2849 # once all head linkrevs are below the minlink, we know there are
2847 2850 # no more revs that could have a linkrev greater than minlink.
2848 2851 # So we can stop walking.
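        # Invariant: 'heads' maps each frontier rev (all of whose children
        # have been visited) to its linkrev, and 'futurelargelinkrevs' holds
        # the frontier linkrevs that are still >= minlink.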
2849 2852 while futurelargelinkrevs:
2850 2853 strippoint -= 1
2851 2854 linkrev = heads.pop(strippoint)
2852 2855
2853 2856 if linkrev < minlink:
2854 2857 brokenrevs.add(strippoint)
2855 2858 else:
2856 2859 futurelargelinkrevs.remove(linkrev)
2857 2860
2858 2861 for p in self.parentrevs(strippoint):
2859 2862 if p != nullrev:
2860 2863 plinkrev = self.linkrev(p)
2861 2864 heads[p] = plinkrev
2862 2865 if plinkrev >= minlink:
2863 2866 futurelargelinkrevs.add(plinkrev)
2864 2867
2865 2868 return strippoint, brokenrevs
2866 2869
2867 2870 def strip(self, minlink, transaction):
2868 2871 """truncate the revlog on the first revision with a linkrev >= minlink
2869 2872
2870 2873 This function is called when we're stripping revision minlink and
2871 2874 its descendants from the repository.
2872 2875
2873 2876 We have to remove all revisions with linkrev >= minlink, because
2874 2877 the equivalent changelog revisions will be renumbered after the
2875 2878 strip.
2876 2879
2877 2880 So we truncate the revlog on the first of these revisions, and
2878 2881 trust that the caller has saved the revisions that shouldn't be
2879 2882 removed and that it'll re-add them after this truncation.
2880 2883 """
2881 2884 if len(self) == 0:
2882 2885 return
2883 2886
2884 2887 rev, _ = self.getstrippoint(minlink)
2885 2888 if rev == len(self):
2886 2889 return
2887 2890
2888 2891 # first truncate the files on disk
2889 2892 end = self.start(rev)
2890 2893 if not self._inline:
2891 2894 transaction.add(self.datafile, end)
2892 2895 end = rev * self._io.size
2893 2896 else:
2894 2897 end += rev * self._io.size
2895 2898
2896 2899 transaction.add(self.indexfile, end)
2897 2900
2898 2901 # then reset internal state in memory to forget those revisions
2899 2902 self._cache = None
2900 2903 self._chaininfocache = {}
2901 2904 self._chunkclear()
2902 2905 for x in pycompat.xrange(rev, len(self)):
2903 2906 del self.nodemap[self.node(x)]
2904 2907
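        # the index keeps a trailing sentinel entry, hence the deletion up
        # to -1 below rather than a full truncation of the tail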
2905 2908 del self.index[rev:-1]
2906 2909 self._nodepos = None
2907 2910
2908 2911 def checksize(self):
2909 2912 expected = 0
2910 2913 if len(self):
2911 2914 expected = max(0, self.end(len(self) - 1))
2912 2915
2913 2916 try:
2914 2917 with self._datafp() as f:
2915 2918 f.seek(0, 2)
2916 2919 actual = f.tell()
2917 2920 dd = actual - expected
2918 2921 except IOError as inst:
2919 2922 if inst.errno != errno.ENOENT:
2920 2923 raise
2921 2924 dd = 0
2922 2925
2923 2926 try:
2924 2927 f = self.opener(self.indexfile)
2925 2928 f.seek(0, 2)
2926 2929 actual = f.tell()
2927 2930 f.close()
2928 2931 s = self._io.size
2929 2932 i = max(0, actual // s)
2930 2933 di = actual - (i * s)
2931 2934 if self._inline:
2932 2935 databytes = 0
2933 2936 for r in self:
2934 2937 databytes += max(0, self.length(r))
2935 2938 dd = 0
2936 2939 di = actual - len(self) * s - databytes
2937 2940 except IOError as inst:
2938 2941 if inst.errno != errno.ENOENT:
2939 2942 raise
2940 2943 di = 0
2941 2944
2942 2945 return (dd, di)
2943 2946
2944 2947 def files(self):
2945 2948 res = [self.indexfile]
2946 2949 if not self._inline:
2947 2950 res.append(self.datafile)
2948 2951 return res
2949 2952
2950 2953 DELTAREUSEALWAYS = 'always'
2951 2954 DELTAREUSESAMEREVS = 'samerevs'
2952 2955 DELTAREUSENEVER = 'never'
2953 2956
2954 2957 DELTAREUSEFULLADD = 'fulladd'
2955 2958
2956 2959 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2957 2960
2958 2961 def clone(self, tr, destrevlog, addrevisioncb=None,
2959 2962 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2960 2963 """Copy this revlog to another, possibly with format changes.
2961 2964
2962 2965 The destination revlog will contain the same revisions and nodes.
2963 2966 However, it may not be bit-for-bit identical due to e.g. delta encoding
2964 2967 differences.
2965 2968
2966 2969         The ``deltareuse`` argument controls how deltas from the existing revlog
2967 2970 are preserved in the destination revlog. The argument can have the
2968 2971 following values:
2969 2972
2970 2973 DELTAREUSEALWAYS
2971 2974 Deltas will always be reused (if possible), even if the destination
2972 2975 revlog would not select the same revisions for the delta. This is the
2973 2976 fastest mode of operation.
2974 2977 DELTAREUSESAMEREVS
2975 2978 Deltas will be reused if the destination revlog would pick the same
2976 2979 revisions for the delta. This mode strikes a balance between speed
2977 2980 and optimization.
2978 2981 DELTAREUSENEVER
2979 2982 Deltas will never be reused. This is the slowest mode of execution.
2980 2983 This mode can be used to recompute deltas (e.g. if the diff/delta
2981 2984 algorithm changes).
2982 2985
2983 2986 Delta computation can be slow, so the choice of delta reuse policy can
2984 2987 significantly affect run time.
2985 2988
2986 2989 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2987 2990         two extremes. Deltas will be reused if they are appropriate. But if the
2988 2991         destination could pick a better delta base, it will do so. This means if you
2989 2992 are converting a non-generaldelta revlog to a generaldelta revlog,
2990 2993 deltas will be recomputed if the delta's parent isn't a parent of the
2991 2994 revision.
2992 2995
2993 2996 In addition to the delta policy, the ``deltabothparents`` argument
2994 2997 controls whether to compute deltas against both parents for merges.
2995 2998         If unset, the destination revlog's current setting is left unchanged.
2996 2999 """
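        # Sketch of a typical call (hypothetical caller, for illustration
        # only): inside a transaction, copy into an empty destination revlog:
        #   srcrevlog.clone(tr, dstrevlog,
        #                   deltareuse=srcrevlog.DELTAREUSESAMEREVS)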
2997 3000 if deltareuse not in self.DELTAREUSEALL:
2998 3001 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2999 3002
3000 3003 if len(destrevlog):
3001 3004 raise ValueError(_('destination revlog is not empty'))
3002 3005
3003 3006 if getattr(self, 'filteredrevs', None):
3004 3007 raise ValueError(_('source revlog has filtered revisions'))
3005 3008 if getattr(destrevlog, 'filteredrevs', None):
3006 3009 raise ValueError(_('destination revlog has filtered revisions'))
3007 3010
3008 3011 # lazydeltabase controls whether to reuse a cached delta, if possible.
3009 3012 oldlazydeltabase = destrevlog._lazydeltabase
3010 3013 oldamd = destrevlog._deltabothparents
3011 3014
3012 3015 try:
3013 3016 if deltareuse == self.DELTAREUSEALWAYS:
3014 3017 destrevlog._lazydeltabase = True
3015 3018 elif deltareuse == self.DELTAREUSESAMEREVS:
3016 3019 destrevlog._lazydeltabase = False
3017 3020
3018 3021 destrevlog._deltabothparents = deltabothparents or oldamd
3019 3022
3020 3023 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3021 3024 self.DELTAREUSESAMEREVS)
3022 3025
3023 3026 deltacomputer = _deltacomputer(destrevlog)
3024 3027 index = self.index
3025 3028 for rev in self:
3026 3029 entry = index[rev]
3027 3030
3028 3031 # Some classes override linkrev to take filtered revs into
3029 3032 # account. Use raw entry from index.
3030 3033 flags = entry[0] & 0xffff
3031 3034 linkrev = entry[4]
3032 3035 p1 = index[entry[5]][7]
3033 3036 p2 = index[entry[6]][7]
3034 3037 node = entry[7]
3035 3038
3036 3039 # (Possibly) reuse the delta from the revlog if allowed and
3037 3040 # the revlog chunk is a delta.
3038 3041 cachedelta = None
3039 3042 rawtext = None
3040 3043 if populatecachedelta:
3041 3044 dp = self.deltaparent(rev)
3042 3045 if dp != nullrev:
3043 3046 cachedelta = (dp, bytes(self._chunk(rev)))
3044 3047
3045 3048 if not cachedelta:
3046 3049 rawtext = self.revision(rev, raw=True)
3047 3050
3049 3052 if deltareuse == self.DELTAREUSEFULLADD:
3050 3053 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
3051 3054 cachedelta=cachedelta,
3052 3055 node=node, flags=flags,
3053 3056 deltacomputer=deltacomputer)
3054 3057 else:
3055 3058 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
3056 3059 checkambig=False)
3057 3060 dfh = None
3058 3061 if not destrevlog._inline:
3059 3062 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
3060 3063 try:
3061 3064 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
3062 3065 p2, flags, cachedelta, ifh, dfh,
3063 3066 deltacomputer=deltacomputer)
3064 3067 finally:
3065 3068 if dfh:
3066 3069 dfh.close()
3067 3070 ifh.close()
3068 3071
3069 3072 if addrevisioncb:
3070 3073 addrevisioncb(self, rev, node)
3071 3074 finally:
3072 3075 destrevlog._lazydeltabase = oldlazydeltabase
3073 3076 destrevlog._deltabothparents = oldamd