revlog: reintroduce `revlog.descendant` as deprecated...
Boris Feld
r38732:4ac3c207 default
@@ -1,2900 +1,2905 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 86 # bitmask for flags that could cause rawdata content to change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
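# Editor's note: a minimal round-trip sketch for the two helpers above. The
# function name and sample keys are hypothetical, added for exposition only;
# this block is not part of the original revlog.py.
def _demometaroundtrip():
    meta = {'copy': 'a.txt', 'copyrev': '0' * 40}
    blob = packmeta(meta, 'file content\n')
    # blob is '\1\n', the sorted 'key: value' lines, a second '\1\n'
    # separator, then the payload itself
    parsed, size = parsemeta(blob)
    assert parsed == meta
    assert blob[size:] == 'file content\n'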
120 120
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate the hash. If
142 142 "write" returns True, the "rawtext" returned by "write" should be used
143 143 to generate the hash. Usually, "write" and "read" return different
144 144 booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
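# Editor's note: a hypothetical registration sketch for the API above. The
# identity transforms below are illustrative stand-ins, not real Mercurial
# flag processors; this block is not part of the original revlog.py.
def _demoaddflagprocessor():
    # identity transforms: text and rawtext are identical, so either side
    # is valid for hash checking and all booleans can be True
    def readproc(rl, rawtext):
        return rawtext, True
    def writeproc(rl, text):
        return text, True
    def rawproc(rl, rawtext):
        return True
    addflagprocessor(REVIDX_EXTSTORED, (readproc, writeproc, rawproc))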
160 160
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
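# Editor's note: illustrative only (hypothetical name, not original code).
# The three helpers above pack a byte offset and a 16-bit flag field into
# the single integer stored in the first slot of an index entry.
def _demooffsettype():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED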
171 171
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
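# Editor's note: an illustrative property check (hypothetical name, not
# original code): because the parents are sorted before hashing, hash()
# does not depend on parent order when both parents are non-null.
def _demohashsymmetry():
    p1 = '\x11' * 20
    p2 = '\x22' * 20
    assert hash('data', p1, p2) == hash('data', p2, p1)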
198 198
199 199 class _testrevlog(object):
200 200 """minimalist fake revlog to use in doctests"""
201 201
202 202 def __init__(self, data, density=0.5, mingap=0):
203 203 """data is an list of revision payload boundaries"""
204 204 self._data = data
205 205 self._srdensitythreshold = density
206 206 self._srmingapsize = mingap
207 207
208 208 def start(self, rev):
209 209 if rev == 0:
210 210 return 0
211 211 return self._data[rev - 1]
212 212
213 213 def end(self, rev):
214 214 return self._data[rev]
215 215
216 216 def length(self, rev):
217 217 return self.end(rev) - self.start(rev)
218 218
219 219 def _trimchunk(revlog, revs, startidx, endidx=None):
220 220 """returns revs[startidx:endidx] without empty trailing revs
221 221
222 222 Doctest Setup
223 223 >>> revlog = _testrevlog([
224 224 ... 5, #0
225 225 ... 10, #1
226 226 ... 12, #2
227 227 ... 12, #3 (empty)
228 228 ... 17, #4
229 229 ... 21, #5
230 230 ... 21, #6 (empty)
231 231 ... ])
232 232
233 233 Contiguous cases:
234 234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 235 [0, 1, 2, 3, 4, 5]
236 236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 237 [0, 1, 2, 3, 4]
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 239 [0, 1, 2]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 241 [2]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 243 [3, 4, 5]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 245 [3, 4]
246 246
247 247 Discontiguous cases:
248 248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 249 [1, 3, 5]
250 250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 251 [1]
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 253 [3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 255 [3, 5]
256 256 """
257 257 length = revlog.length
258 258
259 259 if endidx is None:
260 260 endidx = len(revs)
261 261
262 262 # Trim empty revs at the end, but never the very first revision of a chain
263 263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
264 264 endidx -= 1
265 265
266 266 return revs[startidx:endidx]
267 267
268 268 def _segmentspan(revlog, revs):
269 269 """Get the byte span of a segment of revisions
270 270
271 271 revs is a sorted array of revision numbers
272 272
273 273 >>> revlog = _testrevlog([
274 274 ... 5, #0
275 275 ... 10, #1
276 276 ... 12, #2
277 277 ... 12, #3 (empty)
278 278 ... 17, #4
279 279 ... ])
280 280
281 281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
282 282 17
283 283 >>> _segmentspan(revlog, [0, 4])
284 284 17
285 285 >>> _segmentspan(revlog, [3, 4])
286 286 5
287 287 >>> _segmentspan(revlog, [1, 2, 3,])
288 288 7
289 289 >>> _segmentspan(revlog, [1, 3])
290 290 7
291 291 """
292 292 if not revs:
293 293 return 0
294 294 return revlog.end(revs[-1]) - revlog.start(revs[0])
295 295
296 296 def _slicechunk(revlog, revs, targetsize=None):
297 297 """slice revs to reduce the amount of unrelated data to be read from disk.
298 298
299 299 ``revs`` is sliced into groups that should be read in one go.
300 300 Assume that revs are sorted.
301 301
302 302 The initial chunk is sliced until the overall density (payload/chunks-span
303 303 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
304 304 `revlog._srmingapsize` is skipped.
305 305
306 306 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
307 307 For consistency with other slicing choices, this limit won't go lower than
308 308 `revlog._srmingapsize`.
309 309
310 310 If individual revision chunks are larger than this limit, they will still
311 311 be yielded individually.
312 312
313 313 >>> revlog = _testrevlog([
314 314 ... 5, #00 (5)
315 315 ... 10, #01 (5)
316 316 ... 12, #02 (2)
317 317 ... 12, #03 (empty)
318 318 ... 27, #04 (15)
319 319 ... 31, #05 (4)
320 320 ... 31, #06 (empty)
321 321 ... 42, #07 (11)
322 322 ... 47, #08 (5)
323 323 ... 47, #09 (empty)
324 324 ... 48, #10 (1)
325 325 ... 51, #11 (3)
326 326 ... 74, #12 (23)
327 327 ... 85, #13 (11)
328 328 ... 86, #14 (1)
329 329 ... 91, #15 (5)
330 330 ... ])
331 331
332 332 >>> list(_slicechunk(revlog, list(range(16))))
333 333 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
334 334 >>> list(_slicechunk(revlog, [0, 15]))
335 335 [[0], [15]]
336 336 >>> list(_slicechunk(revlog, [0, 11, 15]))
337 337 [[0], [11], [15]]
338 338 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
339 339 [[0], [11, 13, 15]]
340 340 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
341 341 [[1, 2], [5, 8, 10, 11], [14]]
342 342
343 343 Slicing with a maximum chunk size
344 344 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15))
345 345 [[0], [11], [13], [15]]
346 346 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20))
347 347 [[0], [11], [13, 15]]
348 348 """
349 349 if targetsize is not None:
350 350 targetsize = max(targetsize, revlog._srmingapsize)
351 351 for chunk in _slicechunktodensity(revlog, revs,
352 352 revlog._srdensitythreshold,
353 353 revlog._srmingapsize):
354 354 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
355 355 yield subchunk
356 356
357 357 def _slicechunktosize(revlog, revs, targetsize):
358 358 """slice revs to match the target size
359 359
360 360 This is intended to be used on chunks that density slicing selected, but
361 361 that are still too large compared to the read guarantee of revlog. This
362 362 might happen when the "minimal gap size" interrupted the slicing or when
363 363 chains are built in a way that creates large blocks next to each other.
364 364
365 365 >>> revlog = _testrevlog([
366 366 ... 3, #0 (3)
367 367 ... 5, #1 (2)
368 368 ... 6, #2 (1)
369 369 ... 8, #3 (2)
370 370 ... 8, #4 (empty)
371 371 ... 11, #5 (3)
372 372 ... 12, #6 (1)
373 373 ... 13, #7 (1)
374 374 ... 14, #8 (1)
375 375 ... ])
376 376
377 377 Cases where chunk is already small enough
378 378 >>> list(_slicechunktosize(revlog, [0], 3))
379 379 [[0]]
380 380 >>> list(_slicechunktosize(revlog, [6, 7], 3))
381 381 [[6, 7]]
382 382 >>> list(_slicechunktosize(revlog, [0], None))
383 383 [[0]]
384 384 >>> list(_slicechunktosize(revlog, [6, 7], None))
385 385 [[6, 7]]
386 386
387 387 Cases where we need actual slicing
388 388 >>> list(_slicechunktosize(revlog, [0, 1], 3))
389 389 [[0], [1]]
390 390 >>> list(_slicechunktosize(revlog, [1, 3], 3))
391 391 [[1], [3]]
392 392 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
393 393 [[1, 2], [3]]
394 394 >>> list(_slicechunktosize(revlog, [3, 5], 3))
395 395 [[3], [5]]
396 396 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
397 397 [[3], [5]]
398 398 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
399 399 [[5], [6, 7, 8]]
400 400 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
401 401 [[0], [1, 2], [3], [5], [6, 7, 8]]
402 402
403 403 Case with too large individual chunk (must return valid chunk)
404 404 >>> list(_slicechunktosize(revlog, [0, 1], 2))
405 405 [[0], [1]]
406 406 >>> list(_slicechunktosize(revlog, [1, 3], 1))
407 407 [[1], [3]]
408 408 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
409 409 [[3], [5]]
410 410 """
411 411 assert targetsize is None or 0 <= targetsize
412 412 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
413 413 yield revs
414 414 return
415 415
416 416 startrevidx = 0
417 417 startdata = revlog.start(revs[0])
418 418 endrevidx = 0
419 419 iterrevs = enumerate(revs)
420 420 next(iterrevs) # skip first rev.
421 421 for idx, r in iterrevs:
422 422 span = revlog.end(r) - startdata
423 423 if span <= targetsize:
424 424 endrevidx = idx
425 425 else:
426 426 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
427 427 if chunk:
428 428 yield chunk
429 429 startrevidx = idx
430 430 startdata = revlog.start(r)
431 431 endrevidx = idx
432 432 yield _trimchunk(revlog, revs, startrevidx)
433 433
434 434 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
435 435 """slice revs to reduce the amount of unrelated data to be read from disk.
436 436
437 437 ``revs`` is sliced into groups that should be read in one go.
438 438 Assume that revs are sorted.
439 439
440 440 The initial chunk is sliced until the overall density (payload/chunks-span
441 441 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
442 442 skipped.
443 443
444 444 >>> revlog = _testrevlog([
445 445 ... 5, #00 (5)
446 446 ... 10, #01 (5)
447 447 ... 12, #02 (2)
448 448 ... 12, #03 (empty)
449 449 ... 27, #04 (15)
450 450 ... 31, #05 (4)
451 451 ... 31, #06 (empty)
452 452 ... 42, #07 (11)
453 453 ... 47, #08 (5)
454 454 ... 47, #09 (empty)
455 455 ... 48, #10 (1)
456 456 ... 51, #11 (3)
457 457 ... 74, #12 (23)
458 458 ... 85, #13 (11)
459 459 ... 86, #14 (1)
460 460 ... 91, #15 (5)
461 461 ... ])
462 462
463 463 >>> list(_slicechunktodensity(revlog, list(range(16))))
464 464 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
465 465 >>> list(_slicechunktodensity(revlog, [0, 15]))
466 466 [[0], [15]]
467 467 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
468 468 [[0], [11], [15]]
469 469 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
470 470 [[0], [11, 13, 15]]
471 471 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
472 472 [[1, 2], [5, 8, 10, 11], [14]]
473 473 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
474 474 ... mingapsize=20))
475 475 [[1, 2, 3, 5, 8, 10, 11], [14]]
476 476 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
477 477 ... targetdensity=0.95))
478 478 [[1, 2], [5], [8, 10, 11], [14]]
479 479 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
480 480 ... targetdensity=0.95, mingapsize=12))
481 481 [[1, 2], [5, 8, 10, 11], [14]]
482 482 """
483 483 start = revlog.start
484 484 length = revlog.length
485 485
486 486 if len(revs) <= 1:
487 487 yield revs
488 488 return
489 489
490 490 readdata = deltachainspan = _segmentspan(revlog, revs)
491 491
492 492 if deltachainspan < mingapsize:
493 493 yield revs
494 494 return
495 495
496 496 chainpayload = sum(length(r) for r in revs)
497 497
498 498 if deltachainspan:
499 499 density = chainpayload / float(deltachainspan)
500 500 else:
501 501 density = 1.0
502 502
503 503 if density >= targetdensity:
504 504 yield revs
505 505 return
506 506
507 507 # Store the gaps in a heap to have them sorted by decreasing size
508 508 gapsheap = []
509 509 heapq.heapify(gapsheap)
510 510 prevend = None
511 511 for i, rev in enumerate(revs):
512 512 revstart = start(rev)
513 513 revlen = length(rev)
514 514
515 515 # Skip empty revisions to form larger holes
516 516 if revlen == 0:
517 517 continue
518 518
519 519 if prevend is not None:
520 520 gapsize = revstart - prevend
521 521 # only consider holes that are large enough
522 522 if gapsize > mingapsize:
523 523 heapq.heappush(gapsheap, (-gapsize, i))
524 524
525 525 prevend = revstart + revlen
526 526
527 527 # Collect the indices of the largest holes until the density is acceptable
528 528 indicesheap = []
529 529 heapq.heapify(indicesheap)
530 530 while gapsheap and density < targetdensity:
531 531 oppgapsize, gapidx = heapq.heappop(gapsheap)
532 532
533 533 heapq.heappush(indicesheap, gapidx)
534 534
535 535 # the gap sizes are stored as negatives to be sorted decreasingly
536 536 # by the heap
537 537 readdata -= (-oppgapsize)
538 538 if readdata > 0:
539 539 density = chainpayload / float(readdata)
540 540 else:
541 541 density = 1.0
542 542
543 543 # Cut the revs at collected indices
544 544 previdx = 0
545 545 while indicesheap:
546 546 idx = heapq.heappop(indicesheap)
547 547
548 548 chunk = _trimchunk(revlog, revs, previdx, idx)
549 549 if chunk:
550 550 yield chunk
551 551
552 552 previdx = idx
553 553
554 554 chunk = _trimchunk(revlog, revs, previdx)
555 555 if chunk:
556 556 yield chunk
557 557
558 558 @attr.s(slots=True, frozen=True)
559 559 class _deltainfo(object):
560 560 distance = attr.ib()
561 561 deltalen = attr.ib()
562 562 data = attr.ib()
563 563 base = attr.ib()
564 564 chainbase = attr.ib()
565 565 chainlen = attr.ib()
566 566 compresseddeltalen = attr.ib()
567 567
568 568 class _deltacomputer(object):
569 569 def __init__(self, revlog):
570 570 self.revlog = revlog
571 571
572 572 def _getcandidaterevs(self, p1, p2, cachedelta):
573 573 """
574 574 Provides candidate revisions that are worth diffing against,
575 575 grouped by how cheap they are to try.
576 576 """
577 577 revlog = self.revlog
578 578 gdelta = revlog._generaldelta
579 579 curr = len(revlog)
580 580 prev = curr - 1
581 581 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
582 582
583 583 # should we try to build a delta?
584 584 if prev != nullrev and revlog.storedeltachains:
585 585 tested = set()
586 586 # This condition is true most of the time when processing
587 587 # changegroup data into a generaldelta repo. The only time it
588 588 # isn't true is if this is the first revision in a delta chain
589 589 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
590 590 if cachedelta and gdelta and revlog._lazydeltabase:
591 591 # Assume what we received from the server is a good choice
592 592 # build delta will reuse the cache
593 593 yield (cachedelta[0],)
594 594 tested.add(cachedelta[0])
595 595
596 596 if gdelta:
597 597 # exclude already lazy tested base if any
598 598 parents = [p for p in (p1r, p2r)
599 599 if p != nullrev and p not in tested]
600 600
601 601 if not revlog._aggressivemergedeltas and len(parents) == 2:
602 602 parents.sort()
603 603 # To minimize the chance of having to build a fulltext,
604 604 # pick first whichever parent is closest to us (max rev)
605 605 yield (parents[1],)
606 606 # then the other one (min rev) if the first did not fit
607 607 yield (parents[0],)
608 608 tested.update(parents)
609 609 elif len(parents) > 0:
610 610 # Test all parents (1 or 2), and keep the best candidate
611 611 yield parents
612 612 tested.update(parents)
613 613
614 614 if prev not in tested:
615 615 # other approaches failed; try against prev to hopefully save us a
616 616 # fulltext.
617 617 yield (prev,)
618 618 tested.add(prev)
619 619
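# Editor's note (illustrative summary, not original code): for a
# general-delta revlog with a cached delta, _getcandidaterevs above tries
# candidate groups in this order:
#   1. (cached delta base,) - reuse what the server sent
#   2. (closest parent,) then (other parent,) - or both parents at once
#      when aggressive merge deltas are enabled
#   3. (prev,) - the previous revision, as a last resort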
620 620 def buildtext(self, revinfo, fh):
621 621 """Builds a fulltext version of a revision
622 622
623 623 revinfo: _revisioninfo instance that contains all needed info
624 624 fh: file handle to either the .i or the .d revlog file,
625 625 depending on whether it is inlined or not
626 626 """
627 627 btext = revinfo.btext
628 628 if btext[0] is not None:
629 629 return btext[0]
630 630
631 631 revlog = self.revlog
632 632 cachedelta = revinfo.cachedelta
633 633 flags = revinfo.flags
634 634 node = revinfo.node
635 635
636 636 baserev = cachedelta[0]
637 637 delta = cachedelta[1]
638 638 # special case deltas which replace entire base; no need to decode
639 639 # base revision. this neatly avoids censored bases, which throw when
640 640 # they're decoded.
641 641 hlen = struct.calcsize(">lll")
642 642 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
643 643 len(delta) - hlen):
644 644 btext[0] = delta[hlen:]
645 645 else:
646 646 # deltabase is rawtext before changed by flag processors, which is
647 647 # equivalent to non-raw text
648 648 basetext = revlog.revision(baserev, _df=fh, raw=False)
649 649 btext[0] = mdiff.patch(basetext, delta)
650 650
651 651 try:
652 652 res = revlog._processflags(btext[0], flags, 'read', raw=True)
653 653 btext[0], validatehash = res
654 654 if validatehash:
655 655 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
656 656 if flags & REVIDX_ISCENSORED:
657 657 raise RevlogError(_('node %s is not censored') % node)
658 658 except CensoredNodeError:
659 659 # must pass the censored index flag to add censored revisions
660 660 if not flags & REVIDX_ISCENSORED:
661 661 raise
662 662 return btext[0]
663 663
664 664 def _builddeltadiff(self, base, revinfo, fh):
665 665 revlog = self.revlog
666 666 t = self.buildtext(revinfo, fh)
667 667 if revlog.iscensored(base):
668 668 # deltas based on a censored revision must replace the
669 669 # full content in one patch, so delta works everywhere
670 670 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
671 671 delta = header + t
672 672 else:
673 673 ptext = revlog.revision(base, _df=fh, raw=True)
674 674 delta = mdiff.textdiff(ptext, t)
675 675
676 676 return delta
677 677
678 678 def _builddeltainfo(self, revinfo, base, fh):
679 679 # can we use the cached delta?
680 680 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
681 681 delta = revinfo.cachedelta[1]
682 682 else:
683 683 delta = self._builddeltadiff(base, revinfo, fh)
684 684 revlog = self.revlog
685 685 header, data = revlog.compress(delta)
686 686 deltalen = len(header) + len(data)
687 687 chainbase = revlog.chainbase(base)
688 688 offset = revlog.end(len(revlog) - 1)
689 689 dist = deltalen + offset - revlog.start(chainbase)
690 690 if revlog._generaldelta:
691 691 deltabase = base
692 692 else:
693 693 deltabase = chainbase
694 694 chainlen, compresseddeltalen = revlog._chaininfo(base)
695 695 chainlen += 1
696 696 compresseddeltalen += deltalen
697 697 return _deltainfo(dist, deltalen, (header, data), deltabase,
698 698 chainbase, chainlen, compresseddeltalen)
699 699
700 700 def finddeltainfo(self, revinfo, fh):
701 701 """Find an acceptable delta against a candidate revision
702 702
703 703 revinfo: information about the revision (instance of _revisioninfo)
704 704 fh: file handle to either the .i or the .d revlog file,
705 705 depending on whether it is inlined or not
706 706
707 707 Returns the first acceptable candidate revision, as ordered by
708 708 _getcandidaterevs
709 709 """
710 710 cachedelta = revinfo.cachedelta
711 711 p1 = revinfo.p1
712 712 p2 = revinfo.p2
713 713 revlog = self.revlog
714 714
715 715 deltainfo = None
716 716 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
717 717 nominateddeltas = []
718 718 for candidaterev in candidaterevs:
719 719 # no delta for rawtext-changing revs (see "candelta" for why)
720 720 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
721 721 continue
722 722 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
723 723 if revlog._isgooddeltainfo(candidatedelta, revinfo):
724 724 nominateddeltas.append(candidatedelta)
725 725 if nominateddeltas:
726 726 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
727 727 break
728 728
729 729 return deltainfo
730 730
731 731 @attr.s(slots=True, frozen=True)
732 732 class _revisioninfo(object):
733 733 """Information about a revision that allows building its fulltext
734 734 node: expected hash of the revision
735 735 p1, p2: parent revs of the revision
736 736 btext: built text cache consisting of a one-element list
737 737 cachedelta: (baserev, uncompressed_delta) or None
738 738 flags: flags associated to the revision storage
739 739
740 740 One of btext[0] or cachedelta must be set.
741 741 """
742 742 node = attr.ib()
743 743 p1 = attr.ib()
744 744 p2 = attr.ib()
745 745 btext = attr.ib()
746 746 textlen = attr.ib()
747 747 cachedelta = attr.ib()
748 748 flags = attr.ib()
749 749
750 750 # index v0:
751 751 # 4 bytes: offset
752 752 # 4 bytes: compressed length
753 753 # 4 bytes: base rev
754 754 # 4 bytes: link rev
755 755 # 20 bytes: parent 1 nodeid
756 756 # 20 bytes: parent 2 nodeid
757 757 # 20 bytes: nodeid
758 758 indexformatv0 = struct.Struct(">4l20s20s20s")
759 759 indexformatv0_pack = indexformatv0.pack
760 760 indexformatv0_unpack = indexformatv0.unpack
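# Editor's note: a hypothetical sketch of one v0 index record (made-up
# values, not original code). Each record is four signed 32-bit integers
# followed by three 20-byte node ids, 76 bytes in total.
def _demov0entry():
    entry = indexformatv0_pack(0, 11, 0, 0, nullid, nullid, '\xaa' * 20)
    assert len(entry) == indexformatv0.size == 76
    assert indexformatv0_unpack(entry)[1] == 11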
761 761
762 762 class revlogoldio(object):
763 763 def __init__(self):
764 764 self.size = indexformatv0.size
765 765
766 766 def parseindex(self, data, inline):
767 767 s = self.size
768 768 index = []
769 769 nodemap = {nullid: nullrev}
770 770 n = off = 0
771 771 l = len(data)
772 772 while off + s <= l:
773 773 cur = data[off:off + s]
774 774 off += s
775 775 e = indexformatv0_unpack(cur)
776 776 # transform to revlogv1 format
777 777 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
778 778 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
779 779 index.append(e2)
780 780 nodemap[e[6]] = n
781 781 n += 1
782 782
783 783 # add the magic null revision at -1
784 784 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
785 785
786 786 return index, nodemap, None
787 787
788 788 def packentry(self, entry, node, version, rev):
789 789 if gettype(entry[0]):
790 790 raise RevlogError(_('index entry flags need revlog version 1'))
791 791 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
792 792 node(entry[5]), node(entry[6]), entry[7])
793 793 return indexformatv0_pack(*e2)
794 794
795 795 # index ng:
796 796 # 6 bytes: offset
797 797 # 2 bytes: flags
798 798 # 4 bytes: compressed length
799 799 # 4 bytes: uncompressed length
800 800 # 4 bytes: base rev
801 801 # 4 bytes: link rev
802 802 # 4 bytes: parent 1 rev
803 803 # 4 bytes: parent 2 rev
804 804 # 32 bytes: nodeid
805 805 indexformatng = struct.Struct(">Qiiiiii20s12x")
806 806 indexformatng_pack = indexformatng.pack
807 807 versionformat = struct.Struct(">I")
808 808 versionformat_pack = versionformat.pack
809 809 versionformat_unpack = versionformat.unpack
810 810
811 811 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
812 812 # signed integer)
813 813 _maxentrysize = 0x7fffffff
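# Editor's note: a hypothetical sketch of one v1 ("ng") record (made-up
# values, not original code). The first 8-byte field packs offset and flags
# together, exactly as produced by offset_type() above.
def _demongentry():
    first = offset_type(4096, REVIDX_ISCENSORED)
    entry = indexformatng_pack(first, 11, 24, 0, 0, -1, -1, '\xbb' * 20)
    assert len(entry) == indexformatng.size == 64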
814 814
815 815 class revlogio(object):
816 816 def __init__(self):
817 817 self.size = indexformatng.size
818 818
819 819 def parseindex(self, data, inline):
820 820 # call the C implementation to parse the index data
821 821 index, cache = parsers.parse_index2(data, inline)
822 822 return index, getattr(index, 'nodemap', None), cache
823 823
824 824 def packentry(self, entry, node, version, rev):
825 825 p = indexformatng_pack(*entry)
826 826 if rev == 0:
827 827 p = versionformat_pack(version) + p[4:]
828 828 return p
829 829
830 830 class revlog(object):
831 831 """
832 832 the underlying revision storage object
833 833
834 834 A revlog consists of two parts, an index and the revision data.
835 835
836 836 The index is a file with a fixed record size containing
837 837 information on each revision, including its nodeid (hash), the
838 838 nodeids of its parents, the position and offset of its data within
839 839 the data file, and the revision it's based on. Finally, each entry
840 840 contains a linkrev entry that can serve as a pointer to external
841 841 data.
842 842
843 843 The revision data itself is a linear collection of data chunks.
844 844 Each chunk represents a revision and is usually represented as a
845 845 delta against the previous chunk. To bound lookup time, runs of
846 846 deltas are limited to about 2 times the length of the original
847 847 version data. This makes retrieval of a version proportional to
848 848 its size, or O(1) relative to the number of revisions.
849 849
850 850 Both pieces of the revlog are written to in an append-only
851 851 fashion, which means we never need to rewrite a file to insert or
852 852 remove data, and can use some simple techniques to avoid the need
853 853 for locking while reading.
854 854
855 855 If checkambig, indexfile is opened with checkambig=True at
856 856 writing, to avoid file stat ambiguity.
857 857
858 858 If mmaplargeindex is True, and an mmapindexthreshold is set, the
859 859 index will be mmapped rather than read if it is larger than the
860 860 configured threshold.
861 861
862 862 If censorable is True, the revlog can have censored revisions.
863 863 """
864 864 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
865 865 mmaplargeindex=False, censorable=False):
866 866 """
867 867 create a revlog object
868 868
869 869 opener is a function that abstracts the file opening operation
870 870 and can be used to implement COW semantics or the like.
871 871 """
872 872 self.indexfile = indexfile
873 873 self.datafile = datafile or (indexfile[:-2] + ".d")
874 874 self.opener = opener
875 875 # When True, indexfile is opened with checkambig=True at writing, to
876 876 # avoid file stat ambiguity.
877 877 self._checkambig = checkambig
878 878 self._censorable = censorable
879 879 # 3-tuple of (node, rev, text) for a raw revision.
880 880 self._cache = None
881 881 # Maps rev to chain base rev.
882 882 self._chainbasecache = util.lrucachedict(100)
883 883 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
884 884 self._chunkcache = (0, '')
885 885 # How much data to read and cache into the raw revlog data cache.
886 886 self._chunkcachesize = 65536
887 887 self._maxchainlen = None
888 888 self._aggressivemergedeltas = True
889 889 self.index = []
890 890 # Mapping of partial identifiers to full nodes.
891 891 self._pcache = {}
892 892 # Mapping of revision integer to full node.
893 893 self._nodecache = {nullid: nullrev}
894 894 self._nodepos = None
895 895 self._compengine = 'zlib'
896 896 self._maxdeltachainspan = -1
897 897 self._withsparseread = False
898 898 self._srdensitythreshold = 0.50
899 899 self._srmingapsize = 262144
900 900
901 901 mmapindexthreshold = None
902 902 v = REVLOG_DEFAULT_VERSION
903 903 opts = getattr(opener, 'options', None)
904 904 if opts is not None:
905 905 if 'revlogv2' in opts:
906 906 # version 2 revlogs always use generaldelta.
907 907 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
908 908 elif 'revlogv1' in opts:
909 909 if 'generaldelta' in opts:
910 910 v |= FLAG_GENERALDELTA
911 911 else:
912 912 v = 0
913 913 if 'chunkcachesize' in opts:
914 914 self._chunkcachesize = opts['chunkcachesize']
915 915 if 'maxchainlen' in opts:
916 916 self._maxchainlen = opts['maxchainlen']
917 917 if 'aggressivemergedeltas' in opts:
918 918 self._aggressivemergedeltas = opts['aggressivemergedeltas']
919 919 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
920 920 if 'compengine' in opts:
921 921 self._compengine = opts['compengine']
922 922 if 'maxdeltachainspan' in opts:
923 923 self._maxdeltachainspan = opts['maxdeltachainspan']
924 924 if mmaplargeindex and 'mmapindexthreshold' in opts:
925 925 mmapindexthreshold = opts['mmapindexthreshold']
926 926 self._withsparseread = bool(opts.get('with-sparse-read', False))
927 927 if 'sparse-read-density-threshold' in opts:
928 928 self._srdensitythreshold = opts['sparse-read-density-threshold']
929 929 if 'sparse-read-min-gap-size' in opts:
930 930 self._srmingapsize = opts['sparse-read-min-gap-size']
931 931
932 932 if self._chunkcachesize <= 0:
933 933 raise RevlogError(_('revlog chunk cache size %r is not greater '
934 934 'than 0') % self._chunkcachesize)
935 935 elif self._chunkcachesize & (self._chunkcachesize - 1):
936 936 raise RevlogError(_('revlog chunk cache size %r is not a power '
937 937 'of 2') % self._chunkcachesize)
938 938
939 939 indexdata = ''
940 940 self._initempty = True
941 941 try:
942 942 with self._indexfp() as f:
943 943 if (mmapindexthreshold is not None and
944 944 self.opener.fstat(f).st_size >= mmapindexthreshold):
945 945 indexdata = util.buffer(util.mmapread(f))
946 946 else:
947 947 indexdata = f.read()
948 948 if len(indexdata) > 0:
949 949 v = versionformat_unpack(indexdata[:4])[0]
950 950 self._initempty = False
951 951 except IOError as inst:
952 952 if inst.errno != errno.ENOENT:
953 953 raise
954 954
955 955 self.version = v
956 956 self._inline = v & FLAG_INLINE_DATA
957 957 self._generaldelta = v & FLAG_GENERALDELTA
958 958 flags = v & ~0xFFFF
959 959 fmt = v & 0xFFFF
960 960 if fmt == REVLOGV0:
961 961 if flags:
962 962 raise RevlogError(_('unknown flags (%#04x) in version %d '
963 963 'revlog %s') %
964 964 (flags >> 16, fmt, self.indexfile))
965 965 elif fmt == REVLOGV1:
966 966 if flags & ~REVLOGV1_FLAGS:
967 967 raise RevlogError(_('unknown flags (%#04x) in version %d '
968 968 'revlog %s') %
969 969 (flags >> 16, fmt, self.indexfile))
970 970 elif fmt == REVLOGV2:
971 971 if flags & ~REVLOGV2_FLAGS:
972 972 raise RevlogError(_('unknown flags (%#04x) in version %d '
973 973 'revlog %s') %
974 974 (flags >> 16, fmt, self.indexfile))
975 975 else:
976 976 raise RevlogError(_('unknown version (%d) in revlog %s') %
977 977 (fmt, self.indexfile))
978 978
979 979 self.storedeltachains = True
980 980
981 981 self._io = revlogio()
982 982 if self.version == REVLOGV0:
983 983 self._io = revlogoldio()
984 984 try:
985 985 d = self._io.parseindex(indexdata, self._inline)
986 986 except (ValueError, IndexError):
987 987 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
988 988 self.index, nodemap, self._chunkcache = d
989 989 if nodemap is not None:
990 990 self.nodemap = self._nodecache = nodemap
991 991 if not self._chunkcache:
992 992 self._chunkclear()
993 993 # revnum -> (chain-length, sum-delta-length)
994 994 self._chaininfocache = {}
995 995 # revlog header -> revlog compressor
996 996 self._decompressors = {}
997 997
998 998 @util.propertycache
999 999 def _compressor(self):
1000 1000 return util.compengines[self._compengine].revlogcompressor()
1001 1001
1002 1002 def _indexfp(self, mode='r'):
1003 1003 """file object for the revlog's index file"""
1004 1004 args = {r'mode': mode}
1005 1005 if mode != 'r':
1006 1006 args[r'checkambig'] = self._checkambig
1007 1007 if mode == 'w':
1008 1008 args[r'atomictemp'] = True
1009 1009 return self.opener(self.indexfile, **args)
1010 1010
1011 1011 def _datafp(self, mode='r'):
1012 1012 """file object for the revlog's data file"""
1013 1013 return self.opener(self.datafile, mode=mode)
1014 1014
1015 1015 @contextlib.contextmanager
1016 1016 def _datareadfp(self, existingfp=None):
1017 1017 """file object suitable to read data"""
1018 1018 if existingfp is not None:
1019 1019 yield existingfp
1020 1020 else:
1021 1021 if self._inline:
1022 1022 func = self._indexfp
1023 1023 else:
1024 1024 func = self._datafp
1025 1025 with func() as fp:
1026 1026 yield fp
1027 1027
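# Editor's note (illustrative usage of _datareadfp above, not original
# code): callers that already hold an open file pass it through unchanged;
# otherwise a handle to the right file (.i when inline, .d otherwise) is
# opened for the duration of the with-block:
#
#     with self._datareadfp() as fp:
#         data = fp.read()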
1028 1028 def tip(self):
1029 1029 return self.node(len(self.index) - 2)
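# Editor's note (not original code): the parsed index carries a sentinel
# nullid entry appended at the end, hence "len(self.index) - 2" for the
# tip above and "len(self.index) - 1" for __len__ below.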
1030 1030 def __contains__(self, rev):
1031 1031 return 0 <= rev < len(self)
1032 1032 def __len__(self):
1033 1033 return len(self.index) - 1
1034 1034 def __iter__(self):
1035 1035 return iter(xrange(len(self)))
1036 1036 def revs(self, start=0, stop=None):
1037 1037 """iterate over all rev in this revlog (from start to stop)"""
1038 1038 step = 1
1039 1039 if stop is not None:
1040 1040 if start > stop:
1041 1041 step = -1
1042 1042 stop += step
1043 1043 else:
1044 1044 stop = len(self)
1045 1045 return xrange(start, stop, step)
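# Editor's note (illustrative, not original code): the range above is
# inclusive of ``stop`` and iterates backwards when start > stop, e.g.
# revs(2, 5) yields 2, 3, 4, 5 and revs(5, 2) yields 5, 4, 3, 2.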
1046 1046
1047 1047 @util.propertycache
1048 1048 def nodemap(self):
1049 1049 self.rev(self.node(0))
1050 1050 return self._nodecache
1051 1051
1052 1052 def hasnode(self, node):
1053 1053 try:
1054 1054 self.rev(node)
1055 1055 return True
1056 1056 except KeyError:
1057 1057 return False
1058 1058
1059 1059 def candelta(self, baserev, rev):
1060 1060 """whether two revisions (baserev, rev) can be delta-ed or not"""
1061 1061 # Disable delta if either rev requires a content-changing flag
1062 1062 # processor (ex. LFS). This is because such a flag processor can alter
1063 1063 # the rawtext content that the delta will be based on, and two clients
1064 1064 # could have the same revlog node with different flags (i.e. different
1065 1065 # rawtext contents) and the delta could be incompatible.
1066 1066 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1067 1067 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1068 1068 return False
1069 1069 return True
1070 1070
1071 1071 def clearcaches(self):
1072 1072 self._cache = None
1073 1073 self._chainbasecache.clear()
1074 1074 self._chunkcache = (0, '')
1075 1075 self._pcache = {}
1076 1076
1077 1077 try:
1078 1078 self._nodecache.clearcaches()
1079 1079 except AttributeError:
1080 1080 self._nodecache = {nullid: nullrev}
1081 1081 self._nodepos = None
1082 1082
1083 1083 def rev(self, node):
1084 1084 try:
1085 1085 return self._nodecache[node]
1086 1086 except TypeError:
1087 1087 raise
1088 1088 except RevlogError:
1089 1089 # parsers.c radix tree lookup failed
1090 1090 if node == wdirid or node in wdirfilenodeids:
1091 1091 raise error.WdirUnsupported
1092 1092 raise LookupError(node, self.indexfile, _('no node'))
1093 1093 except KeyError:
1094 1094 # pure python cache lookup failed
1095 1095 n = self._nodecache
1096 1096 i = self.index
1097 1097 p = self._nodepos
1098 1098 if p is None:
1099 1099 p = len(i) - 2
1100 1100 else:
1101 1101 assert p < len(i)
1102 1102 for r in xrange(p, -1, -1):
1103 1103 v = i[r][7]
1104 1104 n[v] = r
1105 1105 if v == node:
1106 1106 self._nodepos = r - 1
1107 1107 return r
1108 1108 if node == wdirid or node in wdirfilenodeids:
1109 1109 raise error.WdirUnsupported
1110 1110 raise LookupError(node, self.indexfile, _('no node'))
1111 1111
1112 1112 # Accessors for index entries.
1113 1113
1114 1114 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1115 1115 # are flags.
1116 1116 def start(self, rev):
1117 1117 return int(self.index[rev][0] >> 16)
1118 1118
1119 1119 def flags(self, rev):
1120 1120 return self.index[rev][0] & 0xFFFF
1121 1121
1122 1122 def length(self, rev):
1123 1123 return self.index[rev][1]
1124 1124
1125 1125 def rawsize(self, rev):
1126 1126 """return the length of the uncompressed text for a given revision"""
1127 1127 l = self.index[rev][2]
1128 1128 if l >= 0:
1129 1129 return l
1130 1130
1131 1131 t = self.revision(rev, raw=True)
1132 1132 return len(t)
1133 1133
1134 1134 def size(self, rev):
1135 1135 """length of non-raw text (processed by a "read" flag processor)"""
1136 1136 # fast path: if no "read" flag processor could change the content,
1137 1137 # size is rawsize. note: ELLIPSIS is known to not change the content.
1138 1138 flags = self.flags(rev)
1139 1139 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1140 1140 return self.rawsize(rev)
1141 1141
1142 1142 return len(self.revision(rev, raw=False))
1143 1143
1144 1144 def chainbase(self, rev):
1145 1145 base = self._chainbasecache.get(rev)
1146 1146 if base is not None:
1147 1147 return base
1148 1148
1149 1149 index = self.index
1150 1150 iterrev = rev
1151 1151 base = index[iterrev][3]
1152 1152 while base != iterrev:
1153 1153 iterrev = base
1154 1154 base = index[iterrev][3]
1155 1155
1156 1156 self._chainbasecache[rev] = base
1157 1157 return base
1158 1158
1159 1159 def linkrev(self, rev):
1160 1160 return self.index[rev][4]
1161 1161
1162 1162 def parentrevs(self, rev):
1163 1163 try:
1164 1164 entry = self.index[rev]
1165 1165 except IndexError:
1166 1166 if rev == wdirrev:
1167 1167 raise error.WdirUnsupported
1168 1168 raise
1169 1169
1170 1170 return entry[5], entry[6]
1171 1171
1172 1172 def node(self, rev):
1173 1173 try:
1174 1174 return self.index[rev][7]
1175 1175 except IndexError:
1176 1176 if rev == wdirrev:
1177 1177 raise error.WdirUnsupported
1178 1178 raise
1179 1179
1180 1180 # Derived from index values.
1181 1181
1182 1182 def end(self, rev):
1183 1183 return self.start(rev) + self.length(rev)
1184 1184
1185 1185 def parents(self, node):
1186 1186 i = self.index
1187 1187 d = i[self.rev(node)]
1188 1188 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1189 1189
1190 1190 def chainlen(self, rev):
1191 1191 return self._chaininfo(rev)[0]
1192 1192
1193 1193 def _chaininfo(self, rev):
1194 1194 chaininfocache = self._chaininfocache
1195 1195 if rev in chaininfocache:
1196 1196 return chaininfocache[rev]
1197 1197 index = self.index
1198 1198 generaldelta = self._generaldelta
1199 1199 iterrev = rev
1200 1200 e = index[iterrev]
1201 1201 clen = 0
1202 1202 compresseddeltalen = 0
1203 1203 while iterrev != e[3]:
1204 1204 clen += 1
1205 1205 compresseddeltalen += e[1]
1206 1206 if generaldelta:
1207 1207 iterrev = e[3]
1208 1208 else:
1209 1209 iterrev -= 1
1210 1210 if iterrev in chaininfocache:
1211 1211 t = chaininfocache[iterrev]
1212 1212 clen += t[0]
1213 1213 compresseddeltalen += t[1]
1214 1214 break
1215 1215 e = index[iterrev]
1216 1216 else:
1217 1217 # Add text length of base since decompressing that also takes
1218 1218 # work. For cache hits the length is already included.
1219 1219 compresseddeltalen += e[1]
1220 1220 r = (clen, compresseddeltalen)
1221 1221 chaininfocache[rev] = r
1222 1222 return r
1223 1223
1224 1224 def _deltachain(self, rev, stoprev=None):
1225 1225 """Obtain the delta chain for a revision.
1226 1226
1227 1227 ``stoprev`` specifies a revision to stop at. If not specified, we
1228 1228 stop at the base of the chain.
1229 1229
1230 1230 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1231 1231 revs in ascending order and ``stopped`` is a bool indicating whether
1232 1232 ``stoprev`` was hit.
1233 1233 """
1234 1234 # Try C implementation.
1235 1235 try:
1236 1236 return self.index.deltachain(rev, stoprev, self._generaldelta)
1237 1237 except AttributeError:
1238 1238 pass
1239 1239
1240 1240 chain = []
1241 1241
1242 1242 # Alias to prevent attribute lookup in tight loop.
1243 1243 index = self.index
1244 1244 generaldelta = self._generaldelta
1245 1245
1246 1246 iterrev = rev
1247 1247 e = index[iterrev]
1248 1248 while iterrev != e[3] and iterrev != stoprev:
1249 1249 chain.append(iterrev)
1250 1250 if generaldelta:
1251 1251 iterrev = e[3]
1252 1252 else:
1253 1253 iterrev -= 1
1254 1254 e = index[iterrev]
1255 1255
1256 1256 if iterrev == stoprev:
1257 1257 stopped = True
1258 1258 else:
1259 1259 chain.append(iterrev)
1260 1260 stopped = False
1261 1261
1262 1262 chain.reverse()
1263 1263 return chain, stopped
1264 1264
1265 1265 def ancestors(self, revs, stoprev=0, inclusive=False):
1266 1266 """Generate the ancestors of 'revs' in reverse topological order.
1267 1267 Does not generate revs lower than stoprev.
1268 1268
1269 1269 See the documentation for ancestor.lazyancestors for more details."""
1270 1270
1271 1271 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1272 1272 inclusive=inclusive)
1273 1273
1274 1274 def descendants(self, revs):
1275 1275 """Generate the descendants of 'revs' in revision order.
1276 1276
1277 1277 Yield a sequence of revision numbers starting with a child of
1278 1278 some rev in revs, i.e., each revision is *not* considered a
1279 1279 descendant of itself. Results are ordered by revision number (a
1280 1280 topological sort)."""
1281 1281 first = min(revs)
1282 1282 if first == nullrev:
1283 1283 for i in self:
1284 1284 yield i
1285 1285 return
1286 1286
1287 1287 seen = set(revs)
1288 1288 for i in self.revs(start=first + 1):
1289 1289 for x in self.parentrevs(i):
1290 1290 if x != nullrev and x in seen:
1291 1291 seen.add(i)
1292 1292 yield i
1293 1293 break
1294 1294
1295 1295 def findcommonmissing(self, common=None, heads=None):
1296 1296 """Return a tuple of the ancestors of common and the ancestors of heads
1297 1297 that are not ancestors of common. In revset terminology, we return the
1298 1298 tuple:
1299 1299
1300 1300 ::common, (::heads) - (::common)
1301 1301
1302 1302 The list is sorted by revision number, meaning it is
1303 1303 topologically sorted.
1304 1304
1305 1305 'heads' and 'common' are both lists of node IDs. If heads is
1306 1306 not supplied, uses all of the revlog's heads. If common is not
1307 1307 supplied, uses nullid."""
1308 1308 if common is None:
1309 1309 common = [nullid]
1310 1310 if heads is None:
1311 1311 heads = self.heads()
1312 1312
1313 1313 common = [self.rev(n) for n in common]
1314 1314 heads = [self.rev(n) for n in heads]
1315 1315
1316 1316 # we want the ancestors, but inclusive
1317 1317 class lazyset(object):
1318 1318 def __init__(self, lazyvalues):
1319 1319 self.addedvalues = set()
1320 1320 self.lazyvalues = lazyvalues
1321 1321
1322 1322 def __contains__(self, value):
1323 1323 return value in self.addedvalues or value in self.lazyvalues
1324 1324
1325 1325 def __iter__(self):
1326 1326 added = self.addedvalues
1327 1327 for r in added:
1328 1328 yield r
1329 1329 for r in self.lazyvalues:
1330 1330 if not r in added:
1331 1331 yield r
1332 1332
1333 1333 def add(self, value):
1334 1334 self.addedvalues.add(value)
1335 1335
1336 1336 def update(self, values):
1337 1337 self.addedvalues.update(values)
1338 1338
1339 1339 has = lazyset(self.ancestors(common))
1340 1340 has.add(nullrev)
1341 1341 has.update(common)
1342 1342
1343 1343 # take all ancestors from heads that aren't in has
1344 1344 missing = set()
1345 1345 visit = collections.deque(r for r in heads if r not in has)
1346 1346 while visit:
1347 1347 r = visit.popleft()
1348 1348 if r in missing:
1349 1349 continue
1350 1350 else:
1351 1351 missing.add(r)
1352 1352 for p in self.parentrevs(r):
1353 1353 if p not in has:
1354 1354 visit.append(p)
1355 1355 missing = list(missing)
1356 1356 missing.sort()
1357 1357 return has, [self.node(miss) for miss in missing]
1358 1358
1359 1359 def incrementalmissingrevs(self, common=None):
1360 1360 """Return an object that can be used to incrementally compute the
1361 1361 revision numbers of the ancestors of arbitrary sets that are not
1362 1362 ancestors of common. This is an ancestor.incrementalmissingancestors
1363 1363 object.
1364 1364
1365 1365 'common' is a list of revision numbers. If common is not supplied, uses
1366 1366 nullrev.
1367 1367 """
1368 1368 if common is None:
1369 1369 common = [nullrev]
1370 1370
1371 1371 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1372 1372
1373 1373 def findmissingrevs(self, common=None, heads=None):
1374 1374 """Return the revision numbers of the ancestors of heads that
1375 1375 are not ancestors of common.
1376 1376
1377 1377 More specifically, return a list of revision numbers corresponding to
1378 1378 nodes N such that every N satisfies the following constraints:
1379 1379
1380 1380 1. N is an ancestor of some node in 'heads'
1381 1381 2. N is not an ancestor of any node in 'common'
1382 1382
1383 1383 The list is sorted by revision number, meaning it is
1384 1384 topologically sorted.
1385 1385
1386 1386 'heads' and 'common' are both lists of revision numbers. If heads is
1387 1387 not supplied, uses all of the revlog's heads. If common is not
1388 1388 supplied, uses nullid."""
1389 1389 if common is None:
1390 1390 common = [nullrev]
1391 1391 if heads is None:
1392 1392 heads = self.headrevs()
1393 1393
1394 1394 inc = self.incrementalmissingrevs(common=common)
1395 1395 return inc.missingancestors(heads)
1396 1396
1397 1397 def findmissing(self, common=None, heads=None):
1398 1398 """Return the ancestors of heads that are not ancestors of common.
1399 1399
1400 1400 More specifically, return a list of nodes N such that every N
1401 1401 satisfies the following constraints:
1402 1402
1403 1403 1. N is an ancestor of some node in 'heads'
1404 1404 2. N is not an ancestor of any node in 'common'
1405 1405
1406 1406 The list is sorted by revision number, meaning it is
1407 1407 topologically sorted.
1408 1408
1409 1409 'heads' and 'common' are both lists of node IDs. If heads is
1410 1410 not supplied, uses all of the revlog's heads. If common is not
1411 1411 supplied, uses nullid."""
1412 1412 if common is None:
1413 1413 common = [nullid]
1414 1414 if heads is None:
1415 1415 heads = self.heads()
1416 1416
1417 1417 common = [self.rev(n) for n in common]
1418 1418 heads = [self.rev(n) for n in heads]
1419 1419
1420 1420 inc = self.incrementalmissingrevs(common=common)
1421 1421 return [self.node(r) for r in inc.missingancestors(heads)]
1422 1422
1423 1423 def nodesbetween(self, roots=None, heads=None):
1424 1424 """Return a topological path from 'roots' to 'heads'.
1425 1425
1426 1426 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1427 1427 topologically sorted list of all nodes N that satisfy both of
1428 1428 these constraints:
1429 1429
1430 1430 1. N is a descendant of some node in 'roots'
1431 1431 2. N is an ancestor of some node in 'heads'
1432 1432
1433 1433 Every node is considered to be both a descendant and an ancestor
1434 1434 of itself, so every reachable node in 'roots' and 'heads' will be
1435 1435 included in 'nodes'.
1436 1436
1437 1437 'outroots' is the list of reachable nodes in 'roots', i.e., the
1438 1438 subset of 'roots' that is returned in 'nodes'. Likewise,
1439 1439 'outheads' is the subset of 'heads' that is also in 'nodes'.
1440 1440
1441 1441 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1442 1442 unspecified, uses nullid as the only root. If 'heads' is
1443 1443 unspecified, uses list of all of the revlog's heads."""
1444 1444 nonodes = ([], [], [])
1445 1445 if roots is not None:
1446 1446 roots = list(roots)
1447 1447 if not roots:
1448 1448 return nonodes
1449 1449 lowestrev = min([self.rev(n) for n in roots])
1450 1450 else:
1451 1451 roots = [nullid] # Everybody's a descendant of nullid
1452 1452 lowestrev = nullrev
1453 1453 if (lowestrev == nullrev) and (heads is None):
1454 1454 # We want _all_ the nodes!
1455 1455 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1456 1456 if heads is None:
1457 1457 # All nodes are ancestors, so the latest ancestor is the last
1458 1458 # node.
1459 1459 highestrev = len(self) - 1
1460 1460 # Set ancestors to None to signal that every node is an ancestor.
1461 1461 ancestors = None
1462 1462 # Set heads to an empty dictionary for later discovery of heads
1463 1463 heads = {}
1464 1464 else:
1465 1465 heads = list(heads)
1466 1466 if not heads:
1467 1467 return nonodes
1468 1468 ancestors = set()
1469 1469 # Turn heads into a dictionary so we can remove 'fake' heads.
1470 1470 # Also, later we will be using it to filter out the heads we can't
1471 1471 # find from roots.
1472 1472 heads = dict.fromkeys(heads, False)
1473 1473 # Start at the top and keep marking parents until we're done.
1474 1474 nodestotag = set(heads)
1475 1475 # Remember where the top was so we can use it as a limit later.
1476 1476 highestrev = max([self.rev(n) for n in nodestotag])
1477 1477 while nodestotag:
1478 1478 # grab a node to tag
1479 1479 n = nodestotag.pop()
1480 1480 # Never tag nullid
1481 1481 if n == nullid:
1482 1482 continue
1483 1483 # A node's revision number represents its place in a
1484 1484 # topologically sorted list of nodes.
1485 1485 r = self.rev(n)
1486 1486 if r >= lowestrev:
1487 1487 if n not in ancestors:
1488 1488 # If we are possibly a descendant of one of the roots
1489 1489 # and we haven't already been marked as an ancestor
1490 1490 ancestors.add(n) # Mark as ancestor
1491 1491 # Add non-nullid parents to list of nodes to tag.
1492 1492 nodestotag.update([p for p in self.parents(n) if
1493 1493 p != nullid])
1494 1494 elif n in heads: # We've seen it before, is it a fake head?
1495 1495 # So it is; real heads should not be the ancestors of
1496 1496 # any other heads.
1497 1497 heads.pop(n)
1498 1498 if not ancestors:
1499 1499 return nonodes
1500 1500 # Now that we have our set of ancestors, we want to remove any
1501 1501 # roots that are not ancestors.
1502 1502
1503 1503 # If one of the roots was nullid, everything is included anyway.
1504 1504 if lowestrev > nullrev:
1505 1505 # But, since we weren't, let's recompute the lowest rev to not
1506 1506 # include roots that aren't ancestors.
1507 1507
1508 1508 # Filter out roots that aren't ancestors of heads
1509 1509 roots = [root for root in roots if root in ancestors]
1510 1510 # Recompute the lowest revision
1511 1511 if roots:
1512 1512 lowestrev = min([self.rev(root) for root in roots])
1513 1513 else:
1514 1514 # No more roots? Return empty list
1515 1515 return nonodes
1516 1516 else:
1517 1517 # We are descending from nullid, and don't need to care about
1518 1518 # any other roots.
1519 1519 lowestrev = nullrev
1520 1520 roots = [nullid]
1521 1521 # Transform our roots list into a set.
1522 1522 descendants = set(roots)
1523 1523 # Also, keep the original roots so we can filter out roots that aren't
1524 1524 # 'real' roots (i.e. are descended from other roots).
1525 1525 roots = descendants.copy()
1526 1526 # Our topologically sorted list of output nodes.
1527 1527 orderedout = []
1528 1528 # Don't start at nullid since we don't want nullid in our output list,
1529 1529 # and if nullid shows up in descendants, empty parents will look like
1530 1530 # they're descendants.
1531 1531 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1532 1532 n = self.node(r)
1533 1533 isdescendant = False
1534 1534 if lowestrev == nullrev: # Everybody is a descendant of nullid
1535 1535 isdescendant = True
1536 1536 elif n in descendants:
1537 1537 # n is already a descendant
1538 1538 isdescendant = True
1539 1539 # This check only needs to be done here because all the roots
1540 1540 # will start being marked as descendants before the loop.
1541 1541 if n in roots:
1542 1542 # If n was a root, check if it's a 'real' root.
1543 1543 p = tuple(self.parents(n))
1544 1544 # If any of its parents are descendants, it's not a root.
1545 1545 if (p[0] in descendants) or (p[1] in descendants):
1546 1546 roots.remove(n)
1547 1547 else:
1548 1548 p = tuple(self.parents(n))
1549 1549 # A node is a descendant if either of its parents is a
1550 1550 # descendant. (We seeded the descendants set with the roots
1551 1551 # up there, remember?)
1552 1552 if (p[0] in descendants) or (p[1] in descendants):
1553 1553 descendants.add(n)
1554 1554 isdescendant = True
1555 1555 if isdescendant and ((ancestors is None) or (n in ancestors)):
1556 1556 # Only include nodes that are both descendants and ancestors.
1557 1557 orderedout.append(n)
1558 1558 if (ancestors is not None) and (n in heads):
1559 1559 # We're trying to figure out which heads are reachable
1560 1560 # from roots.
1561 1561 # Mark this head as having been reached
1562 1562 heads[n] = True
1563 1563 elif ancestors is None:
1564 1564 # Otherwise, we're trying to discover the heads.
1565 1565 # Assume this is a head because if it isn't, the next step
1566 1566 # will eventually remove it.
1567 1567 heads[n] = True
1568 1568 # But, obviously its parents aren't.
1569 1569 for p in self.parents(n):
1570 1570 heads.pop(p, None)
1571 1571 heads = [head for head, flag in heads.iteritems() if flag]
1572 1572 roots = list(roots)
1573 1573 assert orderedout
1574 1574 assert roots
1575 1575 assert heads
1576 1576 return (orderedout, roots, heads)
1577 1577
1578 1578 def headrevs(self):
1579 1579 try:
1580 1580 return self.index.headrevs()
1581 1581 except AttributeError:
1582 1582 return self._headrevs()
1583 1583
1584 1584 def computephases(self, roots):
1585 1585 return self.index.computephasesmapsets(roots)
1586 1586
1587 1587 def _headrevs(self):
1588 1588 count = len(self)
1589 1589 if not count:
1590 1590 return [nullrev]
1591 1591 # we won't iterate over filtered revs, so nobody is a head at the start
1592 1592 ishead = [0] * (count + 1)
1593 1593 index = self.index
1594 1594 for r in self:
1595 1595 ishead[r] = 1 # I may be a head
1596 1596 e = index[r]
1597 1597 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1598 1598 return [r for r, val in enumerate(ishead) if val]
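
# Illustration (a standalone sketch, not part of revlog.py): the pure-Python
# head computation above in miniature. ``parents`` is a toy parent table
# mapping each revision to its (p1, p2) pair, with -1 standing in for nullrev.
def toy_headrevs(parents):
    ishead = [True] * len(parents)
    for r, ps in enumerate(parents):
        for p in ps:
            if p != -1:
                ishead[p] = False  # a revision with a child is not a head
    return [r for r, flag in enumerate(ishead) if flag]

# DAG 0 <- 1 <- 2 and 1 <- 3: revisions 2 and 3 have no children.
assert toy_headrevs([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]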
1599 1599
1600 1600 def heads(self, start=None, stop=None):
1601 1601 """return the list of all nodes that have no children
1602 1602
1603 1603 if start is specified, only heads that are descendants of
1604 1604 start will be returned
1605 1605 if stop is specified, all the revs in stop are treated as if
1606 1606 they had no children
1607 1607 """
1608 1608 if start is None and stop is None:
1609 1609 if not len(self):
1610 1610 return [nullid]
1611 1611 return [self.node(r) for r in self.headrevs()]
1612 1612
1613 1613 if start is None:
1614 1614 start = nullid
1615 1615 if stop is None:
1616 1616 stop = []
1617 1617 stoprevs = set([self.rev(n) for n in stop])
1618 1618 startrev = self.rev(start)
1619 1619 reachable = {startrev}
1620 1620 heads = {startrev}
1621 1621
1622 1622 parentrevs = self.parentrevs
1623 1623 for r in self.revs(start=startrev + 1):
1624 1624 for p in parentrevs(r):
1625 1625 if p in reachable:
1626 1626 if r not in stoprevs:
1627 1627 reachable.add(r)
1628 1628 heads.add(r)
1629 1629 if p in heads and p not in stoprevs:
1630 1630 heads.remove(p)
1631 1631
1632 1632 return [self.node(r) for r in heads]
1633 1633
1634 1634 def children(self, node):
1635 1635 """find the children of a given node"""
1636 1636 c = []
1637 1637 p = self.rev(node)
1638 1638 for r in self.revs(start=p + 1):
1639 1639 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1640 1640 if prevs:
1641 1641 for pr in prevs:
1642 1642 if pr == p:
1643 1643 c.append(self.node(r))
1644 1644 elif p == nullrev:
1645 1645 c.append(self.node(r))
1646 1646 return c
1647 1647
1648 1648 def commonancestorsheads(self, a, b):
1649 1649 """calculate all the heads of the common ancestors of nodes a and b"""
1650 1650 a, b = self.rev(a), self.rev(b)
1651 1651 ancs = self._commonancestorsheads(a, b)
1652 1652 return pycompat.maplist(self.node, ancs)
1653 1653
1654 1654 def _commonancestorsheads(self, *revs):
1655 1655 """calculate all the heads of the common ancestors of revs"""
1656 1656 try:
1657 1657 ancs = self.index.commonancestorsheads(*revs)
1658 1658 except (AttributeError, OverflowError): # C implementation failed
1659 1659 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1660 1660 return ancs
1661 1661
1662 1662 def isancestor(self, a, b):
1663 1663 """return True if node a is an ancestor of node b
1664 1664
1665 1665 A revision is considered an ancestor of itself."""
1666 1666 a, b = self.rev(a), self.rev(b)
1667 1667 return self.isancestorrev(a, b)
1668 1668
1669 def descendant(self, a, b):
1670 msg = (b'revlog.descendant is deprecated, use revlog.isancestorrev')
1671 util.nouideprecwarn(msg, b'4.7')
1672 return self.isancestorrev(a, b)
1673
1669 1674 def isancestorrev(self, a, b):
1670 1675 """return True if revision a is an ancestor of revision b
1671 1676
1672 1677 A revision is considered an ancestor of itself.
1673 1678
1674 1679 The implementation of this is trivial but the use of
1675 1680 commonancestorsheads is not."""
1676 1681 if a == nullrev:
1677 1682 return True
1678 1683 elif a == b:
1679 1684 return True
1680 1685 elif a > b:
1681 1686 return False
1682 1687 return a in self._commonancestorsheads(a, b)
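
# Illustration (a standalone sketch with toy helpers, not Mercurial's
# ancestor code): why ``a in commonancestorsheads(a, b)`` answers the
# question. When a is an ancestor of b, the common ancestors of a and b are
# exactly the ancestors of a, whose unique head is a itself.
def toy_ancestors(parents, rev):
    seen, stack = {rev}, [rev]  # a revision counts as its own ancestor
    while stack:
        for p in parents[stack.pop()]:
            if p != -1 and p not in seen:
                seen.add(p)
                stack.append(p)
    return seen

parents = [(-1, -1), (0, -1), (1, -1), (1, -1)]  # 0 <- 1 <- 2, 1 <- 3
assert 0 in toy_ancestors(parents, 2)      # 0 is an ancestor of 2
assert 3 not in toy_ancestors(parents, 2)  # 3 is on a different branch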
1683 1688
1684 1689 def ancestor(self, a, b):
1685 1690 """calculate the "best" common ancestor of nodes a and b"""
1686 1691
1687 1692 a, b = self.rev(a), self.rev(b)
1688 1693 try:
1689 1694 ancs = self.index.ancestors(a, b)
1690 1695 except (AttributeError, OverflowError):
1691 1696 ancs = ancestor.ancestors(self.parentrevs, a, b)
1692 1697 if ancs:
1693 1698 # choose a consistent winner when there's a tie
1694 1699 return min(map(self.node, ancs))
1695 1700 return nullid
1696 1701
1697 1702 def _match(self, id):
1698 1703 if isinstance(id, int):
1699 1704 # rev
1700 1705 return self.node(id)
1701 1706 if len(id) == 20:
1702 1707 # possibly a binary node
1703 1708 # odds of a binary node being all hex in ASCII are 1 in 10**25
1704 1709 try:
1705 1710 node = id
1706 1711 self.rev(node) # quick search the index
1707 1712 return node
1708 1713 except LookupError:
1709 1714 pass # may be partial hex id
1710 1715 try:
1711 1716 # str(rev)
1712 1717 rev = int(id)
1713 1718 if "%d" % rev != id:
1714 1719 raise ValueError
1715 1720 if rev < 0:
1716 1721 rev = len(self) + rev
1717 1722 if rev < 0 or rev >= len(self):
1718 1723 raise ValueError
1719 1724 return self.node(rev)
1720 1725 except (ValueError, OverflowError):
1721 1726 pass
1722 1727 if len(id) == 40:
1723 1728 try:
1724 1729 # a full hex nodeid?
1725 1730 node = bin(id)
1726 1731 self.rev(node)
1727 1732 return node
1728 1733 except (TypeError, LookupError):
1729 1734 pass
1730 1735
1731 1736 def _partialmatch(self, id):
1732 1737 # we don't care about wdirfilenodeids as they should always be full hashes
1733 1738 maybewdir = wdirhex.startswith(id)
1734 1739 try:
1735 1740 partial = self.index.partialmatch(id)
1736 1741 if partial and self.hasnode(partial):
1737 1742 if maybewdir:
1738 1743 # single 'ff...' match in radix tree, ambiguous with wdir
1739 1744 raise RevlogError
1740 1745 return partial
1741 1746 if maybewdir:
1742 1747 # no 'ff...' match in radix tree, wdir identified
1743 1748 raise error.WdirUnsupported
1744 1749 return None
1745 1750 except RevlogError:
1746 1751 # parsers.c radix tree lookup gave multiple matches
1747 1752 # fast path: for unfiltered changelog, radix tree is accurate
1748 1753 if not getattr(self, 'filteredrevs', None):
1749 1754 raise LookupError(id, self.indexfile,
1750 1755 _('ambiguous identifier'))
1751 1756 # fall through to slow path that filters hidden revisions
1752 1757 except (AttributeError, ValueError):
1753 1758 # we are pure python, or key was too short to search radix tree
1754 1759 pass
1755 1760
1756 1761 if id in self._pcache:
1757 1762 return self._pcache[id]
1758 1763
1759 1764 if len(id) <= 40:
1760 1765 try:
1761 1766 # hex(node)[:...]
1762 1767 l = len(id) // 2 # grab an even number of digits
1763 1768 prefix = bin(id[:l * 2])
1764 1769 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1765 1770 nl = [n for n in nl if hex(n).startswith(id) and
1766 1771 self.hasnode(n)]
1767 1772 if len(nl) > 0:
1768 1773 if len(nl) == 1 and not maybewdir:
1769 1774 self._pcache[id] = nl[0]
1770 1775 return nl[0]
1771 1776 raise LookupError(id, self.indexfile,
1772 1777 _('ambiguous identifier'))
1773 1778 if maybewdir:
1774 1779 raise error.WdirUnsupported
1775 1780 return None
1776 1781 except TypeError:
1777 1782 pass
1778 1783
1779 1784 def lookup(self, id):
1780 1785 """locate a node based on:
1781 1786 - revision number or str(revision number)
1782 1787 - nodeid or subset of hex nodeid
1783 1788 """
1784 1789 n = self._match(id)
1785 1790 if n is not None:
1786 1791 return n
1787 1792 n = self._partialmatch(id)
1788 1793 if n:
1789 1794 return n
1790 1795
1791 1796 raise LookupError(id, self.indexfile, _('no match found'))
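
# Illustration (a standalone sketch of the resolution order above, not the
# real API): exact forms are tried first, then unique-prefix matching, and
# an ambiguous or empty match raises LookupError.
def toy_lookup(nodes, id):
    # nodes: full hex nodeids, where the list index is the revision number
    if isinstance(id, int):
        return nodes[id]                     # revision number
    if id.isdigit() and int(id) < len(nodes):
        return nodes[int(id)]                # str(revision number)
    matches = [n for n in nodes if n.startswith(id)]
    if len(matches) == 1:
        return matches[0]                    # unique hex prefix
    raise LookupError('ambiguous identifier' if matches else 'no match found')

nodes = ['aa11' + '0' * 36, 'ab22' + '0' * 36]
assert toy_lookup(nodes, 'ab') == nodes[1]   # unique prefix
assert toy_lookup(nodes, '1') == nodes[1]    # numeric form wins over hex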
1792 1797
1793 1798 def shortest(self, node, minlength=1):
1794 1799 """Find the shortest unambiguous prefix that matches node."""
1795 1800 def isvalid(prefix):
1796 1801 try:
1797 1802 node = self._partialmatch(prefix)
1798 1803 except error.RevlogError:
1799 1804 return False
1800 1805 except error.WdirUnsupported:
1801 1806 # single 'ff...' match
1802 1807 return True
1803 1808 if node is None:
1804 1809 raise LookupError(node, self.indexfile, _('no node'))
1805 1810 return True
1806 1811
1807 1812 def maybewdir(prefix):
1808 1813 return all(c == 'f' for c in prefix)
1809 1814
1810 1815 hexnode = hex(node)
1811 1816
1812 1817 def disambiguate(hexnode, minlength):
1813 1818 """Disambiguate against wdirid."""
1814 1819 for length in range(minlength, 41):
1815 1820 prefix = hexnode[:length]
1816 1821 if not maybewdir(prefix):
1817 1822 return prefix
1818 1823
1819 1824 if not getattr(self, 'filteredrevs', None):
1820 1825 try:
1821 1826 length = max(self.index.shortest(node), minlength)
1822 1827 return disambiguate(hexnode, length)
1823 1828 except RevlogError:
1824 1829 if node != wdirid:
1825 1830 raise LookupError(node, self.indexfile, _('no node'))
1826 1831 except AttributeError:
1827 1832 # Fall through to pure code
1828 1833 pass
1829 1834
1830 1835 if node == wdirid:
1831 1836 for length in range(minlength, 41):
1832 1837 prefix = hexnode[:length]
1833 1838 if isvalid(prefix):
1834 1839 return prefix
1835 1840
1836 1841 for length in range(minlength, 41):
1837 1842 prefix = hexnode[:length]
1838 1843 if isvalid(prefix):
1839 1844 return disambiguate(hexnode, length)
1840 1845
1841 1846 def cmp(self, node, text):
1842 1847 """compare text with a given file revision
1843 1848
1844 1849 returns True if text is different from what is stored.
1845 1850 """
1846 1851 p1, p2 = self.parents(node)
1847 1852 return hash(text, p1, p2) != node
1848 1853
1849 1854 def _cachesegment(self, offset, data):
1850 1855 """Add a segment to the revlog cache.
1851 1856
1852 1857 Accepts an absolute offset and the data that is at that location.
1853 1858 """
1854 1859 o, d = self._chunkcache
1855 1860 # try to add to existing cache
1856 1861 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1857 1862 self._chunkcache = o, d + data
1858 1863 else:
1859 1864 self._chunkcache = offset, data
1860 1865
1861 1866 def _readsegment(self, offset, length, df=None):
1862 1867 """Load a segment of raw data from the revlog.
1863 1868
1864 1869 Accepts an absolute offset, length to read, and an optional existing
1865 1870 file handle to read from.
1866 1871
1867 1872 If an existing file handle is passed, its seek position will be
1868 1873 moved and will NOT be restored.
1869 1874
1870 1875 Returns a str or buffer of raw byte data.
1871 1876 """
1872 1877 # Cache data both forward and backward around the requested
1873 1878 # data, in a fixed size window. This helps speed up operations
1874 1879 # involving reading the revlog backwards.
1875 1880 cachesize = self._chunkcachesize
1876 1881 realoffset = offset & ~(cachesize - 1)
1877 1882 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1878 1883 - realoffset)
1879 1884 with self._datareadfp(df) as df:
1880 1885 df.seek(realoffset)
1881 1886 d = df.read(reallength)
1882 1887 self._cachesegment(realoffset, d)
1883 1888 if offset != realoffset or reallength != length:
1884 1889 return util.buffer(d, offset - realoffset, length)
1885 1890 return d
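
# Illustration (a worked example of the alignment math above): with a
# 64 KiB cache window, a 100-byte read at offset 70000 is widened to the
# whole surrounding window, so neighboring reads are served from cache.
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)  # covers [65536, 131072)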
1886 1891
1887 1892 def _getsegment(self, offset, length, df=None):
1888 1893 """Obtain a segment of raw data from the revlog.
1889 1894
1890 1895 Accepts an absolute offset, length of bytes to obtain, and an
1891 1896 optional file handle to the already-opened revlog. If the file
1892 1897 handle is used, its original seek position will not be preserved.
1893 1898
1894 1899 Requests for data may be returned from a cache.
1895 1900
1896 1901 Returns a str or a buffer instance of raw byte data.
1897 1902 """
1898 1903 o, d = self._chunkcache
1899 1904 l = len(d)
1900 1905
1901 1906 # is it in the cache?
1902 1907 cachestart = offset - o
1903 1908 cacheend = cachestart + length
1904 1909 if cachestart >= 0 and cacheend <= l:
1905 1910 if cachestart == 0 and cacheend == l:
1906 1911 return d # avoid a copy
1907 1912 return util.buffer(d, cachestart, cacheend - cachestart)
1908 1913
1909 1914 return self._readsegment(offset, length, df=df)
1910 1915
1911 1916 def _getsegmentforrevs(self, startrev, endrev, df=None):
1912 1917 """Obtain a segment of raw data corresponding to a range of revisions.
1913 1918
1914 1919 Accepts the start and end revisions and an optional already-open
1915 1920 file handle to be used for reading. If the file handle is used, its
1916 1921 seek position will not be preserved.
1917 1922
1918 1923 Requests for data may be satisfied by a cache.
1919 1924
1920 1925 Returns a 2-tuple of (offset, data) for the requested range of
1921 1926 revisions. Offset is the integer offset from the beginning of the
1922 1927 revlog and data is a str or buffer of the raw byte data.
1923 1928
1924 1929 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1925 1930 to determine where each revision's data begins and ends.
1926 1931 """
1927 1932 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1928 1933 # (functions are expensive).
1929 1934 index = self.index
1930 1935 istart = index[startrev]
1931 1936 start = int(istart[0] >> 16)
1932 1937 if startrev == endrev:
1933 1938 end = start + istart[1]
1934 1939 else:
1935 1940 iend = index[endrev]
1936 1941 end = int(iend[0] >> 16) + iend[1]
1937 1942
1938 1943 if self._inline:
1939 1944 start += (startrev + 1) * self._io.size
1940 1945 end += (endrev + 1) * self._io.size
1941 1946 length = end - start
1942 1947
1943 1948 return start, self._getsegment(start, length, df=df)
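
# Illustration (a worked example of the inline adjustment above): in an
# inline revlog the index entries and data chunks are interleaved in one
# file, so rev r's data physically starts (r + 1) index entries past its
# logical offset. 64 bytes is the revlogv1 index entry size.
iosize = 64
logicalstart = 500  # self.start(rev): offset within the pure data stream
rev = 3
physicalstart = logicalstart + (rev + 1) * iosize
assert physicalstart == 756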
1944 1949
1945 1950 def _chunk(self, rev, df=None):
1946 1951 """Obtain a single decompressed chunk for a revision.
1947 1952
1948 1953 Accepts an integer revision and an optional already-open file handle
1949 1954 to be used for reading. If used, the seek position of the file will not
1950 1955 be preserved.
1951 1956
1952 1957 Returns a str holding uncompressed data for the requested revision.
1953 1958 """
1954 1959 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1955 1960
1956 1961 def _chunks(self, revs, df=None, targetsize=None):
1957 1962 """Obtain decompressed chunks for the specified revisions.
1958 1963
1959 1964 Accepts an iterable of numeric revisions that are assumed to be in
1960 1965 ascending order. Also accepts an optional already-open file handle
1961 1966 to be used for reading. If used, the seek position of the file will
1962 1967 not be preserved.
1963 1968
1964 1969 This function is similar to calling ``self._chunk()`` multiple times,
1965 1970 but is faster.
1966 1971
1967 1972 Returns a list with decompressed data for each requested revision.
1968 1973 """
1969 1974 if not revs:
1970 1975 return []
1971 1976 start = self.start
1972 1977 length = self.length
1973 1978 inline = self._inline
1974 1979 iosize = self._io.size
1975 1980 buffer = util.buffer
1976 1981
1977 1982 l = []
1978 1983 ladd = l.append
1979 1984
1980 1985 if not self._withsparseread:
1981 1986 slicedchunks = (revs,)
1982 1987 else:
1983 1988 slicedchunks = _slicechunk(self, revs, targetsize)
1984 1989
1985 1990 for revschunk in slicedchunks:
1986 1991 firstrev = revschunk[0]
1987 1992 # Skip trailing revisions with empty diff
1988 1993 for lastrev in revschunk[::-1]:
1989 1994 if length(lastrev) != 0:
1990 1995 break
1991 1996
1992 1997 try:
1993 1998 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1994 1999 except OverflowError:
1995 2000 # issue4215 - we can't cache a run of chunks greater than
1996 2001 # 2G on Windows
1997 2002 return [self._chunk(rev, df=df) for rev in revschunk]
1998 2003
1999 2004 decomp = self.decompress
2000 2005 for rev in revschunk:
2001 2006 chunkstart = start(rev)
2002 2007 if inline:
2003 2008 chunkstart += (rev + 1) * iosize
2004 2009 chunklength = length(rev)
2005 2010 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2006 2011
2007 2012 return l
2008 2013
2009 2014 def _chunkclear(self):
2010 2015 """Clear the raw chunk cache."""
2011 2016 self._chunkcache = (0, '')
2012 2017
2013 2018 def deltaparent(self, rev):
2014 2019 """return deltaparent of the given revision"""
2015 2020 base = self.index[rev][3]
2016 2021 if base == rev:
2017 2022 return nullrev
2018 2023 elif self._generaldelta:
2019 2024 return base
2020 2025 else:
2021 2026 return rev - 1
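
# Illustration (a standalone sketch of the base-field semantics above):
# with generaldelta the stored base names the delta parent directly;
# without it, deltas always chain against the previous revision. In both
# layouts base == rev marks a full snapshot.
def toy_deltaparent(rev, base, generaldelta):
    if base == rev:
        return -1  # nullrev: full snapshot, no delta parent
    return base if generaldelta else rev - 1

assert toy_deltaparent(5, 5, True) == -1   # snapshot
assert toy_deltaparent(5, 2, True) == 2    # generaldelta: explicit base
assert toy_deltaparent(5, 2, False) == 4   # legacy: previous revision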
2022 2027
2023 2028 def revdiff(self, rev1, rev2):
2024 2029 """return or calculate a delta between two revisions
2025 2030
2026 2031 The delta calculated is in binary form and is intended to be written to
2027 2032 revlog data directly. So this function needs raw revision data.
2028 2033 """
2029 2034 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2030 2035 return bytes(self._chunk(rev2))
2031 2036
2032 2037 return mdiff.textdiff(self.revision(rev1, raw=True),
2033 2038 self.revision(rev2, raw=True))
2034 2039
2035 2040 def revision(self, nodeorrev, _df=None, raw=False):
2036 2041 """return an uncompressed revision of a given node or revision
2037 2042 number.
2038 2043
2039 2044 _df - an existing file handle to read from. (internal-only)
2040 2045 raw - an optional argument specifying if the revision data is to be
2041 2046 treated as raw data when applying flag transforms. 'raw' should be set
2042 2047 to True when generating changegroups or in debug commands.
2043 2048 """
2044 2049 if isinstance(nodeorrev, int):
2045 2050 rev = nodeorrev
2046 2051 node = self.node(rev)
2047 2052 else:
2048 2053 node = nodeorrev
2049 2054 rev = None
2050 2055
2051 2056 cachedrev = None
2052 2057 flags = None
2053 2058 rawtext = None
2054 2059 if node == nullid:
2055 2060 return ""
2056 2061 if self._cache:
2057 2062 if self._cache[0] == node:
2058 2063 # _cache only stores rawtext
2059 2064 if raw:
2060 2065 return self._cache[2]
2061 2066 # duplicated, but good for perf
2062 2067 if rev is None:
2063 2068 rev = self.rev(node)
2064 2069 if flags is None:
2065 2070 flags = self.flags(rev)
2066 2071 # no extra flags set, no flag processor runs, text = rawtext
2067 2072 if flags == REVIDX_DEFAULT_FLAGS:
2068 2073 return self._cache[2]
2069 2074 # rawtext is reusable. need to run flag processor
2070 2075 rawtext = self._cache[2]
2071 2076
2072 2077 cachedrev = self._cache[1]
2073 2078
2074 2079 # look up what we need to read
2075 2080 if rawtext is None:
2076 2081 if rev is None:
2077 2082 rev = self.rev(node)
2078 2083
2079 2084 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2080 2085 if stopped:
2081 2086 rawtext = self._cache[2]
2082 2087
2083 2088 # drop cache to save memory
2084 2089 self._cache = None
2085 2090
2086 2091 targetsize = None
2087 2092 rawsize = self.index[rev][2]
2088 2093 if 0 <= rawsize:
2089 2094 targetsize = 4 * rawsize
2090 2095
2091 2096 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2092 2097 if rawtext is None:
2093 2098 rawtext = bytes(bins[0])
2094 2099 bins = bins[1:]
2095 2100
2096 2101 rawtext = mdiff.patches(rawtext, bins)
2097 2102 self._cache = (node, rev, rawtext)
2098 2103
2099 2104 if flags is None:
2100 2105 if rev is None:
2101 2106 rev = self.rev(node)
2102 2107 flags = self.flags(rev)
2103 2108
2104 2109 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2105 2110 if validatehash:
2106 2111 self.checkhash(text, node, rev=rev)
2107 2112
2108 2113 return text
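
# Illustration (a standalone sketch, not mdiff's patch format): revision()
# conceptually fetches the chain's base text plus every delta along the
# chain and folds the deltas in, oldest first. The toy "delta" here keeps
# a prefix and appends a replacement tail.
def toy_patches(base, deltas):
    text = base
    for keep, tail in deltas:
        text = text[:keep] + tail
    return text

chain = [(5, b' brown fox'), (11, b' jumps')]
assert toy_patches(b'quick fox', chain) == b'quick brown jumps'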
2109 2114
2110 2115 def hash(self, text, p1, p2):
2111 2116 """Compute a node hash.
2112 2117
2113 2118 Available as a function so that subclasses can replace the hash
2114 2119 as needed.
2115 2120 """
2116 2121 return hash(text, p1, p2)
2117 2122
2118 2123 def _processflags(self, text, flags, operation, raw=False):
2119 2124 """Inspect revision data flags and applies transforms defined by
2120 2125 registered flag processors.
2121 2126
2122 2127 ``text`` - the revision data to process
2123 2128 ``flags`` - the revision flags
2124 2129 ``operation`` - the operation being performed (read or write)
2125 2130 ``raw`` - an optional argument describing if the raw transform should be
2126 2131 applied.
2127 2132
2128 2133 This method processes the flags in the order (or reverse order if
2129 2134 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2130 2135 flag processors registered for present flags. The order of flags defined
2131 2136 in REVIDX_FLAGS_ORDER needs to be stable since the transforms do not commute.
2132 2137
2133 2138 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2134 2139 processed text and ``validatehash`` is a bool indicating whether the
2135 2140 returned text should be checked for hash integrity.
2136 2141
2137 2142 Note: If the ``raw`` argument is set, it has precedence over the
2138 2143 operation and will only update the value of ``validatehash``.
2139 2144 """
2140 2145 # fast path: no flag processors will run
2141 2146 if flags == 0:
2142 2147 return text, True
2143 2148 if operation not in ('read', 'write'):
2144 2149 raise ProgrammingError(_("invalid '%s' operation") % operation)
2145 2150 # Check all flags are known.
2146 2151 if flags & ~REVIDX_KNOWN_FLAGS:
2147 2152 raise RevlogError(_("incompatible revision flag '%#x'") %
2148 2153 (flags & ~REVIDX_KNOWN_FLAGS))
2149 2154 validatehash = True
2150 2155 # Depending on the operation (read or write), the order might be
2151 2156 # reversed due to non-commutative transforms.
2152 2157 orderedflags = REVIDX_FLAGS_ORDER
2153 2158 if operation == 'write':
2154 2159 orderedflags = reversed(orderedflags)
2155 2160
2156 2161 for flag in orderedflags:
2157 2162 # If a flagprocessor has been registered for a known flag, apply the
2158 2163 # related operation transform and update result tuple.
2159 2164 if flag & flags:
2160 2165 vhash = True
2161 2166
2162 2167 if flag not in _flagprocessors:
2163 2168 message = _("missing processor for flag '%#x'") % (flag)
2164 2169 raise RevlogError(message)
2165 2170
2166 2171 processor = _flagprocessors[flag]
2167 2172 if processor is not None:
2168 2173 readtransform, writetransform, rawtransform = processor
2169 2174
2170 2175 if raw:
2171 2176 vhash = rawtransform(self, text)
2172 2177 elif operation == 'read':
2173 2178 text, vhash = readtransform(self, text)
2174 2179 else: # write operation
2175 2180 text, vhash = writetransform(self, text)
2176 2181 validatehash = validatehash and vhash
2177 2182
2178 2183 return text, validatehash
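
# Illustration (a standalone sketch of the processor contract above, with
# an invented flag and simplified signatures -- the real transforms also
# receive the revlog): a processor is a (read, write, raw) triple where
# read and write return (newtext, validatehash) and raw only returns the
# hash verdict for rawtext.
TOY_FLAG = 1 << 4

def toy_read(text):
    return text.rstrip(b'!'), True   # undo the write transform

def toy_write(text):
    return text + b'!', False        # transformed text won't match the hash

def toy_raw(text):
    return False                     # rawtext hash can't be validated

toy_processors = {TOY_FLAG: (toy_read, toy_write, toy_raw)}
read, write, raw = toy_processors[TOY_FLAG]
assert read(write(b'data')[0])[0] == b'data'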
2179 2184
2180 2185 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2181 2186 """Check node hash integrity.
2182 2187
2183 2188 Available as a function so that subclasses can extend hash mismatch
2184 2189 behaviors as needed.
2185 2190 """
2186 2191 try:
2187 2192 if p1 is None and p2 is None:
2188 2193 p1, p2 = self.parents(node)
2189 2194 if node != self.hash(text, p1, p2):
2190 2195 revornode = rev
2191 2196 if revornode is None:
2192 2197 revornode = templatefilters.short(hex(node))
2193 2198 raise RevlogError(_("integrity check failed on %s:%s")
2194 2199 % (self.indexfile, pycompat.bytestr(revornode)))
2195 2200 except RevlogError:
2196 2201 if self._censorable and _censoredtext(text):
2197 2202 raise error.CensoredNodeError(self.indexfile, node, text)
2198 2203 raise
2199 2204
2200 2205 def _enforceinlinesize(self, tr, fp=None):
2201 2206 """Check if the revlog is too big for inline and convert if so.
2202 2207
2203 2208 This should be called after revisions are added to the revlog. If the
2204 2209 revlog has grown too large to be an inline revlog, it will convert it
2205 2210 to use multiple index and data files.
2206 2211 """
2207 2212 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2208 2213 return
2209 2214
2210 2215 trinfo = tr.find(self.indexfile)
2211 2216 if trinfo is None:
2212 2217 raise RevlogError(_("%s not found in the transaction")
2213 2218 % self.indexfile)
2214 2219
2215 2220 trindex = trinfo[2]
2216 2221 if trindex is not None:
2217 2222 dataoff = self.start(trindex)
2218 2223 else:
2219 2224 # revlog was stripped at start of transaction, use all leftover data
2220 2225 trindex = len(self) - 1
2221 2226 dataoff = self.end(-2)
2222 2227
2223 2228 tr.add(self.datafile, dataoff)
2224 2229
2225 2230 if fp:
2226 2231 fp.flush()
2227 2232 fp.close()
2228 2233
2229 2234 with self._datafp('w') as df:
2230 2235 for r in self:
2231 2236 df.write(self._getsegmentforrevs(r, r)[1])
2232 2237
2233 2238 with self._indexfp('w') as fp:
2234 2239 self.version &= ~FLAG_INLINE_DATA
2235 2240 self._inline = False
2236 2241 io = self._io
2237 2242 for i in self:
2238 2243 e = io.packentry(self.index[i], self.node, self.version, i)
2239 2244 fp.write(e)
2240 2245
2241 2246 # the temp file replaces the real index when we exit the context
2242 2247 # manager
2243 2248
2244 2249 tr.replace(self.indexfile, trindex * self._io.size)
2245 2250 self._chunkclear()
2246 2251
2247 2252 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2248 2253 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2249 2254 """add a revision to the log
2250 2255
2251 2256 text - the revision data to add
2252 2257 transaction - the transaction object used for rollback
2253 2258 link - the linkrev data to add
2254 2259 p1, p2 - the parent nodeids of the revision
2255 2260 cachedelta - an optional precomputed delta
2256 2261 node - nodeid of revision; typically node is not specified, and it is
2257 2262 computed by default as hash(text, p1, p2), however subclasses might
2258 2263 use different hashing method (and override checkhash() in such case)
2259 2264 flags - the known flags to set on the revision
2260 2265 deltacomputer - an optional _deltacomputer instance shared between
2261 2266 multiple calls
2262 2267 """
2263 2268 if link == nullrev:
2264 2269 raise RevlogError(_("attempted to add linkrev -1 to %s")
2265 2270 % self.indexfile)
2266 2271
2267 2272 if flags:
2268 2273 node = node or self.hash(text, p1, p2)
2269 2274
2270 2275 rawtext, validatehash = self._processflags(text, flags, 'write')
2271 2276
2272 2277 # If the flag processor modifies the revision data, ignore any provided
2273 2278 # cachedelta.
2274 2279 if rawtext != text:
2275 2280 cachedelta = None
2276 2281
2277 2282 if len(rawtext) > _maxentrysize:
2278 2283 raise RevlogError(
2279 2284 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2280 2285 % (self.indexfile, len(rawtext)))
2281 2286
2282 2287 node = node or self.hash(rawtext, p1, p2)
2283 2288 if node in self.nodemap:
2284 2289 return node
2285 2290
2286 2291 if validatehash:
2287 2292 self.checkhash(rawtext, node, p1=p1, p2=p2)
2288 2293
2289 2294 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2290 2295 flags, cachedelta=cachedelta,
2291 2296 deltacomputer=deltacomputer)
2292 2297
2293 2298 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2294 2299 cachedelta=None, deltacomputer=None):
2295 2300 """add a raw revision with known flags, node and parents
2296 2301 useful when reusing a revision not stored in this revlog (ex: received
2297 2302 over wire, or read from an external bundle).
2298 2303 """
2299 2304 dfh = None
2300 2305 if not self._inline:
2301 2306 dfh = self._datafp("a+")
2302 2307 ifh = self._indexfp("a+")
2303 2308 try:
2304 2309 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2305 2310 flags, cachedelta, ifh, dfh,
2306 2311 deltacomputer=deltacomputer)
2307 2312 finally:
2308 2313 if dfh:
2309 2314 dfh.close()
2310 2315 ifh.close()
2311 2316
2312 2317 def compress(self, data):
2313 2318 """Generate a possibly-compressed representation of data."""
2314 2319 if not data:
2315 2320 return '', data
2316 2321
2317 2322 compressed = self._compressor.compress(data)
2318 2323
2319 2324 if compressed:
2320 2325 # The revlog compressor added the header in the returned data.
2321 2326 return '', compressed
2322 2327
2323 2328 if data[0:1] == '\0':
2324 2329 return '', data
2325 2330 return 'u', data
2326 2331
2327 2332 def decompress(self, data):
2328 2333 """Decompress a revlog chunk.
2329 2334
2330 2335 The chunk is expected to begin with a header identifying the
2331 2336 format type so it can be routed to an appropriate decompressor.
2332 2337 """
2333 2338 if not data:
2334 2339 return data
2335 2340
2336 2341 # Revlogs are read much more frequently than they are written and many
2337 2342 # chunks only take microseconds to decompress, so performance is
2338 2343 # important here.
2339 2344 #
2340 2345 # We can make a few assumptions about revlogs:
2341 2346 #
2342 2347 # 1) the majority of chunks will be compressed (as opposed to inline
2343 2348 # raw data).
2344 2349 # 2) decompressing *any* data will likely be at least 10x slower than
2345 2350 # returning raw inline data.
2346 2351 # 3) we want to prioritize common and officially supported compression
2347 2352 # engines
2348 2353 #
2349 2354 # It follows that we want to optimize for "decompress compressed data
2350 2355 # when encoded with common and officially supported compression engines"
2351 2356 # case over "raw data" and "data encoded by less common or non-official
2352 2357 # compression engines." That is why we have the inline lookup first
2353 2358 # followed by the compengines lookup.
2354 2359 #
2355 2360 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2356 2361 # compressed chunks. And this matters for changelog and manifest reads.
2357 2362 t = data[0:1]
2358 2363
2359 2364 if t == 'x':
2360 2365 try:
2361 2366 return _zlibdecompress(data)
2362 2367 except zlib.error as e:
2363 2368 raise RevlogError(_('revlog decompress error: %s') %
2364 2369 stringutil.forcebytestr(e))
2365 2370 # '\0' is more common than 'u' so it goes first.
2366 2371 elif t == '\0':
2367 2372 return data
2368 2373 elif t == 'u':
2369 2374 return util.buffer(data, 1)
2370 2375
2371 2376 try:
2372 2377 compressor = self._decompressors[t]
2373 2378 except KeyError:
2374 2379 try:
2375 2380 engine = util.compengines.forrevlogheader(t)
2376 2381 compressor = engine.revlogcompressor()
2377 2382 self._decompressors[t] = compressor
2378 2383 except KeyError:
2379 2384 raise RevlogError(_('unknown compression type %r') % t)
2380 2385
2381 2386 return compressor.decompress(data)
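
# Illustration (a standalone sketch of the header convention above): the
# first byte routes a chunk to its decoder. zlib chunks start with 'x'
# because 0x78 is zlib's own header byte at default settings, so they are
# stored verbatim; 'u' marks explicitly uncompressed data.
import zlib

def toy_decompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t in (b'', b'\0'):
        return chunk                 # empty, or raw data starting with NUL
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

assert toy_decompress(zlib.compress(b'hello')) == b'hello'
assert toy_decompress(b'usmall text') == b'small text'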
2382 2387
2383 2388 def _isgooddeltainfo(self, deltainfo, revinfo):
2384 2389 """Returns True if the given delta is good. Good means that it is within
2385 2390 the disk span, disk size, and chain length bounds that we know to be
2386 2391 performant."""
2387 2392 if deltainfo is None:
2388 2393 return False
2389 2394
2390 2395 # - 'deltainfo.distance' is the distance from the base revision --
2391 2396 # bounding it limits the amount of I/O we need to do.
2392 2397 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2393 2398 # deltas we need to apply -- bounding it limits the amount of CPU
2394 2399 # we consume.
2395 2400
2396 2401 textlen = revinfo.textlen
2397 2402 defaultmax = textlen * 4
2398 2403 maxdist = self._maxdeltachainspan
2399 2404 if not maxdist:
2400 2405 maxdist = deltainfo.distance # ensure the conditional pass
2401 2406 maxdist = max(maxdist, defaultmax)
2402 2407 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2403 2408 deltainfo.compresseddeltalen > textlen * 2 or
2404 2409 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2405 2410 return False
2406 2411
2407 2412 return True
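
# Illustration (a worked example of the bounds above, with invented
# numbers): for a 1000-byte revision and no configured _maxdeltachainspan,
# the distance bound defaults to 4x the text length, the delta itself must
# not exceed the text length, and the whole compressed chain must stay
# under 2x the text length.
textlen = 1000
maxdist = textlen * 4
cases = [
    # (distance, deltalen, compresseddeltalen) -> acceptable?
    ((3000, 400, 1500), True),   # inside every bound
    ((5000, 400, 1500), False),  # distance > 4 * textlen
    ((3000, 1200, 1500), False), # deltalen > textlen
    ((3000, 400, 2500), False),  # compressed chain > 2 * textlen
]
for (distance, deltalen, chainsize), ok in cases:
    good = (distance <= maxdist and deltalen <= textlen
            and chainsize <= textlen * 2)
    assert good == ok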
2408 2413
2409 2414 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2410 2415 cachedelta, ifh, dfh, alwayscache=False,
2411 2416 deltacomputer=None):
2412 2417 """internal function to add revisions to the log
2413 2418
2414 2419 see addrevision for argument descriptions.
2415 2420
2416 2421 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2417 2422
2418 2423 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2419 2424 be used.
2420 2425
2421 2426 invariants:
2422 2427 - rawtext is optional (can be None); if not set, cachedelta must be set.
2423 2428 if both are set, they must correspond to each other.
2424 2429 """
2425 2430 if node == nullid:
2426 2431 raise RevlogError(_("%s: attempt to add null revision") %
2427 2432 (self.indexfile))
2428 2433 if node == wdirid or node in wdirfilenodeids:
2429 2434 raise RevlogError(_("%s: attempt to add wdir revision") %
2430 2435 (self.indexfile))
2431 2436
2432 2437 if self._inline:
2433 2438 fh = ifh
2434 2439 else:
2435 2440 fh = dfh
2436 2441
2437 2442 btext = [rawtext]
2438 2443
2439 2444 curr = len(self)
2440 2445 prev = curr - 1
2441 2446 offset = self.end(prev)
2442 2447 p1r, p2r = self.rev(p1), self.rev(p2)
2443 2448
2444 2449 # full versions are inserted when the needed deltas
2445 2450 # become comparable to the uncompressed text
2446 2451 if rawtext is None:
2447 2452 # need the rawtext size before it is changed by flag processors,
2448 2453 # which is the non-raw size. use revlog explicitly to avoid filelog's
2449 2454 # extra logic that might remove metadata size.
2450 2455 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2451 2456 cachedelta[1])
2452 2457 else:
2453 2458 textlen = len(rawtext)
2454 2459
2455 2460 if deltacomputer is None:
2456 2461 deltacomputer = _deltacomputer(self)
2457 2462
2458 2463 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2459 2464
2460 2465 # no delta for flag processor revision (see "candelta" for why)
2461 2466 # not calling candelta since only one revision needs test, also to
2462 2467 # avoid overhead fetching flags again.
2463 2468 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2464 2469 deltainfo = None
2465 2470 else:
2466 2471 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2467 2472
2468 2473 if deltainfo is not None:
2469 2474 base = deltainfo.base
2470 2475 chainbase = deltainfo.chainbase
2471 2476 data = deltainfo.data
2472 2477 l = deltainfo.deltalen
2473 2478 else:
2474 2479 rawtext = deltacomputer.buildtext(revinfo, fh)
2475 2480 data = self.compress(rawtext)
2476 2481 l = len(data[1]) + len(data[0])
2477 2482 base = chainbase = curr
2478 2483
2479 2484 e = (offset_type(offset, flags), l, textlen,
2480 2485 base, link, p1r, p2r, node)
2481 2486 self.index.insert(-1, e)
2482 2487 self.nodemap[node] = curr
2483 2488
2484 2489 entry = self._io.packentry(e, self.node, self.version, curr)
2485 2490 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2486 2491
2487 2492 if alwayscache and rawtext is None:
2488 2493 rawtext = deltacomputer.buildtext(revinfo, fh)
2489 2494
2490 2495 if type(rawtext) == bytes: # only accept immutable objects
2491 2496 self._cache = (node, curr, rawtext)
2492 2497 self._chainbasecache[curr] = chainbase
2493 2498 return node
2494 2499
2495 2500 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2496 2501 # Files opened in a+ mode have inconsistent behavior on various
2497 2502 # platforms. Windows requires that a file positioning call be made
2498 2503 # when the file handle transitions between reads and writes. See
2499 2504 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2500 2505 # platforms, Python or the platform itself can be buggy. Some versions
2501 2506 # of Solaris have been observed to not append at the end of the file
2502 2507 # if the file was seeked to before the end. See issue4943 for more.
2503 2508 #
2504 2509 # We work around this issue by inserting a seek() before writing.
2505 2510 # Note: This is likely not necessary on Python 3.
2506 2511 ifh.seek(0, os.SEEK_END)
2507 2512 if dfh:
2508 2513 dfh.seek(0, os.SEEK_END)
2509 2514
2510 2515 curr = len(self) - 1
2511 2516 if not self._inline:
2512 2517 transaction.add(self.datafile, offset)
2513 2518 transaction.add(self.indexfile, curr * len(entry))
2514 2519 if data[0]:
2515 2520 dfh.write(data[0])
2516 2521 dfh.write(data[1])
2517 2522 ifh.write(entry)
2518 2523 else:
2519 2524 offset += curr * self._io.size
2520 2525 transaction.add(self.indexfile, offset, curr)
2521 2526 ifh.write(entry)
2522 2527 ifh.write(data[0])
2523 2528 ifh.write(data[1])
2524 2529 self._enforceinlinesize(transaction, ifh)
2525 2530
2526 2531 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2527 2532 """
2528 2533 add a delta group
2529 2534
2530 2535 given a set of deltas, add them to the revision log. the
2531 2536 first delta is against its parent, which should be in our
2532 2537 log, the rest are against the previous delta.
2533 2538
2534 2539 If ``addrevisioncb`` is defined, it will be called with arguments of
2535 2540 this revlog and the node that was added.
2536 2541 """
2537 2542
2538 2543 nodes = []
2539 2544
2540 2545 r = len(self)
2541 2546 end = 0
2542 2547 if r:
2543 2548 end = self.end(r - 1)
2544 2549 ifh = self._indexfp("a+")
2545 2550 isize = r * self._io.size
2546 2551 if self._inline:
2547 2552 transaction.add(self.indexfile, end + isize, r)
2548 2553 dfh = None
2549 2554 else:
2550 2555 transaction.add(self.indexfile, isize, r)
2551 2556 transaction.add(self.datafile, end)
2552 2557 dfh = self._datafp("a+")
2553 2558 def flush():
2554 2559 if dfh:
2555 2560 dfh.flush()
2556 2561 ifh.flush()
2557 2562 try:
2558 2563 deltacomputer = _deltacomputer(self)
2559 2564 # loop through our set of deltas
2560 2565 for data in deltas:
2561 2566 node, p1, p2, linknode, deltabase, delta, flags = data
2562 2567 link = linkmapper(linknode)
2563 2568 flags = flags or REVIDX_DEFAULT_FLAGS
2564 2569
2565 2570 nodes.append(node)
2566 2571
2567 2572 if node in self.nodemap:
2568 2573 # this can happen if two branches make the same change
2569 2574 continue
2570 2575
2571 2576 for p in (p1, p2):
2572 2577 if p not in self.nodemap:
2573 2578 raise LookupError(p, self.indexfile,
2574 2579 _('unknown parent'))
2575 2580
2576 2581 if deltabase not in self.nodemap:
2577 2582 raise LookupError(deltabase, self.indexfile,
2578 2583 _('unknown delta base'))
2579 2584
2580 2585 baserev = self.rev(deltabase)
2581 2586
2582 2587 if baserev != nullrev and self.iscensored(baserev):
2583 2588 # if base is censored, delta must be full replacement in a
2584 2589 # single patch operation
2585 2590 hlen = struct.calcsize(">lll")
2586 2591 oldlen = self.rawsize(baserev)
2587 2592 newlen = len(delta) - hlen
2588 2593 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2589 2594 raise error.CensoredBaseError(self.indexfile,
2590 2595 self.node(baserev))
2591 2596
2592 2597 if not flags and self._peek_iscensored(baserev, delta, flush):
2593 2598 flags |= REVIDX_ISCENSORED
2594 2599
2595 2600 # We assume consumers of addrevisioncb will want to retrieve
2596 2601 # the added revision, which will require a call to
2597 2602 # revision(). revision() will fast path if there is a cache
2598 2603 # hit. So, we tell _addrevision() to always cache in this case.
2599 2604 # We're only using addgroup() in the context of changegroup
2600 2605 # generation so the revision data can always be handled as raw
2601 2606 # by the flagprocessor.
2602 2607 self._addrevision(node, None, transaction, link,
2603 2608 p1, p2, flags, (baserev, delta),
2604 2609 ifh, dfh,
2605 2610 alwayscache=bool(addrevisioncb),
2606 2611 deltacomputer=deltacomputer)
2607 2612
2608 2613 if addrevisioncb:
2609 2614 addrevisioncb(self, node)
2610 2615
2611 2616 if not dfh and not self._inline:
2612 2617 # addrevision switched from inline to conventional
2613 2618 # reopen the index
2614 2619 ifh.close()
2615 2620 dfh = self._datafp("a+")
2616 2621 ifh = self._indexfp("a+")
2617 2622 finally:
2618 2623 if dfh:
2619 2624 dfh.close()
2620 2625 ifh.close()
2621 2626
2622 2627 return nodes
2623 2628
2624 2629 def iscensored(self, rev):
2625 2630 """Check if a file revision is censored."""
2626 2631 if not self._censorable:
2627 2632 return False
2628 2633
2629 2634 return self.flags(rev) & REVIDX_ISCENSORED
2630 2635
2631 2636 def _peek_iscensored(self, baserev, delta, flush):
2632 2637 """Quickly check if a delta produces a censored revision."""
2633 2638 if not self._censorable:
2634 2639 return False
2635 2640
2636 2641 # Fragile heuristic: unless new file meta keys are added alphabetically
2637 2642 # preceding "censored", all censored revisions are prefixed by
2638 2643 # "\1\ncensored:". A delta producing such a censored revision must be a
2639 2644 # full-replacement delta, so we inspect the first and only patch in the
2640 2645 # delta for this prefix.
2641 2646 hlen = struct.calcsize(">lll")
2642 2647 if len(delta) <= hlen:
2643 2648 return False
2644 2649
2645 2650 oldlen = self.rawsize(baserev)
2646 2651 newlen = len(delta) - hlen
2647 2652 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2648 2653 return False
2649 2654
2650 2655 add = "\1\ncensored:"
2651 2656 addlen = len(add)
2652 2657 return newlen >= addlen and delta[hlen:hlen + addlen] == add
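
# Illustration (a standalone sketch of the heuristic above): a censored
# revision's rawtext begins with "\1\ncensored:", and a full-replacement
# delta is a single ">lll" patch header followed by the new text, so the
# marker can be spotted right after the 12-byte header. The delta below is
# hand-built for the example.
import struct

hlen = struct.calcsize(">lll")               # 12 bytes: start, end, length
oldlen, newtext = 7, b"\x01\ncensored: tombstone"
delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext

marker = b"\x01\ncensored:"
assert delta[hlen:hlen + len(marker)] == marker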
2653 2658
2654 2659 def getstrippoint(self, minlink):
2655 2660 """find the minimum rev that must be stripped to strip the linkrev
2656 2661
2657 2662 Returns a tuple containing the minimum rev and a set of all revs that
2658 2663 have linkrevs that will be broken by this strip.
2659 2664 """
2660 2665 brokenrevs = set()
2661 2666 strippoint = len(self)
2662 2667
2663 2668 heads = {}
2664 2669 futurelargelinkrevs = set()
2665 2670 for head in self.headrevs():
2666 2671 headlinkrev = self.linkrev(head)
2667 2672 heads[head] = headlinkrev
2668 2673 if headlinkrev >= minlink:
2669 2674 futurelargelinkrevs.add(headlinkrev)
2670 2675
2671 2676 # This algorithm involves walking down the rev graph, starting at the
2672 2677 # heads. Since the revs are topologically sorted according to linkrev,
2673 2678 # once all head linkrevs are below the minlink, we know there are
2674 2679 # no more revs that could have a linkrev greater than minlink.
2675 2680 # So we can stop walking.
2676 2681 while futurelargelinkrevs:
2677 2682 strippoint -= 1
2678 2683 linkrev = heads.pop(strippoint)
2679 2684
2680 2685 if linkrev < minlink:
2681 2686 brokenrevs.add(strippoint)
2682 2687 else:
2683 2688 futurelargelinkrevs.remove(linkrev)
2684 2689
2685 2690 for p in self.parentrevs(strippoint):
2686 2691 if p != nullrev:
2687 2692 plinkrev = self.linkrev(p)
2688 2693 heads[p] = plinkrev
2689 2694 if plinkrev >= minlink:
2690 2695 futurelargelinkrevs.add(plinkrev)
2691 2696
2692 2697 return strippoint, brokenrevs
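
# Illustration (a standalone sketch of the simplest case above, where the
# linkrevs happen to grow monotonically so brokenrevs stays empty): the
# strip point settles on the first revision whose linkrev reaches minlink.
linkrevs = [0, 1, 2, 2, 3]  # linkrev per revision, in revision order
minlink = 2
strippoint = len(linkrevs)
while strippoint > 0 and linkrevs[strippoint - 1] >= minlink:
    strippoint -= 1
assert strippoint == 2      # revisions 2, 3 and 4 must be stripped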
2693 2698
2694 2699 def strip(self, minlink, transaction):
2695 2700 """truncate the revlog on the first revision with a linkrev >= minlink
2696 2701
2697 2702 This function is called when we're stripping revision minlink and
2698 2703 its descendants from the repository.
2699 2704
2700 2705 We have to remove all revisions with linkrev >= minlink, because
2701 2706 the equivalent changelog revisions will be renumbered after the
2702 2707 strip.
2703 2708
2704 2709 So we truncate the revlog on the first of these revisions, and
2705 2710 trust that the caller has saved the revisions that shouldn't be
2706 2711 removed and that it'll re-add them after this truncation.
2707 2712 """
2708 2713 if len(self) == 0:
2709 2714 return
2710 2715
2711 2716 rev, _ = self.getstrippoint(minlink)
2712 2717 if rev == len(self):
2713 2718 return
2714 2719
2715 2720 # first truncate the files on disk
2716 2721 end = self.start(rev)
2717 2722 if not self._inline:
2718 2723 transaction.add(self.datafile, end)
2719 2724 end = rev * self._io.size
2720 2725 else:
2721 2726 end += rev * self._io.size
2722 2727
2723 2728 transaction.add(self.indexfile, end)
2724 2729
2725 2730 # then reset internal state in memory to forget those revisions
2726 2731 self._cache = None
2727 2732 self._chaininfocache = {}
2728 2733 self._chunkclear()
2729 2734 for x in xrange(rev, len(self)):
2730 2735 del self.nodemap[self.node(x)]
2731 2736
2732 2737 del self.index[rev:-1]
2733 2738 self._nodepos = None
2734 2739
2735 2740 def checksize(self):
2736 2741 expected = 0
2737 2742 if len(self):
2738 2743 expected = max(0, self.end(len(self) - 1))
2739 2744
2740 2745 try:
2741 2746 with self._datafp() as f:
2742 2747 f.seek(0, 2)
2743 2748 actual = f.tell()
2744 2749 dd = actual - expected
2745 2750 except IOError as inst:
2746 2751 if inst.errno != errno.ENOENT:
2747 2752 raise
2748 2753 dd = 0
2749 2754
2750 2755 try:
2751 2756 f = self.opener(self.indexfile)
2752 2757 f.seek(0, 2)
2753 2758 actual = f.tell()
2754 2759 f.close()
2755 2760 s = self._io.size
2756 2761 i = max(0, actual // s)
2757 2762 di = actual - (i * s)
2758 2763 if self._inline:
2759 2764 databytes = 0
2760 2765 for r in self:
2761 2766 databytes += max(0, self.length(r))
2762 2767 dd = 0
2763 2768 di = actual - len(self) * s - databytes
2764 2769 except IOError as inst:
2765 2770 if inst.errno != errno.ENOENT:
2766 2771 raise
2767 2772 di = 0
2768 2773
2769 2774 return (dd, di)
2770 2775
2771 2776 def files(self):
2772 2777 res = [self.indexfile]
2773 2778 if not self._inline:
2774 2779 res.append(self.datafile)
2775 2780 return res
2776 2781
2777 2782 DELTAREUSEALWAYS = 'always'
2778 2783 DELTAREUSESAMEREVS = 'samerevs'
2779 2784 DELTAREUSENEVER = 'never'
2780 2785
2781 2786 DELTAREUSEFULLADD = 'fulladd'
2782 2787
2783 2788 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2784 2789
2785 2790 def clone(self, tr, destrevlog, addrevisioncb=None,
2786 2791 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2787 2792 """Copy this revlog to another, possibly with format changes.
2788 2793
2789 2794 The destination revlog will contain the same revisions and nodes.
2790 2795 However, it may not be bit-for-bit identical due to e.g. delta encoding
2791 2796 differences.
2792 2797
2793 2798 The ``deltareuse`` argument controls how deltas from the existing revlog
2794 2799 are preserved in the destination revlog. The argument can have the
2795 2800 following values:
2796 2801
2797 2802 DELTAREUSEALWAYS
2798 2803 Deltas will always be reused (if possible), even if the destination
2799 2804 revlog would not select the same revisions for the delta. This is the
2800 2805 fastest mode of operation.
2801 2806 DELTAREUSESAMEREVS
2802 2807 Deltas will be reused if the destination revlog would pick the same
2803 2808 revisions for the delta. This mode strikes a balance between speed
2804 2809 and optimization.
2805 2810 DELTAREUSENEVER
2806 2811 Deltas will never be reused. This is the slowest mode of execution.
2807 2812 This mode can be used to recompute deltas (e.g. if the diff/delta
2808 2813 algorithm changes).
2809 2814
2810 2815 Delta computation can be slow, so the choice of delta reuse policy can
2811 2816 significantly affect run time.
2812 2817
2813 2818 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2814 2819 two extremes. Deltas will be reused if they are appropriate. But if the
2815 2820 delta could choose a better revision, it will do so. This means if you
2816 2821 are converting a non-generaldelta revlog to a generaldelta revlog,
2817 2822 deltas will be recomputed if the delta's parent isn't a parent of the
2818 2823 revision.
2819 2824
2820 2825 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2821 2826 controls whether to compute deltas against both parents for merges.
2822 2827 If None, the destination revlog's existing setting is kept.
2823 2828 """
2824 2829 if deltareuse not in self.DELTAREUSEALL:
2825 2830 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2826 2831
2827 2832 if len(destrevlog):
2828 2833 raise ValueError(_('destination revlog is not empty'))
2829 2834
2830 2835 if getattr(self, 'filteredrevs', None):
2831 2836 raise ValueError(_('source revlog has filtered revisions'))
2832 2837 if getattr(destrevlog, 'filteredrevs', None):
2833 2838 raise ValueError(_('destination revlog has filtered revisions'))
2834 2839
2835 2840 # lazydeltabase controls whether to reuse a cached delta, if possible.
2836 2841 oldlazydeltabase = destrevlog._lazydeltabase
2837 2842 oldamd = destrevlog._aggressivemergedeltas
2838 2843
2839 2844 try:
2840 2845 if deltareuse == self.DELTAREUSEALWAYS:
2841 2846 destrevlog._lazydeltabase = True
2842 2847 elif deltareuse == self.DELTAREUSESAMEREVS:
2843 2848 destrevlog._lazydeltabase = False
2844 2849
2845 2850 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2846 2851
2847 2852 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2848 2853 self.DELTAREUSESAMEREVS)
2849 2854
2850 2855 deltacomputer = _deltacomputer(destrevlog)
2851 2856 index = self.index
2852 2857 for rev in self:
2853 2858 entry = index[rev]
2854 2859
2855 2860 # Some classes override linkrev to take filtered revs into
2856 2861 # account. Use raw entry from index.
2857 2862 flags = entry[0] & 0xffff
2858 2863 linkrev = entry[4]
2859 2864 p1 = index[entry[5]][7]
2860 2865 p2 = index[entry[6]][7]
2861 2866 node = entry[7]
2862 2867
2863 2868 # (Possibly) reuse the delta from the revlog if allowed and
2864 2869 # the revlog chunk is a delta.
2865 2870 cachedelta = None
2866 2871 rawtext = None
2867 2872 if populatecachedelta:
2868 2873 dp = self.deltaparent(rev)
2869 2874 if dp != nullrev:
2870 2875 cachedelta = (dp, bytes(self._chunk(rev)))
2871 2876
2872 2877 if not cachedelta:
2873 2878 rawtext = self.revision(rev, raw=True)
2874 2879
2875 2880
2876 2881 if deltareuse == self.DELTAREUSEFULLADD:
2877 2882 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2878 2883 cachedelta=cachedelta,
2879 2884 node=node, flags=flags,
2880 2885 deltacomputer=deltacomputer)
2881 2886 else:
2882 2887 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2883 2888 checkambig=False)
2884 2889 dfh = None
2885 2890 if not destrevlog._inline:
2886 2891 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2887 2892 try:
2888 2893 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2889 2894 p2, flags, cachedelta, ifh, dfh,
2890 2895 deltacomputer=deltacomputer)
2891 2896 finally:
2892 2897 if dfh:
2893 2898 dfh.close()
2894 2899 ifh.close()
2895 2900
2896 2901 if addrevisioncb:
2897 2902 addrevisioncb(self, rev, node)
2898 2903 finally:
2899 2904 destrevlog._lazydeltabase = oldlazydeltabase
2900 2905 destrevlog._aggressivemergedeltas = oldamd