sparse-revlog: drop unused deltainfo parameter from segmentspan...
Boris Feld
r40641:a32ccd32 default
@@ -1,902 +1,896
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import heapq
14 14 import struct
15 15
16 16 # import stuff from node for others to import from revlog
17 17 from ..node import (
18 18 nullrev,
19 19 )
20 20 from ..i18n import _
21 21
22 22 from .constants import (
23 23 REVIDX_ISCENSORED,
24 24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 25 )
26 26
27 27 from ..thirdparty import (
28 28 attr,
29 29 )
30 30
31 31 from .. import (
32 32 error,
33 33 mdiff,
34 34 )
35 35
36 36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 37 LIMIT_DELTA2TEXT = 2
38 38
39 39 class _testrevlog(object):
40 40 """minimalist fake revlog to use in doctests"""
41 41
42 42 def __init__(self, data, density=0.5, mingap=0):
43 43 """data is an list of revision payload boundaries"""
44 44 self._data = data
45 45 self._srdensitythreshold = density
46 46 self._srmingapsize = mingap
47 47
48 48 def start(self, rev):
49 49 if rev == 0:
50 50 return 0
51 51 return self._data[rev - 1]
52 52
53 53 def end(self, rev):
54 54 return self._data[rev]
55 55
56 56 def length(self, rev):
57 57 return self.end(rev) - self.start(rev)
58 58
59 59 def __len__(self):
60 60 return len(self._data)
61 61
62 62 def slicechunk(revlog, revs, targetsize=None):
63 63 """slice revs to reduce the amount of unrelated data to be read from disk.
64 64
65 65 ``revs`` is sliced into groups that should be read in one time.
66 66 Assume that revs are sorted.
67 67
68 68 The initial chunk is sliced until the overall density (payload/chunks-span
69 69 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
70 70 `revlog._srmingapsize` is skipped.
71 71
72 72 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
73 73 For consistency with other slicing choices, this limit won't go lower than
74 74 `revlog._srmingapsize`.
75 75
76 76 If an individual revision chunk is larger than this limit, it will still
77 77 be yielded individually.
78 78
79 79 >>> revlog = _testrevlog([
80 80 ... 5, #00 (5)
81 81 ... 10, #01 (5)
82 82 ... 12, #02 (2)
83 83 ... 12, #03 (empty)
84 84 ... 27, #04 (15)
85 85 ... 31, #05 (4)
86 86 ... 31, #06 (empty)
87 87 ... 42, #07 (11)
88 88 ... 47, #08 (5)
89 89 ... 47, #09 (empty)
90 90 ... 48, #10 (1)
91 91 ... 51, #11 (3)
92 92 ... 74, #12 (23)
93 93 ... 85, #13 (11)
94 94 ... 86, #14 (1)
95 95 ... 91, #15 (5)
96 96 ... ])
97 97
98 98 >>> list(slicechunk(revlog, list(range(16))))
99 99 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
100 100 >>> list(slicechunk(revlog, [0, 15]))
101 101 [[0], [15]]
102 102 >>> list(slicechunk(revlog, [0, 11, 15]))
103 103 [[0], [11], [15]]
104 104 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
105 105 [[0], [11, 13, 15]]
106 106 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
107 107 [[1, 2], [5, 8, 10, 11], [14]]
108 108
109 109 Slicing with a maximum chunk size
110 110 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
111 111 [[0], [11], [13], [15]]
112 112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
113 113 [[0], [11], [13, 15]]
114 114 """
115 115 if targetsize is not None:
116 116 targetsize = max(targetsize, revlog._srmingapsize)
117 117 # targetsize should not be specified when evaluating delta candidates:
118 118 # * targetsize is used to ensure we stay within specification when reading,
119 119 for chunk in _slicechunktodensity(revlog, revs,
120 120 revlog._srdensitythreshold,
121 121 revlog._srmingapsize):
122 122 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
123 123 yield subchunk
124 124
125 125 def _slicechunktosize(revlog, revs, targetsize=None):
126 126 """slice revs to match the target size
127 127
128 128 This is intended for chunks that density slicing selected but that are
129 129 still too large compared to the read guarantee of the revlog. This can
130 130 happen when "minimal gap size" interrupted the slicing, or when chains
131 131 are built in a way that creates large blocks next to each other.
132 132
133 133 >>> revlog = _testrevlog([
134 134 ... 3, #0 (3)
135 135 ... 5, #1 (2)
136 136 ... 6, #2 (1)
137 137 ... 8, #3 (2)
138 138 ... 8, #4 (empty)
139 139 ... 11, #5 (3)
140 140 ... 12, #6 (1)
141 141 ... 13, #7 (1)
142 142 ... 14, #8 (1)
143 143 ... ])
144 144
145 145 Cases where the chunk is already small enough
146 146 >>> list(_slicechunktosize(revlog, [0], 3))
147 147 [[0]]
148 148 >>> list(_slicechunktosize(revlog, [6, 7], 3))
149 149 [[6, 7]]
150 150 >>> list(_slicechunktosize(revlog, [0], None))
151 151 [[0]]
152 152 >>> list(_slicechunktosize(revlog, [6, 7], None))
153 153 [[6, 7]]
154 154
155 155 Cases where we need actual slicing
156 156 >>> list(_slicechunktosize(revlog, [0, 1], 3))
157 157 [[0], [1]]
158 158 >>> list(_slicechunktosize(revlog, [1, 3], 3))
159 159 [[1], [3]]
160 160 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
161 161 [[1, 2], [3]]
162 162 >>> list(_slicechunktosize(revlog, [3, 5], 3))
163 163 [[3], [5]]
164 164 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
165 165 [[3], [5]]
166 166 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
167 167 [[5], [6, 7, 8]]
168 168 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
169 169 [[0], [1, 2], [3], [5], [6, 7, 8]]
170 170
171 171 Case with a too-large individual chunk (must return a valid chunk)
172 172 >>> list(_slicechunktosize(revlog, [0, 1], 2))
173 173 [[0], [1]]
174 174 >>> list(_slicechunktosize(revlog, [1, 3], 1))
175 175 [[1], [3]]
176 176 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
177 177 [[3], [5]]
178 178 """
179 179 assert targetsize is None or 0 <= targetsize
180 180 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
181 181 yield revs
182 182 return
183 183
184 184 startrevidx = 0
185 185 startdata = revlog.start(revs[0])
186 186 endrevidx = 0
187 187 iterrevs = enumerate(revs)
188 188 next(iterrevs) # skip first rev.
189 189 for idx, r in iterrevs:
190 190 span = revlog.end(r) - startdata
191 191 if span <= targetsize:
192 192 endrevidx = idx
193 193 else:
194 194 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
195 195 if chunk:
196 196 yield chunk
197 197 startrevidx = idx
198 198 startdata = revlog.start(r)
199 199 endrevidx = idx
200 200 yield _trimchunk(revlog, revs, startrevidx)
201 201
202 202 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
203 203 mingapsize=0):
204 204 """slice revs to reduce the amount of unrelated data to be read from disk.
205 205
206 206 ``revs`` is sliced into groups that should be read in one time.
207 207 Assume that revs are sorted.
208 208
209 209 The initial chunk is sliced until the overall density (payload/chunks-span
210 210 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
211 211 skipped.
212 212
213 213 >>> revlog = _testrevlog([
214 214 ... 5, #00 (5)
215 215 ... 10, #01 (5)
216 216 ... 12, #02 (2)
217 217 ... 12, #03 (empty)
218 218 ... 27, #04 (15)
219 219 ... 31, #05 (4)
220 220 ... 31, #06 (empty)
221 221 ... 42, #07 (11)
222 222 ... 47, #08 (5)
223 223 ... 47, #09 (empty)
224 224 ... 48, #10 (1)
225 225 ... 51, #11 (3)
226 226 ... 74, #12 (23)
227 227 ... 85, #13 (11)
228 228 ... 86, #14 (1)
229 229 ... 91, #15 (5)
230 230 ... ])
231 231
232 232 >>> list(_slicechunktodensity(revlog, list(range(16))))
233 233 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
234 234 >>> list(_slicechunktodensity(revlog, [0, 15]))
235 235 [[0], [15]]
236 236 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
237 237 [[0], [11], [15]]
238 238 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
239 239 [[0], [11, 13, 15]]
240 240 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
241 241 [[1, 2], [5, 8, 10, 11], [14]]
242 242 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
243 243 ... mingapsize=20))
244 244 [[1, 2, 3, 5, 8, 10, 11], [14]]
245 245 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
246 246 ... targetdensity=0.95))
247 247 [[1, 2], [5], [8, 10, 11], [14]]
248 248 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
249 249 ... targetdensity=0.95, mingapsize=12))
250 250 [[1, 2], [5, 8, 10, 11], [14]]
251 251 """
252 252 start = revlog.start
253 253 length = revlog.length
254 254
255 255 if len(revs) <= 1:
256 256 yield revs
257 257 return
258 258
259 259 deltachainspan = segmentspan(revlog, revs)
260 260 chainpayload = sum(length(r) for r in revs)
261 261
262 262 if deltachainspan < mingapsize:
263 263 yield revs
264 264 return
265 265
266 266 readdata = deltachainspan
267 267
268 268 if deltachainspan:
269 269 density = chainpayload / float(deltachainspan)
270 270 else:
271 271 density = 1.0
272 272
273 273 if density >= targetdensity:
274 274 yield revs
275 275 return
276 276
277 277 # Store the gaps in a heap to have them sorted by decreasing size
278 278 gapsheap = []
279 279 heapq.heapify(gapsheap)
280 280 prevend = None
281 281 for i, rev in enumerate(revs):
282 282 revstart = start(rev)
283 283 revlen = length(rev)
284 284
285 285 # Skip empty revisions to form larger holes
286 286 if revlen == 0:
287 287 continue
288 288
289 289 if prevend is not None:
290 290 gapsize = revstart - prevend
291 291 # only consider holes that are large enough
292 292 if gapsize > mingapsize:
293 293 heapq.heappush(gapsheap, (-gapsize, i))
294 294
295 295 prevend = revstart + revlen
296 296
297 297 # Collect the indices of the largest holes until the density is acceptable
298 298 indicesheap = []
299 299 heapq.heapify(indicesheap)
300 300 while gapsheap and density < targetdensity:
301 301 oppgapsize, gapidx = heapq.heappop(gapsheap)
302 302
303 303 heapq.heappush(indicesheap, gapidx)
304 304
305 305 # the gap sizes are stored as negatives so the heap keeps them
306 306 # sorted in decreasing order
307 307 readdata -= (-oppgapsize)
308 308 if readdata > 0:
309 309 density = chainpayload / float(readdata)
310 310 else:
311 311 density = 1.0
312 312
313 313 # Cut the revs at collected indices
314 314 previdx = 0
315 315 while indicesheap:
316 316 idx = heapq.heappop(indicesheap)
317 317
318 318 chunk = _trimchunk(revlog, revs, previdx, idx)
319 319 if chunk:
320 320 yield chunk
321 321
322 322 previdx = idx
323 323
324 324 chunk = _trimchunk(revlog, revs, previdx)
325 325 if chunk:
326 326 yield chunk
327 327
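A note on the heap bookkeeping in `_slicechunktodensity`: `heapq` only provides a min-heap, so gap sizes are pushed negated to make the largest gap pop first. A minimal standalone sketch of that idiom, with made-up gap values:

import heapq

# hypothetical (gapsize, index) pairs; the values are invented
gaps = [(3, 2), (15, 5), (11, 9)]
gapsheap = [(-size, idx) for size, idx in gaps]
heapq.heapify(gapsheap)
oppgapsize, gapidx = heapq.heappop(gapsheap)
assert (-oppgapsize, gapidx) == (15, 5)  # the widest gap surfaces first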
328 328 def _trimchunk(revlog, revs, startidx, endidx=None):
329 329 """returns revs[startidx:endidx] without empty trailing revs
330 330
331 331 Doctest Setup
332 332 >>> revlog = _testrevlog([
333 333 ... 5, #0
334 334 ... 10, #1
335 335 ... 12, #2
336 336 ... 12, #3 (empty)
337 337 ... 17, #4
338 338 ... 21, #5
339 339 ... 21, #6 (empty)
340 340 ... ])
341 341
342 342 Contiguous cases:
343 343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
344 344 [0, 1, 2, 3, 4, 5]
345 345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
346 346 [0, 1, 2, 3, 4]
347 347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
348 348 [0, 1, 2]
349 349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
350 350 [2]
351 351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
352 352 [3, 4, 5]
353 353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
354 354 [3, 4]
355 355
356 356 Discontiguous cases:
357 357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
358 358 [1, 3, 5]
359 359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
360 360 [1]
361 361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
362 362 [3, 5]
363 363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
364 364 [3, 5]
365 365 """
366 366 length = revlog.length
367 367
368 368 if endidx is None:
369 369 endidx = len(revs)
370 370
371 371 # If we have a non-empty delta candidate, there is nothing to trim
372 372 if revs[endidx - 1] < len(revlog):
373 373 # Trim empty revs at the end, except the very first revision of a chain
374 374 while (endidx > 1
375 375 and endidx > startidx
376 376 and length(revs[endidx - 1]) == 0):
377 377 endidx -= 1
378 378
379 379 return revs[startidx:endidx]
380 380
381 def segmentspan(revlog, revs, deltainfo=None):
381 def segmentspan(revlog, revs):
382 382 """Get the byte span of a segment of revisions
383 383
384 384 revs is a sorted array of revision numbers
385 385
386 386 >>> revlog = _testrevlog([
387 387 ... 5, #0
388 388 ... 10, #1
389 389 ... 12, #2
390 390 ... 12, #3 (empty)
391 391 ... 17, #4
392 392 ... ])
393 393
394 394 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
395 395 17
396 396 >>> segmentspan(revlog, [0, 4])
397 397 17
398 398 >>> segmentspan(revlog, [3, 4])
399 399 5
400 400 >>> segmentspan(revlog, [1, 2, 3,])
401 401 7
402 402 >>> segmentspan(revlog, [1, 3])
403 403 7
404 404 """
405 405 if not revs:
406 406 return 0
407 if deltainfo is not None and len(revlog) <= revs[-1]:
408 if len(revs) == 1:
409 return deltainfo.deltalen
410 offset = revlog.end(len(revlog) - 1)
411 end = deltainfo.deltalen + offset
412 else:
413 end = revlog.end(revs[-1])
407 end = revlog.end(revs[-1])
414 408 return end - revlog.start(revs[0])
415 409
416 410 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
417 411 """build full text from a (base, delta) pair and other metadata"""
418 412 # special case deltas which replace entire base; no need to decode
419 413 # base revision. this neatly avoids censored bases, which throw when
420 414 # they're decoded.
421 415 hlen = struct.calcsize(">lll")
422 416 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
423 417 len(delta) - hlen):
424 418 fulltext = delta[hlen:]
425 419 else:
426 420 # deltabase is the rawtext before being changed by flag processors,
427 421 # which is equivalent to non-raw text
428 422 basetext = revlog.revision(baserev, _df=fh, raw=False)
429 423 fulltext = mdiff.patch(basetext, delta)
430 424
431 425 try:
432 426 res = revlog._processflags(fulltext, flags, 'read', raw=True)
433 427 fulltext, validatehash = res
434 428 if validatehash:
435 429 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
436 430 if flags & REVIDX_ISCENSORED:
437 431 raise error.StorageError(_('node %s is not censored') %
438 432 expectednode)
439 433 except error.CensoredNodeError:
440 434 # must pass the censored index flag to add censored revisions
441 435 if not flags & REVIDX_ISCENSORED:
442 436 raise
443 437 return fulltext
444 438
445 439 @attr.s(slots=True, frozen=True)
446 440 class _deltainfo(object):
447 441 distance = attr.ib()
448 442 deltalen = attr.ib()
449 443 data = attr.ib()
450 444 base = attr.ib()
451 445 chainbase = attr.ib()
452 446 chainlen = attr.ib()
453 447 compresseddeltalen = attr.ib()
454 448 snapshotdepth = attr.ib()
455 449
456 450 def isgooddeltainfo(revlog, deltainfo, revinfo):
457 451 """Returns True if the given delta is good. Good means that it is within
458 452 the disk span, disk size, and chain length bounds that we know to be
459 453 performant."""
460 454 if deltainfo is None:
461 455 return False
462 456
463 457 # - 'deltainfo.distance' is the distance from the base revision --
464 458 # bounding it limits the amount of I/O we need to do.
465 459 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
466 460 # deltas we need to apply -- bounding it limits the amount of CPU
467 461 # we consume.
468 462
469 463 textlen = revinfo.textlen
470 464 defaultmax = textlen * 4
471 465 maxdist = revlog._maxdeltachainspan
472 466 if not maxdist:
473 467 maxdist = deltainfo.distance # ensure the conditional pass
474 468 maxdist = max(maxdist, defaultmax)
475 469
476 470 # Bad delta from read span:
477 471 #
478 472 # If the span of data read is larger than the maximum allowed.
479 473 #
480 474 # In the sparse-revlog case, we rely on the associated "sparse reading"
481 475 # to avoid issues related to the span of data. In theory, it would be
482 476 # possible to build a pathological revlog where the delta pattern would
483 477 # lead to too many reads. However, this does not happen in practice at
484 478 # all, so we skip the span check entirely.
485 479 if not revlog._sparserevlog and maxdist < deltainfo.distance:
486 480 return False
487 481
488 482 # Bad delta from new delta size:
489 483 #
490 484 # If the delta size is larger than the target text, storing the
491 485 # delta will be inefficient.
492 486 if textlen < deltainfo.deltalen:
493 487 return False
494 488
495 489 # Bad delta from cumulative payload size:
496 490 #
497 491 # If the sum of deltas gets larger than K * the target text length.
498 492 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
499 493 return False
500 494
501 495 # Bad delta from chain length:
502 496 #
503 497 # If the number of deltas in the chain gets too high.
504 498 if (revlog._maxchainlen
505 499 and revlog._maxchainlen < deltainfo.chainlen):
506 500 return False
507 501
508 502 # bad delta from intermediate snapshot size limit
509 503 #
510 504 # If an intermediate snapshot's size is higher than the limit. The
511 505 # limit exists to prevent endless chains of intermediate deltas from
512 506 # being created.
513 507 if (deltainfo.snapshotdepth is not None and
514 508 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
515 509 return False
516 510
517 511 # bad delta if new intermediate snapshot is larger than the previous
518 512 # snapshot
519 513 if (deltainfo.snapshotdepth
520 514 and revlog.length(deltainfo.base) < deltainfo.deltalen):
521 515 return False
522 516
523 517 return True
524 518
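For illustration, here is a hedged sketch of calling `isgooddeltainfo` with stand-in objects: the attribute names mirror the ones the function reads, but the revlog and every value below are invented for the example, not a real Mercurial object.

from types import SimpleNamespace

# all values invented; they merely satisfy each bound checked above
fakerevlog = SimpleNamespace(
    _maxdeltachainspan=0,     # 0 means no explicit span limit
    _sparserevlog=False,
    _maxchainlen=1000,
    length=lambda rev: 100,   # only consulted for snapshot candidates
)
revinfo = SimpleNamespace(textlen=1000)
delta = SimpleNamespace(
    distance=500,             # under the textlen * 4 default bound
    deltalen=200,             # smaller than the target text
    compresseddeltalen=800,   # under textlen * LIMIT_DELTA2TEXT
    chainlen=3,               # under _maxchainlen
    snapshotdepth=None,       # not an intermediate snapshot
    base=0,
)
assert isgooddeltainfo(fakerevlog, delta, revinfo)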
525 519 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
526 520 """Provides group of revision to be tested as delta base
527 521
528 522 This top level function focus on emitting groups with unique and worthwhile
529 523 content. See _raw_candidate_groups for details about the group order.
530 524 """
531 525 # should we try to build a delta?
532 526 if not (len(revlog) and revlog._storedeltachains):
533 527 yield None
534 528 return
535 529
536 530 deltalength = revlog.length
537 531 deltaparent = revlog.deltaparent
538 532 good = None
539 533
540 534 deltas_limit = textlen * LIMIT_DELTA2TEXT
541 535
542 536 tested = set([nullrev])
543 537 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
544 538 while True:
545 539 temptative = candidates.send(good)
546 540 if temptative is None:
547 541 break
548 542 group = []
549 543 for rev in temptative:
550 544 # skip over empty deltas (no need to include them in a chain)
551 545 while (revlog._generaldelta
552 546 and not (rev == nullrev
553 547 or rev in tested
554 548 or deltalength(rev))):
555 549 tested.add(rev)
556 550 rev = deltaparent(rev)
557 551 # filter out revisions we already tested
558 552 if rev in tested:
559 553 continue
560 554 tested.add(rev)
561 555 # filter out delta bases that will never produce a good delta
562 556 if deltas_limit < revlog.length(rev):
563 557 continue
564 558 # no need to try a delta against nullrev, this will be done as a
565 559 # last resort.
566 560 if rev == nullrev:
567 561 continue
568 562 # no delta for rawtext-changing revs (see "candelta" for why)
569 563 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
570 564 continue
571 565 group.append(rev)
572 566 if group:
573 567 # XXX: in the sparse revlog case, the group can become large,
574 568 # impacting performance. Some bounding or slicing mechanism
575 569 # would help to reduce this impact.
576 570 good = yield tuple(group)
577 571 yield None
578 572
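The send()-based protocol used here is the subtle part: the consumer pulls the first group with next(), feeds the chosen base back with send() to request refined candidates, and a trailing None ends the exchange. A toy model of the same shape (the candidate numbers are invented):

def toygroups():
    good = yield (10, 12)      # first candidate group
    if good is not None:
        yield (good - 1,)      # a "refined" group near the winner
    yield None                 # no more candidates

groups = toygroups()
group = next(groups)           # -> (10, 12)
while group is not None:
    best = max(group)          # stand-in for real delta scoring
    group = groups.send(best)  # feed the choice back for refinement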
579 573 def _findsnapshots(revlog, cache, start_rev):
580 574 """find snapshot from start_rev to tip"""
581 575 deltaparent = revlog.deltaparent
582 576 issnapshot = revlog.issnapshot
583 577 for rev in revlog.revs(start_rev):
584 578 if issnapshot(rev):
585 579 cache[deltaparent(rev)].append(rev)
586 580
587 581 def _refinedgroups(revlog, p1, p2, cachedelta):
588 582 good = None
589 583 # First we try to reuse the delta contained in the bundle.
590 584 # (or from the source revlog)
591 585 #
592 586 # This logic only applies to general delta repositories and can be disabled
593 587 # through configuration. Disabling source-delta reuse is useful when
594 588 # we want to make sure we recompute "optimal" deltas.
595 589 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
596 590 # Assume what we received from the server is a good choice
597 591 # build delta will reuse the cache
598 592 good = yield (cachedelta[0],)
599 593 if good is not None:
600 594 yield None
601 595 return
602 596 for candidates in _rawgroups(revlog, p1, p2, cachedelta):
603 597 good = yield candidates
604 598 if good is not None:
605 599 break
606 600
607 601 # If sparse revlog is enabled, we can try to refine the available deltas
608 602 if not revlog._sparserevlog:
609 603 yield None
610 604 return
611 605
612 606 # if we have a refinable value, try to refine it
613 607 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
614 608 # refine snapshot down
615 609 previous = None
616 610 while previous != good:
617 611 previous = good
618 612 base = revlog.deltaparent(good)
619 613 if base == nullrev:
620 614 break
621 615 good = yield (base,)
622 616 # refine snapshot up
623 617 #
624 618 # XXX the _findsnapshots call can be expensive and is "duplicated" with
625 619 # the one done in `_rawgroups`. Once we start working on performance,
626 620 # we should make the two code paths share this computation.
627 621 snapshots = collections.defaultdict(list)
628 622 _findsnapshots(revlog, snapshots, good + 1)
629 623 previous = None
630 624 while good != previous:
631 625 previous = good
632 626 children = tuple(sorted(c for c in snapshots[good]))
633 627 good = yield children
634 628
635 629 # we have found nothing
636 630 yield None
637 631
638 632 def _rawgroups(revlog, p1, p2, cachedelta):
639 633 """Provides group of revision to be tested as delta base
640 634
641 635 This lower level function focus on emitting delta theorically interresting
642 636 without looking it any practical details.
643 637
644 638 The group order aims at providing fast or small candidates first.
645 639 """
646 640 gdelta = revlog._generaldelta
647 641 sparse = revlog._sparserevlog
648 642 curr = len(revlog)
649 643 prev = curr - 1
650 644 deltachain = lambda rev: revlog._deltachain(rev)[0]
651 645
652 646 if gdelta:
653 647 # exclude the base that was already tested lazily, if any
654 648 parents = [p for p in (p1, p2) if p != nullrev]
655 649
656 650 if not revlog._deltabothparents and len(parents) == 2:
657 651 parents.sort()
658 652 # To minimize the chance of having to build a fulltext,
659 653 # pick first whichever parent is closest to us (max rev)
660 654 yield (parents[1],)
661 655 # then the other one (min rev) if the first did not fit
662 656 yield (parents[0],)
663 657 elif len(parents) > 0:
664 658 # Test all parents (1 or 2), and keep the best candidate
665 659 yield parents
666 660
667 661 if sparse and parents:
668 662 snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
669 663 # See if there is an existing snapshot in the parent chains that we
670 664 # can use as a base for a new intermediate snapshot
671 665 #
672 666 # search for snapshot in parents delta chain
673 667 # map: snapshot-level: snapshot-rev
674 668 parents_snaps = collections.defaultdict(set)
675 669 candidate_chains = [deltachain(p) for p in parents]
676 670 for chain in candidate_chains:
677 671 for idx, s in enumerate(chain):
678 672 if not revlog.issnapshot(s):
679 673 break
680 674 parents_snaps[idx].add(s)
681 675 snapfloor = min(parents_snaps[0]) + 1
682 676 _findsnapshots(revlog, snapshots, snapfloor)
683 677 # search for the highest "unrelated" revision
684 678 #
685 679 # Adding snapshots used by an "unrelated" revision increases the odds
686 680 # that we reuse an independent, yet better, snapshot chain.
687 681 #
688 682 # XXX instead of building a set of revisions, we could lazily enumerate
689 683 # over the chains. That would be more efficient, however we stick to
690 684 # simple code for now.
691 685 all_revs = set()
692 686 for chain in candidate_chains:
693 687 all_revs.update(chain)
694 688 other = None
695 689 for r in revlog.revs(prev, snapfloor):
696 690 if r not in all_revs:
697 691 other = r
698 692 break
699 693 if other is not None:
700 694 # To avoid unfair competition, we won't use unrelated intermediate
701 695 # snapshots that are deeper than the ones from the parent delta
702 696 # chain.
703 697 max_depth = max(parents_snaps.keys())
704 698 chain = deltachain(other)
705 699 for idx, s in enumerate(chain):
706 700 if s < snapfloor:
707 701 continue
708 702 if max_depth < idx:
709 703 break
710 704 if not revlog.issnapshot(s):
711 705 break
712 706 parents_snaps[idx].add(s)
713 707 # Test them as possible intermediate snapshot bases.
714 708 # We test them from the highest to the lowest level. High-level ones
715 709 # are more likely to result in a small delta.
716 710 floor = None
717 711 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
718 712 siblings = set()
719 713 for s in snaps:
720 714 siblings.update(snapshots[s])
721 715 # Before considering making a new intermediate snapshot, we check
722 716 # whether an existing snapshot, a child of the base we consider,
723 717 # would be suitable.
724 718 #
725 719 # This gives us a chance to reuse a delta chain "unrelated" to the
726 720 # current revision instead of starting our own. Without such
727 721 # reuse, topological branches would keep reopening new chains,
728 722 # creating more and more snapshots as the repository grows.
729 723
730 724 if floor is not None:
731 725 # We only do this for siblings created after the one in our
732 726 # parent's delta chain. Those created before have fewer chances
733 727 # of being a valid base, since our ancestors had to create a new
734 728 # snapshot.
735 729 siblings = [r for r in siblings if floor < r]
736 730 yield tuple(sorted(siblings))
737 731 # then test the base from our parent's delta chain.
738 732 yield tuple(sorted(snaps))
739 733 floor = min(snaps)
740 734 # No suitable base was found in the parent chain; search whether any
741 735 # full snapshot emitted since the parent's base would be a suitable
742 736 # base for an intermediate snapshot.
743 737 #
744 738 # This gives a chance to reuse a delta chain unrelated to the current
745 739 # revision instead of starting our own. Without such reuse,
746 740 # topological branches would keep reopening new full chains, creating
747 741 # more and more snapshots as the repository grows.
748 742 yield tuple(snapshots[nullrev])
749 743
750 744 if not sparse:
751 745 # other approaches failed; try against prev to hopefully save us a
752 746 # fulltext.
753 747 yield (prev,)
754 748
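The per-level bookkeeping above can be looked at in isolation: for each parent delta chain, the snapshot found at position idx is filed under level idx, and levels are later tried from deepest to shallowest. A self-contained sketch with invented chains:

import collections

# hypothetical snapshot-only prefixes of two parent delta chains
candidate_chains = [[2, 7, 9], [2, 5]]
parents_snaps = collections.defaultdict(set)
for chain in candidate_chains:
    for idx, s in enumerate(chain):
        parents_snaps[idx].add(s)
assert dict(parents_snaps) == {0: {2}, 1: {5, 7}, 2: {9}}
# deepest level first, mirroring sorted(parents_snaps.items(), reverse=True)
assert sorted(parents_snaps.items(), reverse=True)[0] == (2, {9})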
755 749 class deltacomputer(object):
756 750 def __init__(self, revlog):
757 751 self.revlog = revlog
758 752
759 753 def buildtext(self, revinfo, fh):
760 754 """Builds a fulltext version of a revision
761 755
762 756 revinfo: _revisioninfo instance that contains all needed info
763 757 fh: file handle to either the .i or the .d revlog file,
764 758 depending on whether it is inlined or not
765 759 """
766 760 btext = revinfo.btext
767 761 if btext[0] is not None:
768 762 return btext[0]
769 763
770 764 revlog = self.revlog
771 765 cachedelta = revinfo.cachedelta
772 766 baserev = cachedelta[0]
773 767 delta = cachedelta[1]
774 768
775 769 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
776 770 revinfo.p1, revinfo.p2,
777 771 revinfo.flags, revinfo.node)
778 772 return fulltext
779 773
780 774 def _builddeltadiff(self, base, revinfo, fh):
781 775 revlog = self.revlog
782 776 t = self.buildtext(revinfo, fh)
783 777 if revlog.iscensored(base):
784 778 # deltas based on a censored revision must replace the
785 779 # full content in one patch, so delta works everywhere
786 780 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
787 781 delta = header + t
788 782 else:
789 783 ptext = revlog.revision(base, _df=fh, raw=True)
790 784 delta = mdiff.textdiff(ptext, t)
791 785
792 786 return delta
793 787
794 788 def _builddeltainfo(self, revinfo, base, fh):
795 789 # can we use the cached delta?
796 790 delta = None
797 791 if revinfo.cachedelta:
798 792 cachebase, cachediff = revinfo.cachedelta
799 793 # check if the diff still applies
800 794 currentbase = cachebase
801 795 while (currentbase != nullrev
802 796 and currentbase != base
803 797 and self.revlog.length(currentbase) == 0):
804 798 currentbase = self.revlog.deltaparent(currentbase)
805 799 if currentbase == base:
806 800 delta = revinfo.cachedelta[1]
807 801 if delta is None:
808 802 delta = self._builddeltadiff(base, revinfo, fh)
809 803 revlog = self.revlog
810 804 header, data = revlog.compress(delta)
811 805 deltalen = len(header) + len(data)
812 806 chainbase = revlog.chainbase(base)
813 807 offset = revlog.end(len(revlog) - 1)
814 808 dist = deltalen + offset - revlog.start(chainbase)
815 809 if revlog._generaldelta:
816 810 deltabase = base
817 811 else:
818 812 deltabase = chainbase
819 813 chainlen, compresseddeltalen = revlog._chaininfo(base)
820 814 chainlen += 1
821 815 compresseddeltalen += deltalen
822 816
823 817 revlog = self.revlog
824 818 snapshotdepth = None
825 819 if deltabase == nullrev:
826 820 snapshotdepth = 0
827 821 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
828 822 # A delta chain should always be one full snapshot,
829 823 # zero or more semi-snapshots, and zero or more deltas
830 824 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
831 825 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
832 826 snapshotdepth = len(revlog._deltachain(deltabase)[0])
833 827
834 828 return _deltainfo(dist, deltalen, (header, data), deltabase,
835 829 chainbase, chainlen, compresseddeltalen,
836 830 snapshotdepth)
837 831
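The `dist` computed in `_builddeltainfo` is the byte span from the start of the chain's base to the end of the new delta once appended at the end of the revlog. A small arithmetic sketch with invented offsets:

# invented layout: the chain base starts at byte 100, the revlog
# currently ends at byte 1000, and the new compressed delta is 50 bytes
chainstart = 100   # revlog.start(chainbase)
offset = 1000      # revlog.end(len(revlog) - 1)
deltalen = 50      # len(header) + len(data)
dist = deltalen + offset - chainstart
assert dist == 950  # bytes spanned when reading the whole chain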
838 832 def _fullsnapshotinfo(self, fh, revinfo):
839 833 curr = len(self.revlog)
840 834 rawtext = self.buildtext(revinfo, fh)
841 835 data = self.revlog.compress(rawtext)
842 836 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
843 837 deltabase = chainbase = curr
844 838 snapshotdepth = 0
845 839 chainlen = 1
846 840
847 841 return _deltainfo(dist, deltalen, data, deltabase,
848 842 chainbase, chainlen, compresseddeltalen,
849 843 snapshotdepth)
850 844
851 845 def finddeltainfo(self, revinfo, fh):
852 846 """Find an acceptable delta against a candidate revision
853 847
854 848 revinfo: information about the revision (instance of _revisioninfo)
855 849 fh: file handle to either the .i or the .d revlog file,
856 850 depending on whether it is inlined or not
857 851
858 852 Returns the first acceptable candidate revision, as ordered by
859 853 _candidategroups
860 854
861 855 If no suitable deltabase is found, we return delta info for a full
862 856 snapshot.
863 857 """
864 858 if not revinfo.textlen:
865 859 return self._fullsnapshotinfo(fh, revinfo)
866 860
867 861 # no delta for flag processor revision (see "candelta" for why)
868 862 # not calling candelta since only one revision needs testing, also to
869 863 # avoid the overhead of fetching flags again.
870 864 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
871 865 return self._fullsnapshotinfo(fh, revinfo)
872 866
873 867 cachedelta = revinfo.cachedelta
874 868 p1 = revinfo.p1
875 869 p2 = revinfo.p2
876 870 revlog = self.revlog
877 871
878 872 deltainfo = None
879 873 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
880 874 groups = _candidategroups(self.revlog, revinfo.textlen,
881 875 p1r, p2r, cachedelta)
882 876 candidaterevs = next(groups)
883 877 while candidaterevs is not None:
884 878 nominateddeltas = []
885 879 if deltainfo is not None:
886 880 # if we already found a good delta,
887 881 # challenge it against refined candidates
888 882 nominateddeltas.append(deltainfo)
889 883 for candidaterev in candidaterevs:
890 884 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
891 885 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
892 886 nominateddeltas.append(candidatedelta)
893 887 if nominateddeltas:
894 888 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
895 889 if deltainfo is not None:
896 890 candidaterevs = groups.send(deltainfo.base)
897 891 else:
898 892 candidaterevs = next(groups)
899 893
900 894 if deltainfo is None:
901 895 deltainfo = self._fullsnapshotinfo(fh, revinfo)
902 896 return deltainfo
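Finally, a condensed model of the selection loop in `finddeltainfo`: in each round, freshly built candidates challenge the incumbent best and the smallest delta wins. The (base, deltalen) pairs are invented stand-ins for real `_deltainfo` objects:

import collections

Delta = collections.namedtuple('Delta', 'base deltalen')
rounds = [[Delta(10, 500), Delta(12, 450)],  # initial candidate group
          [Delta(11, 300)]]                  # refined group via send()
deltainfo = None
for group in rounds:
    nominateddeltas = list(group)
    if deltainfo is not None:
        nominateddeltas.append(deltainfo)    # challenge the incumbent
    deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
assert deltainfo == Delta(11, 300)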