##// END OF EJS Templates
sparse-revlog: drop unused deltainfo parameter from _slicechunktodensity...
Boris Feld -
r40640:3ac23dad default
parent child Browse files
Show More
@@ -1,925 +1,902 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import heapq
14 14 import struct
15 15
16 16 # import stuff from node for others to import from revlog
17 17 from ..node import (
18 18 nullrev,
19 19 )
20 20 from ..i18n import _
21 21
22 22 from .constants import (
23 23 REVIDX_ISCENSORED,
24 24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 25 )
26 26
27 27 from ..thirdparty import (
28 28 attr,
29 29 )
30 30
31 31 from .. import (
32 32 error,
33 33 mdiff,
34 34 )
35 35
36 36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 37 LIMIT_DELTA2TEXT = 2
38 38
39 39 class _testrevlog(object):
40 40 """minimalist fake revlog to use in doctests"""
41 41
42 42 def __init__(self, data, density=0.5, mingap=0):
43 43 """data is an list of revision payload boundaries"""
44 44 self._data = data
45 45 self._srdensitythreshold = density
46 46 self._srmingapsize = mingap
47 47
48 48 def start(self, rev):
49 49 if rev == 0:
50 50 return 0
51 51 return self._data[rev - 1]
52 52
53 53 def end(self, rev):
54 54 return self._data[rev]
55 55
56 56 def length(self, rev):
57 57 return self.end(rev) - self.start(rev)
58 58
59 59 def __len__(self):
60 60 return len(self._data)
61 61
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize is used to ensure we stay within specification when reading,
    for chunk in _slicechunktodensity(revlog, revs,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # nothing to slice when the whole span already fits the target
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            # still within budget, extend the current chunk
            endrevidx = idx
        else:
            # flush the current chunk and start a new one at ``r``
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
350 327
351 328 def _trimchunk(revlog, revs, startidx, endidx=None):
352 329 """returns revs[startidx:endidx] without empty trailing revs
353 330
354 331 Doctest Setup
355 332 >>> revlog = _testrevlog([
356 333 ... 5, #0
357 334 ... 10, #1
358 335 ... 12, #2
359 336 ... 12, #3 (empty)
360 337 ... 17, #4
361 338 ... 21, #5
362 339 ... 21, #6 (empty)
363 340 ... ])
364 341
365 342 Contiguous cases:
366 343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
367 344 [0, 1, 2, 3, 4, 5]
368 345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
369 346 [0, 1, 2, 3, 4]
370 347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
371 348 [0, 1, 2]
372 349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
373 350 [2]
374 351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
375 352 [3, 4, 5]
376 353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
377 354 [3, 4]
378 355
379 356 Discontiguous cases:
380 357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
381 358 [1, 3, 5]
382 359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
383 360 [1]
384 361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
385 362 [3, 5]
386 363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
387 364 [3, 5]
388 365 """
389 366 length = revlog.length
390 367
391 368 if endidx is None:
392 369 endidx = len(revs)
393 370
394 371 # If we have a non-emtpy delta candidate, there are nothing to trim
395 372 if revs[endidx - 1] < len(revlog):
396 373 # Trim empty revs at the end, except the very first revision of a chain
397 374 while (endidx > 1
398 375 and endidx > startidx
399 376 and length(revs[endidx - 1]) == 0):
400 377 endidx -= 1
401 378
402 379 return revs[startidx:endidx]
403 380
def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        # the last rev is not stored yet: use the pending delta's length to
        # compute the would-be end offset
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])
438 415
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
467 444
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # immutable record describing one candidate delta
    distance = attr.ib()            # span from the chain base, in bytes
    deltalen = attr.ib()            # size of the stored (compressed) delta
    data = attr.ib()                # (header, data) pair for the delta
    base = attr.ib()                # revision the delta applies against
    chainbase = attr.ib()           # first revision of the delta chain
    chainlen = attr.ib()            # number of deltas in the chain
    compresseddeltalen = attr.ib()  # cumulated size of the whole chain
    snapshotdepth = attr.ib()       # snapshot level, or None for plain deltas
478 455
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (revlog._maxchainlen
            and revlog._maxchainlen < deltainfo.chainlen):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
            and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True
547 524
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            #      impacting performances. Some bounding or slicing mecanism
            #      would help to reduce this impact.
            good = yield tuple(group)
    yield None
601 578
602 579 def _findsnapshots(revlog, cache, start_rev):
603 580 """find snapshot from start_rev to tip"""
604 581 deltaparent = revlog.deltaparent
605 582 issnapshot = revlog.issnapshot
606 583 for rev in revlog.revs(start_rev):
607 584 if issnapshot(rev):
608 585 cache[deltaparent(rev)].append(rev)
609 586
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit candidate groups, refining around any accepted base."""
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
660 637
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        snapshots = collections.defaultdict(list)  # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
777 754
class deltacomputer(object):
    """Compute and select delta representations for new revlog revisions."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Compute the raw delta text of this revision against ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` for this revision delta-ed against ``base``."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still apply
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a ``_deltainfo`` storing the revision as a full snapshot."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now