##// END OF EJS Templates
doctest: add an `issnapshot` method to _testrevlog...
Boris Feld -
r40677:85b14f0d default
parent child Browse files
Show More
@@ -1,895 +1,899 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import (
17 17 nullrev,
18 18 )
19 19 from ..i18n import _
20 20
21 21 from .constants import (
22 22 REVIDX_ISCENSORED,
23 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 24 )
25 25
26 26 from ..thirdparty import (
27 27 attr,
28 28 )
29 29
30 30 from .. import (
31 31 error,
32 32 mdiff,
33 33 )
34 34
35 35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 36 LIMIT_DELTA2TEXT = 2
37 37
38 38 class _testrevlog(object):
39 39 """minimalist fake revlog to use in doctests"""
40 40
41 def __init__(self, data, density=0.5, mingap=0):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 42 """data is an list of revision payload boundaries"""
43 43 self._data = data
44 44 self._srdensitythreshold = density
45 45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46 47
47 48 def start(self, rev):
48 49 if rev == 0:
49 50 return 0
50 51 return self._data[rev - 1]
51 52
52 53 def end(self, rev):
53 54 return self._data[rev]
54 55
55 56 def length(self, rev):
56 57 return self.end(rev) - self.start(rev)
57 58
58 59 def __len__(self):
59 60 return len(self._data)
60 61
62 def issnapshot(self, rev):
63 return rev in self._snapshot
64
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    # never let the target go below the minimal gap size, for consistency
    # with the density slicing below
    if targetsize is not None and targetsize < revlog._srmingapsize:
        targetsize = revlog._srmingapsize
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densitychunks = _slicechunktodensity(revlog, revs,
                                         revlog._srdensitythreshold,
                                         revlog._srmingapsize)
    for chunk in densitychunks:
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected, but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # fast path: the whole span already fits inside the target size
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        # span of the chunk being accumulated, if extended up to rev ``r``
        span = revlog.end(r) - startdata
        if span <= targetsize:
            # ``r`` still fits: extend the current chunk
            endrevidx = idx
        else:
            # emit the accumulated chunk (trailing empty revs trimmed) and
            # start a fresh chunk at ``r``; a single oversized revision is
            # still emitted alone, keeping every chunk valid
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    # emit whatever is left in the last accumulated chunk
    yield _trimchunk(revlog, revs, startrevidx)
200 204
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is always dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions, as
    # (size, index-in-revs) pairs, so they can later be sorted by size.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read;
        # recompute the density against the reduced read size
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
326 330
327 331 def _trimchunk(revlog, revs, startidx, endidx=None):
328 332 """returns revs[startidx:endidx] without empty trailing revs
329 333
330 334 Doctest Setup
331 335 >>> revlog = _testrevlog([
332 336 ... 5, #0
333 337 ... 10, #1
334 338 ... 12, #2
335 339 ... 12, #3 (empty)
336 340 ... 17, #4
337 341 ... 21, #5
338 342 ... 21, #6 (empty)
339 343 ... ])
340 344
341 345 Contiguous cases:
342 346 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
343 347 [0, 1, 2, 3, 4, 5]
344 348 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
345 349 [0, 1, 2, 3, 4]
346 350 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
347 351 [0, 1, 2]
348 352 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
349 353 [2]
350 354 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
351 355 [3, 4, 5]
352 356 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
353 357 [3, 4]
354 358
355 359 Discontiguous cases:
356 360 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
357 361 [1, 3, 5]
358 362 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
359 363 [1]
360 364 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
361 365 [3, 5]
362 366 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
363 367 [3, 5]
364 368 """
365 369 length = revlog.length
366 370
367 371 if endidx is None:
368 372 endidx = len(revs)
369 373
370 374 # If we have a non-emtpy delta candidate, there are nothing to trim
371 375 if revs[endidx - 1] < len(revlog):
372 376 # Trim empty revs at the end, except the very first revision of a chain
373 377 while (endidx > 1
374 378 and endidx > startidx
375 379 and length(revs[endidx - 1]) == 0):
376 380 endidx -= 1
377 381
378 382 return revs[startidx:endidx]
379 383
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # span from the first byte of the first rev to the last byte of the
    # last rev; holes between revisions still count toward the span
    return revlog.end(revs[-1]) - revlog.start(revs[0])
408 412
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is the revlog file handle, forwarded to ``revlog.revision``.
    The rebuilt text is run through the flag processors and, unless a
    processor opted out, hash-checked against ``expectednode`` (with parents
    ``p1``/``p2``). Raises ``error.StorageError`` when ``flags`` claims the
    revision is censored but decoding does not confirm it.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # a censored flag that did not raise CensoredNodeError above is
            # inconsistent data
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
437 441
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing a candidate delta (built in
    deltacomputer._builddeltainfo / _fullsnapshotinfo, judged by
    isgooddeltainfo)."""
    # read distance associated with this delta; bounding it limits I/O
    # (see isgooddeltainfo)
    distance = attr.ib()
    # size in bytes of the compressed delta
    deltalen = attr.ib()
    # the (header, compressed data) pair ready to be stored
    data = attr.ib()
    # revision number this delta applies against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # length of the delta chain, including this delta
    chainlen = attr.ib()
    # cumulated compressed size of the whole chain
    compresseddeltalen = attr.ib()
    # depth of the intermediate snapshot, or None when not a snapshot
    snapshotdepth = attr.ib()
448 452
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen

    # Read-span budget: the configured maximum (an unset/zero
    # `_maxdeltachainspan` disables the explicit bound), but never smaller
    # than four times the text length.
    maxdist = revlog._maxdeltachainspan or deltainfo.distance
    if maxdist < textlen * 4:
        maxdist = textlen * 4

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if maxdist < deltainfo.distance and not revlog._sparserevlog:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exist to prevent endless chain of intermediate delta to be
    # created.
    if deltainfo.snapshotdepth is not None:
        if (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen:
            return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if deltainfo.snapshotdepth:
        if revlog.length(deltainfo.base) < deltainfo.deltalen:
            return False

    return True
517 521
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a coroutine: the caller ``send()``s back the base it eventually
    judged good for the previous group (or None), which drives refinement.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    # hoist frequently used bound methods into locals
    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    # a base whose own stored size already exceeds this limit can never
    # produce a delta accepted by isgooddeltainfo (cumulated payload check)
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mecanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
571 575
572 576 def _findsnapshots(revlog, cache, start_rev):
573 577 """find snapshot from start_rev to tip"""
574 578 deltaparent = revlog.deltaparent
575 579 issnapshot = revlog.issnapshot
576 580 for rev in revlog.revs(start_rev):
577 581 if issnapshot(rev):
578 582 cache[deltaparent(rev)].append(rev)
579 583
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield candidate-base groups, refining them from caller feedback.

    Coroutine protocol: each ``yield`` hands a group of candidate bases to
    the caller, which ``send()``s back the base it found good (or None).
    A trailing ``yield None`` signals exhaustion.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the chain root, one base at a
        # time, as long as the caller keeps accepting
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the snapshots built on top of the current
        # choice
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
630 634
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting theoretically interesting
    deltas without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    # full delta chain (list of revs) leading to ``rev``
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): ``parents`` is only bound when ``gdelta`` is true;
    # presumably sparse-revlog implies general delta, otherwise this would
    # raise NameError — confirm against revlog configuration invariants.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
747 751
class deltacomputer(object):
    """Find, build and evaluate delta information for new revlog revisions."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        # the fulltext is memoized in a shared one-element list on revinfo
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """compute the raw (uncompressed) delta from ``base`` to revinfo"""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """build a _deltainfo describing a delta of revinfo against ``base``"""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            #check if the diff still apply
            # (walk over empty intermediate revisions: they do not change the
            # resulting text, so the cached diff remains valid)
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        # distance: from the start of the chain base to the end of the revlog
        # plus the new delta itself
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """build a _deltainfo storing the revision as a full snapshot"""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        # a full snapshot: its own compressed size is the whole chain
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        # an empty text is always stored as a full snapshot
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest acceptable delta found so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the group generator can refine
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now