##// END OF EJS Templates
flagprocessors: use _processflagsraw in easy cases...
marmoute -
r43147:1ebf1a1e default
parent child Browse files
Show More
@@ -1,1048 +1,1047 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import (
17 17 nullrev,
18 18 )
19 19 from ..i18n import _
20 20
21 21 from .constants import (
22 22 REVIDX_ISCENSORED,
23 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 24 )
25 25
26 26 from ..thirdparty import (
27 27 attr,
28 28 )
29 29
30 30 from .. import (
31 31 error,
32 32 mdiff,
33 33 util,
34 34 )
35 35
36 36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 37 LIMIT_DELTA2TEXT = 2
38 38
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        # cumulative end offsets, one entry per revision
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and the first revision start at offset 0
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        # payload size is the distance between consecutive boundaries
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev behaves as an (empty) snapshot
        return rev == nullrev or rev in self._snapshot
72 72
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never go below the minimal gap size, for consistency with the
        # density-based slicing below
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the index's native implementation when it provides one
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    # first slice by read density, then cap each chunk at targetsize
    for chunk in densityslicing(revs,
                                revlog._srdensitythreshold,
                                revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
145 145
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # fast path: the whole chunk already fits within targetsize
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget; extend the current chunk
            endrevidx = idx + 1
        else:
            # emit the accumulated chunk and start a new one at `r`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # first non-snapshot revision reached; switch strategies below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate span until it fits within targetsize
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the loop
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
299 299
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # dense enough already: no slicing needed
    if density >= targetdensity:
        yield revs
        return

    # Collect all gaps between consecutive non-empty revisions, so we can
    # later drop the largest ones first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort ascending so list.pop() returns the largest gap first
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read, which
        # raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # final chunk: everything after the last selected cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
425 425
426 426 def _trimchunk(revlog, revs, startidx, endidx=None):
427 427 """returns revs[startidx:endidx] without empty trailing revs
428 428
429 429 Doctest Setup
430 430 >>> revlog = _testrevlog([
431 431 ... 5, #0
432 432 ... 10, #1
433 433 ... 12, #2
434 434 ... 12, #3 (empty)
435 435 ... 17, #4
436 436 ... 21, #5
437 437 ... 21, #6 (empty)
438 438 ... ])
439 439
440 440 Contiguous cases:
441 441 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
442 442 [0, 1, 2, 3, 4, 5]
443 443 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
444 444 [0, 1, 2, 3, 4]
445 445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
446 446 [0, 1, 2]
447 447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
448 448 [2]
449 449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
450 450 [3, 4, 5]
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
452 452 [3, 4]
453 453
454 454 Discontiguous cases:
455 455 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
456 456 [1, 3, 5]
457 457 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
458 458 [1]
459 459 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
460 460 [3, 5]
461 461 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
462 462 [3, 5]
463 463 """
464 464 length = revlog.length
465 465
466 466 if endidx is None:
467 467 endidx = len(revs)
468 468
469 469 # If we have a non-emtpy delta candidate, there are nothing to trim
470 470 if revs[endidx - 1] < len(revlog):
471 471 # Trim empty revs at the end, except the very first revision of a chain
472 472 while (endidx > 1
473 473 and endidx > startidx
474 474 and length(revs[endidx - 1]) == 0):
475 475 endidx -= 1
476 476
477 477 return revs[startidx:endidx]
478 478
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # span = end offset of the last revision - start offset of the first
    return revlog.end(revs[-1]) - revlog.start(revs[0])
507 507
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is the file handle passed through to revlog reads; ``baserev`` and
    ``delta`` describe the patch to apply; ``p1``/``p2``/``flags`` and
    ``expectednode`` are used for hash validation of the result.

    Raises StorageError if ``flags`` claims the node is censored but the
    rebuilt text hashes correctly (i.e. it is not actually censored).
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = revlog._processflagsraw(fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # the hash check above passed, so the node cannot be censored
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
536 535
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing a candidate delta for a revision."""
    # distance from the delta base revision (bounds the I/O needed to read it)
    distance = attr.ib()
    # length of the delta itself
    deltalen = attr.ib()
    # the delta payload — presumably the raw diff bytes; confirm with producer
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # base revision of the resulting delta chain
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # sum of the sizes of all deltas that must be applied (bounds CPU cost)
    compresseddeltalen = attr.ib()
    # snapshot depth if the revision is an intermediate snapshot, else None
    snapshotdepth = attr.ib()
547 546
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, textlen * 4)
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    maxchainlen = revlog._maxchainlen
    if maxchainlen and maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exist to prevent endless chain of intermediate delta to be
    # created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
616 615
617 616 # If a revision's full text is that much bigger than a base candidate full
618 617 # text's, it is very unlikely that it will produce a valid delta. We no longer
619 618 # consider these candidates.
620 619 LIMIT_BASE2TEXT = 500
621 620
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator using the ``send`` protocol: the caller sends back a
    "good" revision (or None) after trying each emitted group; a final None is
    yielded when no more candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # nullrev is pre-marked as tested so it is always skipped below
    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mecanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
714 713
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip"""
    index = revlog.index
    if util.safehasattr(index, 'findsnapshots'):
        # the index provides its own (presumably native) implementation
        index.findsnapshots(cache, start_rev)
        return
    # pure-python fallback: walk every revision and record snapshots,
    # keyed by their delta parent
    deltaparent = revlog.deltaparent
    issnapshot = revlog.issnapshot
    for rev in revlog.revs(start_rev):
        if issnapshot(rev):
            cache[deltaparent(rev)].append(rev)
725 724
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit candidate-base groups, refining around any reported good base.

    Generator using the ``send`` protocol: after each yielded group the
    caller sends back the revision that produced a good delta (or None).
    A final None marks exhaustion.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the delta-parent chain toward the root,
        # offering each ancestor as a candidate until no improvement is sent
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the snapshot's children as candidates
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
773 772
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: ``parents`` is only bound inside the ``gdelta`` branch above;
    # this is safe because ``sparse`` implies ``gdelta``.
    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
893 892
class deltacomputer(object):
    """Helper bound to a revlog that finds and builds deltas for new
    revisions.

    ``finddeltainfo`` is the entry point; the other methods build and
    evaluate individual delta candidates.
    """

    def __init__(self, revlog):
        # the revlog this object computes deltas for
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        # revinfo.btext is a single-entry mutable cache shared with the
        # caller: once filled, reuse it instead of re-applying the delta
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Build the raw delta text turning revision ``base`` into the
        revision described by ``revinfo``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` for storing ``revinfo`` as a delta
        against revision ``base``.

        Returns None when the candidate can be ruled out early (see the
        ``upperboundcomp`` bail-out below).
        """
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, the stored base is the chain base
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            # delta against null: this would be a level-0 (full) snapshot
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            #check if the diff still apply
            # walk past empty (zero-length) revisions: the cached delta
            # still applies if we reach ``base`` through them
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        # snapshot depth needs to be neither None nor 0 (level-0 snapshot)
        if revlog.upperboundcomp is not None and snapshotdepth:
            # estimate a best-case compressed size to bail out before
            # paying for an actual compression round
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if snapshotlimit < lowestrealisticdeltalen:
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a ``_deltainfo`` storing ``revinfo`` as a full snapshot
        (its own base, no delta chain)."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        # an empty revision has nothing worth delta-ing against
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        # _candidategroups is a coroutine-style generator: the best base
        # found so far is fed back in (via send) so it can refine the
        # next group of candidates
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if candidatedelta is not None:
                    if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                        nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest acceptable delta seen so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            # nothing suitable found: fall back to a full snapshot
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now