delta-find: never do anything fancy when general delta is off...
marmoute -
r51331:02325712 stable
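The heart of this change is the new early return added to `_candidategroups` (the second hunk below): when general delta is disabled, the storage format only allows a delta against the immediately preceding revision, so the candidate search yields that single base and stops, instead of running the snapshot/sparse-revlog machinery. A condensed sketch of the added logic, with names as in the hunk:

    if not revlog._generaldelta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return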
@@ -0,0 +1,196 b''
1 ============================================================================
2 Pulling from modern to a non-general delta target (and other related checks)
3 ============================================================================
4
5 There are various issues that can arise when we update the code with modern
6 storage in mind while working on delta processing. So this file is meant for
7 various scenarios that might break in the future or have broken in the past.
8
9 Setup
10 =====
11
12 Create a modern server with an older clone
13
14 $ cat << EOF >> $HGRCPATH
15 > [command-templates]
16 > log = "{desc} {tags}\n"
17 > EOF
18
19 $ hg init server
20
21 $ hg clone --quiet --pull server client --config format.usegeneraldelta=no
22 $ hg debugformat -R client | grep generaldelta
23 generaldelta: no
24
25 Create some complex history
26
27 $ cd server
28 $ hg debugbuilddag -n '.+3:a$.+5:b/a:k$.+7:c/b:l$.+6:d/a:m<k+6/l+1/m'
29 $ hg log -G
30 o r36 tip
31 |\
32 | o r35
33 | |
34 | o r34
35 | |\
36 | | o r33
37 | | |
38 | | o r32
39 | | |
40 | | o r31
41 | | |
42 | | o r30
43 | | |
44 | | o r29
45 | | |
46 | | o r28
47 | | |
48 o | | r27 m
49 |\ \ \
50 | o | | r26 d
51 | | | |
52 | o | | r25
53 | | | |
54 | o | | r24
55 | | | |
56 | o | | r23
57 | | | |
58 | o | | r22
59 | | | |
60 | o | | r21
61 | | | |
62 | o | | r20
63 | / /
64 | o | r19 l
65 | |\ \
66 | | o | r18 c
67 | | | |
68 | | o | r17
69 | | | |
70 | | o | r16
71 | | | |
72 | | o | r15
73 | | | |
74 | | o | r14
75 | | | |
76 | | o | r13
77 | | | |
78 | | o | r12
79 | | | |
80 | | o | r11
81 | | /
82 +---o r10 k
83 | |/
84 | o r9 b
85 | |
86 | o r8
87 | |
88 | o r7
89 | |
90 | o r6
91 | |
92 | o r5
93 | |
94 | o r4
95 |
96 o r3 a
97 |
98 o r2
99 |
100 o r1
101 |
102 o r0
103
104 $ cd ..
105
106
107 Pull it in the client
108 =====================
109
110
111 pull with default value
112 -----------------------
113
114 $ cp -R client client-simple-pull
115 $ hg -R client-simple-pull pull
116 pulling from $TESTTMP/server
117 requesting all changes
118 adding changesets
119 adding manifests
120 adding file changes
121 added 37 changesets with 37 changes to 37 files
122 new changesets 61246295ee1e:b4b117cbbcf3
123 (run 'hg update' to get a working copy)
124 $ hg -R client-simple-pull verify
125 checking changesets
126 checking manifests
127 crosschecking files in changesets and manifests
128 checking files
129 checking dirstate
130 checked 37 changesets with 37 changes to 37 files
131
132
133 pull with "no-reuse" policy
134 ---------------------------
135
136 $ cp -R client client-no-reuse
137 $ hg -R client-no-reuse pull --config paths.default:pulled-delta-reuse-policy=no-reuse
138 pulling from $TESTTMP/server
139 requesting all changes
140 adding changesets
141 adding manifests
142 adding file changes
143 added 37 changesets with 37 changes to 37 files
144 new changesets 61246295ee1e:b4b117cbbcf3
145 (run 'hg update' to get a working copy)
146 $ hg -R client-no-reuse verify
147 checking changesets
148 checking manifests
149 crosschecking files in changesets and manifests
150 checking files
151 checking dirstate
152 checked 37 changesets with 37 changes to 37 files
153
154
155 pull with "try-base" policy
156 ---------------------------
157
158 $ cp -R client client-try-base
159 $ hg -R client-try-base pull --config paths.default:pulled-delta-reuse-policy=try-base
160 pulling from $TESTTMP/server
161 requesting all changes
162 adding changesets
163 adding manifests
164 adding file changes
165 added 37 changesets with 37 changes to 37 files
166 new changesets 61246295ee1e:b4b117cbbcf3
167 (run 'hg update' to get a working copy)
168 $ hg -R client-try-base verify
169 checking changesets
170 checking manifests
171 crosschecking files in changesets and manifests
172 checking files
173 checking dirstate
174 checked 37 changesets with 37 changes to 37 files
175
176
177 pull with "forced" policy
178 -------------------------
179
180 $ cp -R client client-forced
181 $ hg -R client-forced pull --config paths.default:pulled-delta-reuse-policy=forced
182 pulling from $TESTTMP/server
183 requesting all changes
184 adding changesets
185 adding manifests
186 adding file changes
187 added 37 changesets with 37 changes to 37 files
188 new changesets 61246295ee1e:b4b117cbbcf3
189 (run 'hg update' to get a working copy)
190 $ hg -R client-forced verify
191 checking changesets
192 checking manifests
193 crosschecking files in changesets and manifests
194 checking files
195 checking dirstate
196 checked 37 changesets with 37 changes to 37 files
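The four pulls above exercise the `pulled-delta-reuse-policy` path sub-option through `--config`. For reference, the same policy can presumably also be set persistently in the client configuration using the usual path sub-option syntax; a minimal sketch for the `default` path used by these clones:

    [paths]
    default:pulled-delta-reuse-policy = no-reuse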
@@ -1,1514 +1,1520 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_FORCE,
24 24 DELTA_BASE_REUSE_NO,
25 25 KIND_CHANGELOG,
26 26 KIND_FILELOG,
27 27 KIND_MANIFESTLOG,
28 28 REVIDX_ISCENSORED,
29 29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 30 )
31 31
32 32 from ..thirdparty import attr
33 33
34 34 from .. import (
35 35 error,
36 36 mdiff,
37 37 util,
38 38 )
39 39
40 40 from . import flagutil
41 41
42 42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 43 LIMIT_DELTA2TEXT = 2
44 44
45 45
46 46 class _testrevlog:
47 47 """minimalist fake revlog to use in doctests"""
48 48
49 49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 50 """data is an list of revision payload boundaries"""
51 51 self._data = data
52 52 self._srdensitythreshold = density
53 53 self._srmingapsize = mingap
54 54 self._snapshot = set(snapshot)
55 55 self.index = None
56 56
57 57 def start(self, rev):
58 58 if rev == nullrev:
59 59 return 0
60 60 if rev == 0:
61 61 return 0
62 62 return self._data[rev - 1]
63 63
64 64 def end(self, rev):
65 65 if rev == nullrev:
66 66 return 0
67 67 return self._data[rev]
68 68
69 69 def length(self, rev):
70 70 return self.end(rev) - self.start(rev)
71 71
72 72 def __len__(self):
73 73 return len(self._data)
74 74
75 75 def issnapshot(self, rev):
76 76 if rev == nullrev:
77 77 return True
78 78 return rev in self._snapshot
79 79
80 80
81 81 def slicechunk(revlog, revs, targetsize=None):
82 82 """slice revs to reduce the amount of unrelated data to be read from disk.
83 83
84 84 ``revs`` is sliced into groups that should be read in one time.
85 85 Assume that revs are sorted.
86 86
87 87 The initial chunk is sliced until the overall density (payload/chunks-span
88 88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
89 89 `revlog._srmingapsize` is skipped.
90 90
91 91 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
92 92 For consistency with other slicing choices, this limit won't go lower than
93 93 `revlog._srmingapsize`.
94 94
95 95 If individual revision chunks are larger than this limit, they will still
96 96 be yielded individually.
97 97
98 98 >>> data = [
99 99 ... 5, #00 (5)
100 100 ... 10, #01 (5)
101 101 ... 12, #02 (2)
102 102 ... 12, #03 (empty)
103 103 ... 27, #04 (15)
104 104 ... 31, #05 (4)
105 105 ... 31, #06 (empty)
106 106 ... 42, #07 (11)
107 107 ... 47, #08 (5)
108 108 ... 47, #09 (empty)
109 109 ... 48, #10 (1)
110 110 ... 51, #11 (3)
111 111 ... 74, #12 (23)
112 112 ... 85, #13 (11)
113 113 ... 86, #14 (1)
114 114 ... 91, #15 (5)
115 115 ... ]
116 116 >>> revlog = _testrevlog(data, snapshot=range(16))
117 117
118 118 >>> list(slicechunk(revlog, list(range(16))))
119 119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
120 120 >>> list(slicechunk(revlog, [0, 15]))
121 121 [[0], [15]]
122 122 >>> list(slicechunk(revlog, [0, 11, 15]))
123 123 [[0], [11], [15]]
124 124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
125 125 [[0], [11, 13, 15]]
126 126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
127 127 [[1, 2], [5, 8, 10, 11], [14]]
128 128
129 129 Slicing with a maximum chunk size
130 130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
131 131 [[0], [11], [13], [15]]
132 132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
133 133 [[0], [11], [13, 15]]
134 134
135 135 Slicing involving nullrev
136 136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
137 137 [[-1, 0], [11], [13, 15]]
138 138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
139 139 [[-1], [13], [15]]
140 140 """
141 141 if targetsize is not None:
142 142 targetsize = max(targetsize, revlog._srmingapsize)
143 143 # targetsize should not be specified when evaluating delta candidates:
144 144 # * targetsize is used to ensure we stay within specification when reading.
145 145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
146 146 if densityslicing is None:
147 147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
148 148 for chunk in densityslicing(
149 149 revs, revlog._srdensitythreshold, revlog._srmingapsize
150 150 ):
151 151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
152 152 yield subchunk
153 153
154 154
155 155 def _slicechunktosize(revlog, revs, targetsize=None):
156 156 """slice revs to match the target size
157 157
158 158 This is intended to be used on chunks that density slicing selected but that
159 159 are still too large compared to the read guarantee of the revlog. This might
160 160 happen when the "minimal gap size" interrupted the slicing or when chains are
161 161 built in a way that creates large blocks next to each other.
162 162
163 163 >>> data = [
164 164 ... 3, #0 (3)
165 165 ... 5, #1 (2)
166 166 ... 6, #2 (1)
167 167 ... 8, #3 (2)
168 168 ... 8, #4 (empty)
169 169 ... 11, #5 (3)
170 170 ... 12, #6 (1)
171 171 ... 13, #7 (1)
172 172 ... 14, #8 (1)
173 173 ... ]
174 174
175 175 == All snapshots cases ==
176 176 >>> revlog = _testrevlog(data, snapshot=range(9))
177 177
178 178 Cases where chunk is already small enough
179 179 >>> list(_slicechunktosize(revlog, [0], 3))
180 180 [[0]]
181 181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
182 182 [[6, 7]]
183 183 >>> list(_slicechunktosize(revlog, [0], None))
184 184 [[0]]
185 185 >>> list(_slicechunktosize(revlog, [6, 7], None))
186 186 [[6, 7]]
187 187
188 188 cases where we need actual slicing
189 189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
190 190 [[0], [1]]
191 191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
192 192 [[1], [3]]
193 193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
194 194 [[1, 2], [3]]
195 195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
196 196 [[3], [5]]
197 197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
198 198 [[3], [5]]
199 199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
200 200 [[5], [6, 7, 8]]
201 201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
202 202 [[0], [1, 2], [3], [5], [6, 7, 8]]
203 203
204 204 Case with too large individual chunk (must return valid chunk)
205 205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
206 206 [[0], [1]]
207 207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
208 208 [[1], [3]]
209 209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
210 210 [[3], [5]]
211 211
212 212 == No Snapshot cases ==
213 213 >>> revlog = _testrevlog(data)
214 214
215 215 Cases where chunk is already small enough
216 216 >>> list(_slicechunktosize(revlog, [0], 3))
217 217 [[0]]
218 218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
219 219 [[6, 7]]
220 220 >>> list(_slicechunktosize(revlog, [0], None))
221 221 [[0]]
222 222 >>> list(_slicechunktosize(revlog, [6, 7], None))
223 223 [[6, 7]]
224 224
225 225 cases where we need actual slicing
226 226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
227 227 [[0], [1]]
228 228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
229 229 [[1], [3]]
230 230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
231 231 [[1], [2, 3]]
232 232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
233 233 [[3], [5]]
234 234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
235 235 [[3], [4, 5]]
236 236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
237 237 [[5], [6, 7, 8]]
238 238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
239 239 [[0], [1, 2], [3], [5], [6, 7, 8]]
240 240
241 241 Case with too large individual chunk (must return valid chunk)
242 242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
243 243 [[0], [1]]
244 244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
245 245 [[1], [3]]
246 246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
247 247 [[3], [5]]
248 248
249 249 == mixed case ==
250 250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
251 251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
252 252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
253 253 """
254 254 assert targetsize is None or 0 <= targetsize
255 255 startdata = revlog.start(revs[0])
256 256 enddata = revlog.end(revs[-1])
257 257 fullspan = enddata - startdata
258 258 if targetsize is None or fullspan <= targetsize:
259 259 yield revs
260 260 return
261 261
262 262 startrevidx = 0
263 263 endrevidx = 1
264 264 iterrevs = enumerate(revs)
265 265 next(iterrevs) # skip first rev.
266 266 # first step: get snapshots out of the way
267 267 for idx, r in iterrevs:
268 268 span = revlog.end(r) - startdata
269 269 snapshot = revlog.issnapshot(r)
270 270 if span <= targetsize and snapshot:
271 271 endrevidx = idx + 1
272 272 else:
273 273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 274 if chunk:
275 275 yield chunk
276 276 startrevidx = idx
277 277 startdata = revlog.start(r)
278 278 endrevidx = idx + 1
279 279 if not snapshot:
280 280 break
281 281
282 282 # for the others, we use binary slicing to quickly converge toward valid
283 283 # chunks (otherwise, we might end up looking for start/end of many
284 284 # revisions). This logic is not looking for the perfect slicing point, it
285 285 # focuses on quickly converging toward valid chunks.
286 286 nbitem = len(revs)
287 287 while (enddata - startdata) > targetsize:
288 288 endrevidx = nbitem
289 289 if nbitem - startrevidx <= 1:
290 290 break # protect against individual chunk larger than limit
291 291 localenddata = revlog.end(revs[endrevidx - 1])
292 292 span = localenddata - startdata
293 293 while span > targetsize:
294 294 if endrevidx - startrevidx <= 1:
295 295 break # protect against individual chunk larger than limit
296 296 endrevidx -= (endrevidx - startrevidx) // 2
297 297 localenddata = revlog.end(revs[endrevidx - 1])
298 298 span = localenddata - startdata
299 299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
300 300 if chunk:
301 301 yield chunk
302 302 startrevidx = endrevidx
303 303 startdata = revlog.start(revs[startrevidx])
304 304
305 305 chunk = _trimchunk(revlog, revs, startrevidx)
306 306 if chunk:
307 307 yield chunk
308 308
309 309
310 310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
311 311 """slice revs to reduce the amount of unrelated data to be read from disk.
312 312
313 313 ``revs`` is sliced into groups that should be read in one time.
314 314 Assume that revs are sorted.
315 315
316 316 The initial chunk is sliced until the overall density (payload/chunks-span
317 317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
318 318 skipped.
319 319
320 320 >>> revlog = _testrevlog([
321 321 ... 5, #00 (5)
322 322 ... 10, #01 (5)
323 323 ... 12, #02 (2)
324 324 ... 12, #03 (empty)
325 325 ... 27, #04 (15)
326 326 ... 31, #05 (4)
327 327 ... 31, #06 (empty)
328 328 ... 42, #07 (11)
329 329 ... 47, #08 (5)
330 330 ... 47, #09 (empty)
331 331 ... 48, #10 (1)
332 332 ... 51, #11 (3)
333 333 ... 74, #12 (23)
334 334 ... 85, #13 (11)
335 335 ... 86, #14 (1)
336 336 ... 91, #15 (5)
337 337 ... ])
338 338
339 339 >>> list(_slicechunktodensity(revlog, list(range(16))))
340 340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
341 341 >>> list(_slicechunktodensity(revlog, [0, 15]))
342 342 [[0], [15]]
343 343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
344 344 [[0], [11], [15]]
345 345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
346 346 [[0], [11, 13, 15]]
347 347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
348 348 [[1, 2], [5, 8, 10, 11], [14]]
349 349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 350 ... mingapsize=20))
351 351 [[1, 2, 3, 5, 8, 10, 11], [14]]
352 352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
353 353 ... targetdensity=0.95))
354 354 [[1, 2], [5], [8, 10, 11], [14]]
355 355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 356 ... targetdensity=0.95, mingapsize=12))
357 357 [[1, 2], [5, 8, 10, 11], [14]]
358 358 """
359 359 start = revlog.start
360 360 length = revlog.length
361 361
362 362 if len(revs) <= 1:
363 363 yield revs
364 364 return
365 365
366 366 deltachainspan = segmentspan(revlog, revs)
367 367
368 368 if deltachainspan < mingapsize:
369 369 yield revs
370 370 return
371 371
372 372 readdata = deltachainspan
373 373 chainpayload = sum(length(r) for r in revs)
374 374
375 375 if deltachainspan:
376 376 density = chainpayload / float(deltachainspan)
377 377 else:
378 378 density = 1.0
379 379
380 380 if density >= targetdensity:
381 381 yield revs
382 382 return
383 383
384 384 # Collect the gaps so they can later be sorted by decreasing size
385 385 gaps = []
386 386 prevend = None
387 387 for i, rev in enumerate(revs):
388 388 revstart = start(rev)
389 389 revlen = length(rev)
390 390
391 391 # Skip empty revisions to form larger holes
392 392 if revlen == 0:
393 393 continue
394 394
395 395 if prevend is not None:
396 396 gapsize = revstart - prevend
397 397 # only consider holes that are large enough
398 398 if gapsize > mingapsize:
399 399 gaps.append((gapsize, i))
400 400
401 401 prevend = revstart + revlen
402 402 # sort the gaps to pop them from largest to smallest
403 403 gaps.sort()
404 404
405 405 # Collect the indices of the largest holes until the density is acceptable
406 406 selected = []
407 407 while gaps and density < targetdensity:
408 408 gapsize, gapidx = gaps.pop()
409 409
410 410 selected.append(gapidx)
411 411
412 412 # skipping this gap reduces the total amount of data that
413 413 # needs to be read
414 414 readdata -= gapsize
415 415 if readdata > 0:
416 416 density = chainpayload / float(readdata)
417 417 else:
418 418 density = 1.0
419 419 selected.sort()
420 420
421 421 # Cut the revs at collected indices
422 422 previdx = 0
423 423 for idx in selected:
424 424
425 425 chunk = _trimchunk(revlog, revs, previdx, idx)
426 426 if chunk:
427 427 yield chunk
428 428
429 429 previdx = idx
430 430
431 431 chunk = _trimchunk(revlog, revs, previdx)
432 432 if chunk:
433 433 yield chunk
434 434
435 435
436 436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 437 """returns revs[startidx:endidx] without empty trailing revs
438 438
439 439 Doctest Setup
440 440 >>> revlog = _testrevlog([
441 441 ... 5, #0
442 442 ... 10, #1
443 443 ... 12, #2
444 444 ... 12, #3 (empty)
445 445 ... 17, #4
446 446 ... 21, #5
447 447 ... 21, #6 (empty)
448 448 ... ])
449 449
450 450 Contiguous cases:
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 452 [0, 1, 2, 3, 4, 5]
453 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 454 [0, 1, 2, 3, 4]
455 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 456 [0, 1, 2]
457 457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 458 [2]
459 459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 460 [3, 4, 5]
461 461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 462 [3, 4]
463 463
464 464 Discontiguous cases:
465 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 466 [1, 3, 5]
467 467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 468 [1]
469 469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 470 [3, 5]
471 471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 472 [3, 5]
473 473 """
474 474 length = revlog.length
475 475
476 476 if endidx is None:
477 477 endidx = len(revs)
478 478
479 479 # If we have a non-empty delta candidate, there is nothing to trim
480 480 if revs[endidx - 1] < len(revlog):
481 481 # Trim empty revs at the end, except the very first revision of a chain
482 482 while (
483 483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 484 ):
485 485 endidx -= 1
486 486
487 487 return revs[startidx:endidx]
488 488
489 489
490 490 def segmentspan(revlog, revs):
491 491 """Get the byte span of a segment of revisions
492 492
493 493 revs is a sorted array of revision numbers
494 494
495 495 >>> revlog = _testrevlog([
496 496 ... 5, #0
497 497 ... 10, #1
498 498 ... 12, #2
499 499 ... 12, #3 (empty)
500 500 ... 17, #4
501 501 ... ])
502 502
503 503 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
504 504 17
505 505 >>> segmentspan(revlog, [0, 4])
506 506 17
507 507 >>> segmentspan(revlog, [3, 4])
508 508 5
509 509 >>> segmentspan(revlog, [1, 2, 3,])
510 510 7
511 511 >>> segmentspan(revlog, [1, 3])
512 512 7
513 513 """
514 514 if not revs:
515 515 return 0
516 516 end = revlog.end(revs[-1])
517 517 return end - revlog.start(revs[0])
518 518
519 519
520 520 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
521 521 """build full text from a (base, delta) pair and other metadata"""
522 522 # special case deltas which replace entire base; no need to decode
523 523 # base revision. this neatly avoids censored bases, which throw when
524 524 # they're decoded.
525 525 hlen = struct.calcsize(b">lll")
526 526 if delta[:hlen] == mdiff.replacediffheader(
527 527 revlog.rawsize(baserev), len(delta) - hlen
528 528 ):
529 529 fulltext = delta[hlen:]
530 530 else:
531 531 # deltabase is rawtext before changed by flag processors, which is
532 532 # equivalent to non-raw text
533 533 basetext = revlog.revision(baserev, _df=fh)
534 534 fulltext = mdiff.patch(basetext, delta)
535 535
536 536 try:
537 537 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
538 538 if validatehash:
539 539 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
540 540 if flags & REVIDX_ISCENSORED:
541 541 raise error.StorageError(
542 542 _(b'node %s is not censored') % expectednode
543 543 )
544 544 except error.CensoredNodeError:
545 545 # must pass the censored index flag to add censored revisions
546 546 if not flags & REVIDX_ISCENSORED:
547 547 raise
548 548 return fulltext
549 549
550 550
551 551 @attr.s(slots=True, frozen=True)
552 552 class _deltainfo:
553 553 distance = attr.ib()
554 554 deltalen = attr.ib()
555 555 data = attr.ib()
556 556 base = attr.ib()
557 557 chainbase = attr.ib()
558 558 chainlen = attr.ib()
559 559 compresseddeltalen = attr.ib()
560 560 snapshotdepth = attr.ib()
561 561
562 562
563 563 def drop_u_compression(delta):
564 564 """turn into a "u" (no-compression) into no-compression without header
565 565
566 566 This is useful for revlog format that has better compression method.
567 567 """
568 568 assert delta.data[0] == b'u', delta.data[0]
569 569 return _deltainfo(
570 570 delta.distance,
571 571 delta.deltalen - 1,
572 572 (b'', delta.data[1]),
573 573 delta.base,
574 574 delta.chainbase,
575 575 delta.chainlen,
576 576 delta.compresseddeltalen,
577 577 delta.snapshotdepth,
578 578 )
579 579
580 580
581 581 def is_good_delta_info(revlog, deltainfo, revinfo):
582 582 """Returns True if the given delta is good. Good means that it is within
583 583 the disk span, disk size, and chain length bounds that we know to be
584 584 performant."""
585 585 if deltainfo is None:
586 586 return False
587 587
588 588 if (
589 589 revinfo.cachedelta is not None
590 590 and deltainfo.base == revinfo.cachedelta[0]
591 591 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
592 592 ):
593 593 return True
594 594
595 595 # - 'deltainfo.distance' is the distance from the base revision --
596 596 # bounding it limits the amount of I/O we need to do.
597 597 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
598 598 # deltas we need to apply -- bounding it limits the amount of CPU
599 599 # we consume.
600 600
601 601 textlen = revinfo.textlen
602 602 defaultmax = textlen * 4
603 603 maxdist = revlog._maxdeltachainspan
604 604 if not maxdist:
605 605 maxdist = deltainfo.distance # ensure the conditional pass
606 606 maxdist = max(maxdist, defaultmax)
607 607
608 608 # Bad delta from read span:
609 609 #
610 610 # If the span of data read is larger than the maximum allowed.
611 611 #
612 612 # In the sparse-revlog case, we rely on the associated "sparse reading"
613 613 # to avoid issues related to the span of data. In theory, it would be
614 614 # possible to build a pathological revlog where the delta pattern would
615 615 # lead to too many reads. However, this does not happen in practice. So
616 616 # we skip the span check entirely.
617 617 if not revlog._sparserevlog and maxdist < deltainfo.distance:
618 618 return False
619 619
620 620 # Bad delta from new delta size:
621 621 #
622 622 # If the delta size is larger than the target text, storing the
623 623 # delta will be inefficient.
624 624 if textlen < deltainfo.deltalen:
625 625 return False
626 626
627 627 # Bad delta from cumulated payload size:
628 628 #
629 629 # If the sum of deltas gets larger than K * the target text length.
630 630 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
631 631 return False
632 632
633 633 # Bad delta from chain length:
634 634 #
635 635 # If the number of deltas in the chain gets too high.
636 636 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
637 637 return False
638 638
639 639 # bad delta from intermediate snapshot size limit
640 640 #
641 641 # If an intermediate snapshot size is higher than the limit. The
642 642 # limit exists to prevent endless chains of intermediate deltas from
643 643 # being created.
644 644 if (
645 645 deltainfo.snapshotdepth is not None
646 646 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
647 647 ):
648 648 return False
649 649
650 650 # bad delta if new intermediate snapshot is larger than the previous
651 651 # snapshot
652 652 if (
653 653 deltainfo.snapshotdepth
654 654 and revlog.length(deltainfo.base) < deltainfo.deltalen
655 655 ):
656 656 return False
657 657
658 658 return True
659 659
660 660
661 661 # If a revision's full text is that much bigger than a base candidate full
662 662 # text's, it is very unlikely that it will produce a valid delta. We no longer
663 663 # consider these candidates.
664 664 LIMIT_BASE2TEXT = 500
665 665
666 666
667 667 def _candidategroups(
668 668 revlog,
669 669 textlen,
670 670 p1,
671 671 p2,
672 672 cachedelta,
673 673 excluded_bases=None,
674 674 target_rev=None,
675 675 snapshot_cache=None,
676 676 ):
677 677 """Provides group of revision to be tested as delta base
678 678
679 679 This top level function focuses on emitting groups with unique and worthwhile
680 680 content. See _raw_candidate_groups for details about the group order.
681 681 """
682 682 # should we try to build a delta?
683 683 if not (len(revlog) and revlog._storedeltachains):
684 684 yield None
685 685 return
686 686
687 687 if target_rev is None:
688 688 target_rev = len(revlog)
689 689
690 if not revlog._generaldelta:
691 # before general delta, there is only one possible delta base
692 yield (target_rev - 1,)
693 yield None
694 return
695
690 696 if (
691 697 cachedelta is not None
692 698 and nullrev == cachedelta[0]
693 699 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
694 700 ):
695 701 # instructions are to forcibly do a full snapshot
696 702 yield None
697 703 return
698 704
699 705 deltalength = revlog.length
700 706 deltaparent = revlog.deltaparent
701 707 sparse = revlog._sparserevlog
702 708 good = None
703 709
704 710 deltas_limit = textlen * LIMIT_DELTA2TEXT
705 711 group_chunk_size = revlog._candidate_group_chunk_size
706 712
707 713 tested = {nullrev}
708 714 candidates = _refinedgroups(
709 715 revlog,
710 716 p1,
711 717 p2,
712 718 cachedelta,
713 719 snapshot_cache=snapshot_cache,
714 720 )
715 721 while True:
716 722 temptative = candidates.send(good)
717 723 if temptative is None:
718 724 break
719 725 group = []
720 726 for rev in temptative:
721 727 # skip over empty deltas (no need to include them in a chain)
722 728 while revlog._generaldelta and not (
723 729 rev == nullrev or rev in tested or deltalength(rev)
724 730 ):
725 731 tested.add(rev)
726 732 rev = deltaparent(rev)
727 733 # no need to try a delta against nullrev, this will be done as a
728 734 # last resort.
729 735 if rev == nullrev:
730 736 continue
731 737 # filter out revisions we tested already
732 738 if rev in tested:
733 739 continue
734 740
735 741 if (
736 742 cachedelta is not None
737 743 and rev == cachedelta[0]
738 744 and cachedelta[2] == DELTA_BASE_REUSE_FORCE
739 745 ):
740 746 # instructions are to forcibly consider/use this delta base
741 747 group.append(rev)
742 748 continue
743 749
744 750 # a higher authority deemed the base unworthy (e.g. censored)
745 751 if excluded_bases is not None and rev in excluded_bases:
746 752 tested.add(rev)
747 753 continue
748 754 # We are in some recomputation cases and that rev is too high in
749 755 # the revlog
750 756 if target_rev is not None and rev >= target_rev:
751 757 tested.add(rev)
752 758 continue
753 759 # filter out delta bases that will never produce a good delta
754 760 if deltas_limit < revlog.length(rev):
755 761 tested.add(rev)
756 762 continue
757 763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
758 764 tested.add(rev)
759 765 continue
760 766 # no delta for rawtext-changing revs (see "candelta" for why)
761 767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
762 768 tested.add(rev)
763 769 continue
764 770
765 771 # If we reach here, we are about to build and test a delta.
766 772 # The delta building process will compute the chaininfo in all
767 773 # cases; since that computation is cached, it is fine to access it
768 774 # here too.
769 775 chainlen, chainsize = revlog._chaininfo(rev)
770 776 # if chain will be too long, skip base
771 777 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
772 778 tested.add(rev)
773 779 continue
774 780 # if the chain already has too much data, skip base
775 781 if deltas_limit < chainsize:
776 782 tested.add(rev)
777 783 continue
778 784 if sparse and revlog.upperboundcomp is not None:
779 785 maxcomp = revlog.upperboundcomp
780 786 basenotsnap = (p1, p2, nullrev)
781 787 if rev not in basenotsnap and revlog.issnapshot(rev):
782 788 snapshotdepth = revlog.snapshotdepth(rev)
783 789 # If text is significantly larger than the base, we can
784 790 # expect the resulting delta to be proportional to the size
785 791 # difference
786 792 revsize = revlog.rawsize(rev)
787 793 rawsizedistance = max(textlen - revsize, 0)
788 794 # use an estimate of the compression upper bound.
789 795 lowestrealisticdeltalen = rawsizedistance // maxcomp
790 796
791 797 # check the absolute constraint on the delta size
792 798 snapshotlimit = textlen >> snapshotdepth
793 799 if snapshotlimit < lowestrealisticdeltalen:
794 800 # delta lower bound is larger than accepted upper bound
795 801 tested.add(rev)
796 802 continue
797 803
798 804 # check the relative constraint on the delta size
799 805 revlength = revlog.length(rev)
800 806 if revlength < lowestrealisticdeltalen:
801 807 # delta probable lower bound is larger than target base
802 808 tested.add(rev)
803 809 continue
804 810
805 811 group.append(rev)
806 812 if group:
807 813 # When the size of the candidate group is big, it can result in a
808 814 # quite significant performance impact. To reduce this, we can send
809 815 # them in smaller batches until the new batch does not provide any
810 816 # improvements.
811 817 #
812 818 # This might reduce the overall efficiency of the compression in
813 819 # some corner cases, but that should also prevent very pathological
814 820 # cases from being an issue. (eg. 20 000 candidates).
815 821 #
816 822 # XXX note that the ordering of the group becomes important as it
817 823 # now impacts the final result. The current order is unprocessed
818 824 # and can be improved.
819 825 if group_chunk_size == 0:
820 826 tested.update(group)
821 827 good = yield tuple(group)
822 828 else:
823 829 prev_good = good
824 830 for start in range(0, len(group), group_chunk_size):
825 831 sub_group = group[start : start + group_chunk_size]
826 832 tested.update(sub_group)
827 833 good = yield tuple(sub_group)
828 834 if prev_good == good:
829 835 break
830 836
831 837 yield None
832 838
833 839
834 840 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
835 841 good = None
836 842 # First we try to reuse the delta contained in the bundle.
837 843 # (or from the source revlog)
838 844 #
839 845 # This logic only applies to general delta repositories and can be disabled
840 846 # through configuration. Disabling source delta reuse is useful when
841 847 # we want to make sure we recompute "optimal" deltas.
842 848 debug_info = None
843 849 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
844 850 # Assume what we received from the server is a good choice
845 851 # build delta will reuse the cache
846 852 if debug_info is not None:
847 853 debug_info['cached-delta.tested'] += 1
848 854 good = yield (cachedelta[0],)
849 855 if good is not None:
850 856 if debug_info is not None:
851 857 debug_info['cached-delta.accepted'] += 1
852 858 yield None
853 859 return
854 860 if snapshot_cache is None:
855 861 snapshot_cache = SnapshotCache()
856 862 groups = _rawgroups(
857 863 revlog,
858 864 p1,
859 865 p2,
860 866 cachedelta,
861 867 snapshot_cache,
862 868 )
863 869 for candidates in groups:
864 870 good = yield candidates
865 871 if good is not None:
866 872 break
867 873
868 874 # If sparse revlog is enabled, we can try to refine the available deltas
869 875 if not revlog._sparserevlog:
870 876 yield None
871 877 return
872 878
873 879 # if we have a refinable value, try to refine it
874 880 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
875 881 # refine snapshot down
876 882 previous = None
877 883 while previous != good:
878 884 previous = good
879 885 base = revlog.deltaparent(good)
880 886 if base == nullrev:
881 887 break
882 888 good = yield (base,)
883 889 # refine snapshot up
884 890 if not snapshot_cache.snapshots:
885 891 snapshot_cache.update(revlog, good + 1)
886 892 previous = None
887 893 while good != previous:
888 894 previous = good
889 895 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
890 896 good = yield children
891 897
892 898 if debug_info is not None:
893 899 if good is None:
894 900 debug_info['no-solution'] += 1
895 901
896 902 yield None
897 903
898 904
899 905 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
900 906 """Provides group of revision to be tested as delta base
901 907
902 908 This lower level function focuses on emitting theoretically interesting deltas
903 909 without looking into any practical details.
904 910
905 911 The group order aims at providing fast or small candidates first.
906 912 """
907 913 gdelta = revlog._generaldelta
908 914 # gate sparse behind general-delta because of issue6056
909 915 sparse = gdelta and revlog._sparserevlog
910 916 curr = len(revlog)
911 917 prev = curr - 1
912 918 deltachain = lambda rev: revlog._deltachain(rev)[0]
913 919
914 920 if gdelta:
915 921 # exclude already lazy tested base if any
916 922 parents = [p for p in (p1, p2) if p != nullrev]
917 923
918 924 if not revlog._deltabothparents and len(parents) == 2:
919 925 parents.sort()
920 926 # To minimize the chance of having to build a fulltext,
921 927 # pick first whichever parent is closest to us (max rev)
922 928 yield (parents[1],)
923 929 # then the other one (min rev) if the first did not fit
924 930 yield (parents[0],)
925 931 elif len(parents) > 0:
926 932 # Test all parents (1 or 2), and keep the best candidate
927 933 yield parents
928 934
929 935 if sparse and parents:
930 936 if snapshot_cache is None:
931 937 # map: base-rev: [snapshot-revs]
932 938 snapshot_cache = SnapshotCache()
933 939 # See if we can use an existing snapshot in the parent chains as a
934 940 # base for a new intermediate-snapshot
935 941 #
936 942 # search for snapshot in parents delta chain
937 943 # map: snapshot-level: snapshot-rev
938 944 parents_snaps = collections.defaultdict(set)
939 945 candidate_chains = [deltachain(p) for p in parents]
940 946 for chain in candidate_chains:
941 947 for idx, s in enumerate(chain):
942 948 if not revlog.issnapshot(s):
943 949 break
944 950 parents_snaps[idx].add(s)
945 951 snapfloor = min(parents_snaps[0]) + 1
946 952 snapshot_cache.update(revlog, snapfloor)
947 953 # search for the highest "unrelated" revision
948 954 #
949 955 # Adding snapshots used by "unrelated" revisions increases the odds we
950 956 # reuse an independent, yet better, snapshot chain.
951 957 #
952 958 # XXX instead of building a set of revisions, we could lazily enumerate
953 959 # over the chains. That would be more efficient, however we stick to
954 960 # simple code for now.
955 961 all_revs = set()
956 962 for chain in candidate_chains:
957 963 all_revs.update(chain)
958 964 other = None
959 965 for r in revlog.revs(prev, snapfloor):
960 966 if r not in all_revs:
961 967 other = r
962 968 break
963 969 if other is not None:
964 970 # To avoid unfair competition, we won't use unrelated intermediate
965 971 # snapshots that are deeper than the ones from the parent delta
966 972 # chain.
967 973 max_depth = max(parents_snaps.keys())
968 974 chain = deltachain(other)
969 975 for depth, s in enumerate(chain):
970 976 if s < snapfloor:
971 977 continue
972 978 if max_depth < depth:
973 979 break
974 980 if not revlog.issnapshot(s):
975 981 break
976 982 parents_snaps[depth].add(s)
977 983 # Test them as possible intermediate snapshot bases.
978 984 # We test them from highest to lowest level. High level ones are more
979 985 # likely to result in small deltas.
980 986 floor = None
981 987 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
982 988 siblings = set()
983 989 for s in snaps:
984 990 siblings.update(snapshot_cache.snapshots[s])
985 991 # Before considering making a new intermediate snapshot, we check
986 992 # if an existing snapshot, child of a base we consider, would be
987 993 # suitable.
988 994 #
989 995 # It gives a chance to reuse a delta chain "unrelated" to the
990 996 # current revision instead of starting our own. Without such
991 997 # re-use, topological branches would keep reopening new chains,
992 998 # creating more and more snapshots as the repository grows.
993 999
994 1000 if floor is not None:
995 1001 # We only do this for siblings created after the one in our
996 1002 # parent's delta chain. Those created before have fewer chances
997 1003 # to be a valid base since our ancestors had to create a new
998 1004 # snapshot.
999 1005 siblings = [r for r in siblings if floor < r]
1000 1006 yield tuple(sorted(siblings))
1001 1007 # then test the base from our parent's delta chain.
1002 1008 yield tuple(sorted(snaps))
1003 1009 floor = min(snaps)
1004 1010 # No suitable base found in the parent chain, search if any full
1005 1011 # snapshots emitted since parent's base would be a suitable base for an
1006 1012 # intermediate snapshot.
1007 1013 #
1008 1014 # It gives a chance to reuse a delta chain unrelated to the current
1009 1015 # revision instead of starting our own. Without such re-use,
1010 1016 # topological branches would keep reopening new full chains, creating
1011 1017 # more and more snapshots as the repository grows.
1012 1018 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1013 1019 yield tuple(sorted(full))
1014 1020
1015 1021 if not sparse:
1016 1022 # other approach failed try against prev to hopefully save us a
1017 1023 # fulltext.
1018 1024 yield (prev,)
1019 1025
1020 1026
1021 1027 class SnapshotCache:
1022 1028 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1023 1029
1024 1030 def __init__(self):
1025 1031 self.snapshots = collections.defaultdict(set)
1026 1032 self._start_rev = None
1027 1033 self._end_rev = None
1028 1034
1029 1035 def update(self, revlog, start_rev=0):
1030 1036 """find snapshots from start_rev to tip"""
1031 1037 nb_revs = len(revlog)
1032 1038 end_rev = nb_revs - 1
1033 1039 if start_rev > end_rev:
1034 1040 return # range is empty
1035 1041
1036 1042 if self._start_rev is None:
1037 1043 assert self._end_rev is None
1038 1044 self._update(revlog, start_rev, end_rev)
1039 1045 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1040 1046 if start_rev < self._start_rev:
1041 1047 self._update(revlog, start_rev, self._start_rev - 1)
1042 1048 if self._end_rev < end_rev:
1043 1049 self._update(revlog, self._end_rev + 1, end_rev)
1044 1050
1045 1051 if self._start_rev is None:
1046 1052 assert self._end_rev is None
1047 1053 self._end_rev = end_rev
1048 1054 self._start_rev = start_rev
1049 1055 else:
1050 1056 self._start_rev = min(self._start_rev, start_rev)
1051 1057 self._end_rev = max(self._end_rev, end_rev)
1052 1058 assert self._start_rev <= self._end_rev, (
1053 1059 self._start_rev,
1054 1060 self._end_rev,
1055 1061 )
1056 1062
1057 1063 def _update(self, revlog, start_rev, end_rev):
1058 1064 """internal method that actually do update content"""
1059 1065 assert self._start_rev is None or (
1060 1066 start_rev < self._start_rev or start_rev > self._end_rev
1061 1067 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1062 1068 assert self._start_rev is None or (
1063 1069 end_rev < self._start_rev or end_rev > self._end_rev
1064 1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1065 1071 cache = self.snapshots
1066 1072 if util.safehasattr(revlog.index, b'findsnapshots'):
1067 1073 revlog.index.findsnapshots(cache, start_rev, end_rev)
1068 1074 else:
1069 1075 deltaparent = revlog.deltaparent
1070 1076 issnapshot = revlog.issnapshot
1071 1077 for rev in revlog.revs(start_rev, end_rev):
1072 1078 if issnapshot(rev):
1073 1079 cache[deltaparent(rev)].add(rev)
1074 1080
1075 1081
1076 1082 class deltacomputer:
1077 1083 def __init__(
1078 1084 self,
1079 1085 revlog,
1080 1086 write_debug=None,
1081 1087 debug_search=False,
1082 1088 debug_info=None,
1083 1089 ):
1084 1090 self.revlog = revlog
1085 1091 self._write_debug = write_debug
1086 1092 self._debug_search = debug_search
1087 1093 self._debug_info = debug_info
1088 1094 self._snapshot_cache = SnapshotCache()
1089 1095
1090 1096 def buildtext(self, revinfo, fh):
1091 1097 """Builds a fulltext version of a revision
1092 1098
1093 1099 revinfo: revisioninfo instance that contains all needed info
1094 1100 fh: file handle to either the .i or the .d revlog file,
1095 1101 depending on whether it is inlined or not
1096 1102 """
1097 1103 btext = revinfo.btext
1098 1104 if btext[0] is not None:
1099 1105 return btext[0]
1100 1106
1101 1107 revlog = self.revlog
1102 1108 cachedelta = revinfo.cachedelta
1103 1109 baserev = cachedelta[0]
1104 1110 delta = cachedelta[1]
1105 1111
1106 1112 fulltext = btext[0] = _textfromdelta(
1107 1113 fh,
1108 1114 revlog,
1109 1115 baserev,
1110 1116 delta,
1111 1117 revinfo.p1,
1112 1118 revinfo.p2,
1113 1119 revinfo.flags,
1114 1120 revinfo.node,
1115 1121 )
1116 1122 return fulltext
1117 1123
1118 1124 def _builddeltadiff(self, base, revinfo, fh):
1119 1125 revlog = self.revlog
1120 1126 t = self.buildtext(revinfo, fh)
1121 1127 if revlog.iscensored(base):
1122 1128 # deltas based on a censored revision must replace the
1123 1129 # full content in one patch, so delta works everywhere
1124 1130 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1125 1131 delta = header + t
1126 1132 else:
1127 1133 ptext = revlog.rawdata(base, _df=fh)
1128 1134 delta = mdiff.textdiff(ptext, t)
1129 1135
1130 1136 return delta
1131 1137
1132 1138 def _builddeltainfo(self, revinfo, base, fh):
1133 1139 # can we use the cached delta?
1134 1140 revlog = self.revlog
1135 1141 debug_search = self._write_debug is not None and self._debug_search
1136 1142 chainbase = revlog.chainbase(base)
1137 1143 if revlog._generaldelta:
1138 1144 deltabase = base
1139 1145 else:
1140 1146 deltabase = chainbase
1141 1147 snapshotdepth = None
1142 1148 if revlog._sparserevlog and deltabase == nullrev:
1143 1149 snapshotdepth = 0
1144 1150 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1145 1151 # A delta chain should always be one full snapshot,
1146 1152 # zero or more semi-snapshots, and zero or more deltas
1147 1153 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1148 1154 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1149 1155 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1150 1156 delta = None
1151 1157 if revinfo.cachedelta:
1152 1158 cachebase = revinfo.cachedelta[0]
1153 1159 # check if the diff still applies
1154 1160 currentbase = cachebase
1155 1161 while (
1156 1162 currentbase != nullrev
1157 1163 and currentbase != base
1158 1164 and self.revlog.length(currentbase) == 0
1159 1165 ):
1160 1166 currentbase = self.revlog.deltaparent(currentbase)
1161 1167 if self.revlog._lazydelta and currentbase == base:
1162 1168 delta = revinfo.cachedelta[1]
1163 1169 if delta is None:
1164 1170 delta = self._builddeltadiff(base, revinfo, fh)
1165 1171 if debug_search:
1166 1172 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1167 1173 msg %= len(delta)
1168 1174 self._write_debug(msg)
1169 1175 # snapshotdepth needs to be neither None nor a 0-level snapshot
1170 1176 if revlog.upperboundcomp is not None and snapshotdepth:
1171 1177 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1172 1178 snapshotlimit = revinfo.textlen >> snapshotdepth
1173 1179 if debug_search:
1174 1180 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1175 1181 msg %= lowestrealisticdeltalen
1176 1182 self._write_debug(msg)
1177 1183 if snapshotlimit < lowestrealisticdeltalen:
1178 1184 if debug_search:
1179 1185 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1180 1186 self._write_debug(msg)
1181 1187 return None
1182 1188 if revlog.length(base) < lowestrealisticdeltalen:
1183 1189 if debug_search:
1184 1190 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1185 1191 self._write_debug(msg)
1186 1192 return None
1187 1193 header, data = revlog.compress(delta)
1188 1194 deltalen = len(header) + len(data)
1189 1195 offset = revlog.end(len(revlog) - 1)
1190 1196 dist = deltalen + offset - revlog.start(chainbase)
1191 1197 chainlen, compresseddeltalen = revlog._chaininfo(base)
1192 1198 chainlen += 1
1193 1199 compresseddeltalen += deltalen
1194 1200
1195 1201 return _deltainfo(
1196 1202 dist,
1197 1203 deltalen,
1198 1204 (header, data),
1199 1205 deltabase,
1200 1206 chainbase,
1201 1207 chainlen,
1202 1208 compresseddeltalen,
1203 1209 snapshotdepth,
1204 1210 )
1205 1211
1206 1212 def _fullsnapshotinfo(self, fh, revinfo, curr):
1207 1213 rawtext = self.buildtext(revinfo, fh)
1208 1214 data = self.revlog.compress(rawtext)
1209 1215 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1210 1216 deltabase = chainbase = curr
1211 1217 snapshotdepth = 0
1212 1218 chainlen = 1
1213 1219
1214 1220 return _deltainfo(
1215 1221 dist,
1216 1222 deltalen,
1217 1223 data,
1218 1224 deltabase,
1219 1225 chainbase,
1220 1226 chainlen,
1221 1227 compresseddeltalen,
1222 1228 snapshotdepth,
1223 1229 )
1224 1230
1225 1231 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1226 1232 """Find an acceptable delta against a candidate revision
1227 1233
1228 1234 revinfo: information about the revision (instance of _revisioninfo)
1229 1235 fh: file handle to either the .i or the .d revlog file,
1230 1236 depending on whether it is inlined or not
1231 1237
1232 1238 Returns the first acceptable candidate revision, as ordered by
1233 1239 _candidategroups
1234 1240
1235 1241 If no suitable deltabase is found, we return delta info for a full
1236 1242 snapshot.
1237 1243
1238 1244 `excluded_bases` is an optional set of revisions that cannot be used as
1239 1245 a delta base. Use this to recompute deltas suitable in a censor or strip
1240 1246 context.
1241 1247 """
1242 1248 if target_rev is None:
1243 1249 target_rev = len(self.revlog)
1244 1250
1245 1251 if not revinfo.textlen:
1246 1252 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1247 1253
1248 1254 if excluded_bases is None:
1249 1255 excluded_bases = set()
1250 1256
1251 1257 # no delta for flag processor revision (see "candelta" for why)
1252 1258 # not calling candelta since only one revision needs test, also to
1253 1259 # avoid overhead fetching flags again.
1254 1260 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1255 1261 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1256 1262
1257 1263 gather_debug = (
1258 1264 self._write_debug is not None or self._debug_info is not None
1259 1265 )
1260 1266 debug_search = self._write_debug is not None and self._debug_search
1261 1267
1262 1268 if gather_debug:
1263 1269 start = util.timer()
1264 1270
1265 1271 # count the number of different deltas we tried (for debug purposes)
1266 1272 dbg_try_count = 0
1267 1273 # count the number of "search rounds" we did (for debug purposes)
1268 1274 dbg_try_rounds = 0
1269 1275 dbg_type = b'unknown'
1270 1276
1271 1277 cachedelta = revinfo.cachedelta
1272 1278 p1 = revinfo.p1
1273 1279 p2 = revinfo.p2
1274 1280 revlog = self.revlog
1275 1281
1276 1282 deltainfo = None
1277 1283 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1278 1284
1279 1285 if gather_debug:
1280 1286 if p1r != nullrev:
1281 1287 p1_chain_len = revlog._chaininfo(p1r)[0]
1282 1288 else:
1283 1289 p1_chain_len = -1
1284 1290 if p2r != nullrev:
1285 1291 p2_chain_len = revlog._chaininfo(p2r)[0]
1286 1292 else:
1287 1293 p2_chain_len = -1
1288 1294 if debug_search:
1289 1295 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1290 1296 msg %= target_rev
1291 1297 self._write_debug(msg)
1292 1298
1293 1299 groups = _candidategroups(
1294 1300 self.revlog,
1295 1301 revinfo.textlen,
1296 1302 p1r,
1297 1303 p2r,
1298 1304 cachedelta,
1299 1305 excluded_bases,
1300 1306 target_rev,
1301 1307 snapshot_cache=self._snapshot_cache,
1302 1308 )
1303 1309 candidaterevs = next(groups)
1304 1310 while candidaterevs is not None:
1305 1311 dbg_try_rounds += 1
1306 1312 if debug_search:
1307 1313 prev = None
1308 1314 if deltainfo is not None:
1309 1315 prev = deltainfo.base
1310 1316
1311 1317 if (
1312 1318 cachedelta is not None
1313 1319 and len(candidaterevs) == 1
1314 1320 and cachedelta[0] in candidaterevs
1315 1321 ):
1316 1322 round_type = b"cached-delta"
1317 1323 elif p1 in candidaterevs or p2 in candidaterevs:
1318 1324 round_type = b"parents"
1319 1325 elif prev is not None and all(c < prev for c in candidaterevs):
1320 1326 round_type = b"refine-down"
1321 1327 elif prev is not None and all(c > prev for c in candidaterevs):
1322 1328 round_type = b"refine-up"
1323 1329 else:
1324 1330 round_type = b"search-down"
1325 1331 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1326 1332 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1327 1333 self._write_debug(msg)
1328 1334 nominateddeltas = []
1329 1335 if deltainfo is not None:
1330 1336 if debug_search:
1331 1337 msg = (
1332 1338 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1333 1339 )
1334 1340 msg %= (deltainfo.base, deltainfo.deltalen)
1335 1341 self._write_debug(msg)
1336 1342 # if we already found a good delta,
1337 1343 # challenge it against refined candidates
1338 1344 nominateddeltas.append(deltainfo)
1339 1345 for candidaterev in candidaterevs:
1340 1346 if debug_search:
1341 1347 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1342 1348 msg %= candidaterev
1343 1349 self._write_debug(msg)
1344 1350 candidate_type = None
1345 1351 if candidaterev == p1:
1346 1352 candidate_type = b"p1"
1347 1353 elif candidaterev == p2:
1348 1354 candidate_type = b"p2"
1349 1355 elif self.revlog.issnapshot(candidaterev):
1350 1356 candidate_type = b"snapshot-%d"
1351 1357 candidate_type %= self.revlog.snapshotdepth(
1352 1358 candidaterev
1353 1359 )
1354 1360
1355 1361 if candidate_type is not None:
1356 1362 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1357 1363 msg %= candidate_type
1358 1364 self._write_debug(msg)
1359 1365 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1360 1366 msg %= self.revlog.length(candidaterev)
1361 1367 self._write_debug(msg)
1362 1368 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1363 1369 msg %= self.revlog.deltaparent(candidaterev)
1364 1370 self._write_debug(msg)
1365 1371
1366 1372 dbg_try_count += 1
1367 1373
1368 1374 if debug_search:
1369 1375 delta_start = util.timer()
1370 1376 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1371 1377 if debug_search:
1372 1378 delta_end = util.timer()
1373 1379 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1374 1380 msg %= delta_end - delta_start
1375 1381 self._write_debug(msg)
1376 1382 if candidatedelta is not None:
1377 1383 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1378 1384 if debug_search:
1379 1385 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1380 1386 msg %= candidatedelta.deltalen
1381 1387 self._write_debug(msg)
1382 1388 nominateddeltas.append(candidatedelta)
1383 1389 elif debug_search:
1384 1390 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1385 1391 msg %= candidatedelta.deltalen
1386 1392 self._write_debug(msg)
1387 1393 elif debug_search:
1388 1394 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1389 1395 self._write_debug(msg)
1390 1396 if nominateddeltas:
1391 1397 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1392 1398 if deltainfo is not None:
1393 1399 candidaterevs = groups.send(deltainfo.base)
1394 1400 else:
1395 1401 candidaterevs = next(groups)
1396 1402
1397 1403 if deltainfo is None:
1398 1404 dbg_type = b"full"
1399 1405 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1400 1406 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1401 1407 dbg_type = b"snapshot"
1402 1408 else:
1403 1409 dbg_type = b"delta"
1404 1410
1405 1411 if gather_debug:
1406 1412 end = util.timer()
1407 1413 if dbg_type == b'full':
1408 1414 used_cached = (
1409 1415 cachedelta is not None
1410 1416 and dbg_try_rounds == 0
1411 1417 and dbg_try_count == 0
1412 1418 and cachedelta[0] == nullrev
1413 1419 )
1414 1420 else:
1415 1421 used_cached = (
1416 1422 cachedelta is not None
1417 1423 and dbg_try_rounds == 1
1418 1424 and dbg_try_count == 1
1419 1425 and deltainfo.base == cachedelta[0]
1420 1426 )
1421 1427 dbg = {
1422 1428 'duration': end - start,
1423 1429 'revision': target_rev,
1424 1430 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1425 1431 'search_round_count': dbg_try_rounds,
1426 1432 'using-cached-base': used_cached,
1427 1433 'delta_try_count': dbg_try_count,
1428 1434 'type': dbg_type,
1429 1435 'p1-chain-len': p1_chain_len,
1430 1436 'p2-chain-len': p2_chain_len,
1431 1437 }
1432 1438 if (
1433 1439 deltainfo.snapshotdepth # pytype: disable=attribute-error
1434 1440 is not None
1435 1441 ):
1436 1442 dbg[
1437 1443 'snapshot-depth'
1438 1444 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1439 1445 else:
1440 1446 dbg['snapshot-depth'] = 0
1441 1447 target_revlog = b"UNKNOWN"
1442 1448 target_type = self.revlog.target[0]
1443 1449 target_key = self.revlog.target[1]
1444 1450 if target_type == KIND_CHANGELOG:
1445 1451 target_revlog = b'CHANGELOG:'
1446 1452 elif target_type == KIND_MANIFESTLOG:
1447 1453 target_revlog = b'MANIFESTLOG:'
1448 1454 if target_key:
1449 1455 target_revlog += b'%s:' % target_key
1450 1456 elif target_type == KIND_FILELOG:
1451 1457 target_revlog = b'FILELOG:'
1452 1458 if target_key:
1453 1459 target_revlog += b'%s:' % target_key
1454 1460 dbg['target-revlog'] = target_revlog
1455 1461
1456 1462 if self._debug_info is not None:
1457 1463 self._debug_info.append(dbg)
1458 1464
1459 1465 if self._write_debug is not None:
1460 1466 msg = (
1461 1467 b"DBG-DELTAS:"
1462 1468 b" %-12s"
1463 1469 b" rev=%d:"
1464 1470 b" delta-base=%d"
1465 1471 b" is-cached=%d"
1466 1472 b" - search-rounds=%d"
1467 1473 b" try-count=%d"
1468 1474 b" - delta-type=%-6s"
1469 1475 b" snap-depth=%d"
1470 1476 b" - p1-chain-length=%d"
1471 1477 b" p2-chain-length=%d"
1472 1478 b" - duration=%f"
1473 1479 b"\n"
1474 1480 )
1475 1481 msg %= (
1476 1482 dbg["target-revlog"],
1477 1483 dbg["revision"],
1478 1484 dbg["delta-base"],
1479 1485 dbg["using-cached-base"],
1480 1486 dbg["search_round_count"],
1481 1487 dbg["delta_try_count"],
1482 1488 dbg["type"],
1483 1489 dbg["snapshot-depth"],
1484 1490 dbg["p1-chain-len"],
1485 1491 dbg["p2-chain-len"],
1486 1492 dbg["duration"],
1487 1493 )
1488 1494 self._write_debug(msg)
1489 1495 return deltainfo
1490 1496
1491 1497
1492 1498 def delta_compression(default_compression_header, deltainfo):
1493 1499 """return (COMPRESSION_MODE, deltainfo)
1494 1500
1495 1501 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1496 1502 compression.
1497 1503 """
1498 1504 h, d = deltainfo.data
1499 1505 compression_mode = COMP_MODE_INLINE
1500 1506 if not h and not d:
1501 1507 # no data to store at all... declare it uncompressed
1502 1508 compression_mode = COMP_MODE_PLAIN
1503 1509 elif not h:
1504 1510 t = d[0:1]
1505 1511 if t == b'\0':
1506 1512 compression_mode = COMP_MODE_PLAIN
1507 1513 elif t == default_compression_header:
1508 1514 compression_mode = COMP_MODE_DEFAULT
1509 1515 elif h == b'u':
1510 1516 # we have a more efficient way to declare uncompressed
1511 1517 h = b''
1512 1518 compression_mode = COMP_MODE_PLAIN
1513 1519 deltainfo = drop_u_compression(deltainfo)
1514 1520 return compression_mode, deltainfo
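For readers unfamiliar with the search loop above: `finddeltainfo` drives `_candidategroups` through a generator send/yield protocol, where each `yield` hands back a group of candidate bases and the value passed in via `send()` (the best base found so far, or `None`) steers the refinement. A minimal, self-contained sketch of that protocol, with hypothetical candidate groups and a toy selection rule in place of the real heuristics:

    def candidate_groups():
        # yields groups of candidate bases; receives feedback via send()
        good = None
        for group in ([10, 11], [7], [3]):  # hypothetical groups
            good = yield tuple(group)
            if good is not None:
                break  # consumer accepted a base; stop searching
        yield None  # sentinel: no more candidates

    groups = candidate_groups()
    best = None
    candidates = next(groups)
    while candidates is not None:
        # toy rule: pretend the smallest rev always gives the best delta
        best = min(candidates) if best is None else min(best, *candidates)
        candidates = groups.send(best)
    print(best)  # -> 10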