delta-find: move sparse-revlog pre-filtering in the associated class...
marmoute - r52252:a224ce56 default
@@ -1,1881 +1,1887 @@
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""


import abc
import collections
import struct

# import stuff from node for others to import from revlog
from ..node import nullrev
from ..i18n import _

from .constants import (
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_FORCE,
    DELTA_BASE_REUSE_NO,
    KIND_CHANGELOG,
    KIND_FILELOG,
    KIND_MANIFESTLOG,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import attr

from .. import (
    error,
    mdiff,
    util,
)

from . import flagutil

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2
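
# Illustrative note (not part of this changeset): this ratio is applied in
# _is_good_delta_info_chain_quality below, roughly as
#
#     max_chain_data = textlen * LIMIT_DELTA2TEXT
#
# so a 1000-byte revision tolerates at most 2000 bytes of cumulated deltas
# on its chain.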


class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        if rev == nullrev:
            return True
        return rev in self._snapshot


def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> data = [
    ...     5, #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        revlog.data_config.sr_min_gap_size,
    ):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk


def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> data = [
    ...     3, #0 (3)
    ...     5, #1 (2)
    ...     6, #2 (1)
    ...     8, #3 (2)
    ...     8, #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk


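# Worked example (sketch, not part of the module): with the doctest data
# above and targetsize=3, slicing [0, 1, 2, 3, 4, 5, 6, 7, 8] starts from a
# 14-byte span, repeatedly halves the candidate end index until the span
# fits, yields the chunk, then restarts from the cut point. That is how the
# doctest result [[0], [1, 2], [3], [5], [6, 7, 8]] is reached without
# probing every possible boundary.

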
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5, #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps so they can be sorted and popped from largest to
    # smallest
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # removing the gap from the amount of read data shrinks the span,
        # which increases the density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk


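# Worked example (sketch, not part of the module): for revs
# [1, 2, 3, 5, 8, 10, 11, 14] of the doctest revlog above, the payload is
# 5+2+0+4+5+1+3+1 = 21 bytes over a span of 86 - 5 = 81 bytes, a density of
# ~0.26. Dropping the largest gap (34 bytes between revs 11 and 14) brings
# the read down to 47 bytes (density ~0.45), and dropping the next one
# (15 bytes between revs 2 and 5) brings it to 32 bytes (density ~0.66),
# above the 0.5 threshold; cutting at those two gaps yields
# [[1, 2], [5, 8, 10, 11], [14]].

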
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...     5, #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ...     21, #5
    ...     21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (
            endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
        ):
            endidx -= 1

    return revs[startidx:endidx]


def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5, #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])


def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext


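# Sketch (an assumption about mdiff.replacediffheader's layout, not stated
# here): a delta replacing the entire base is a single hunk covering the
# whole base text, so its header packs to struct.pack(b">lll", 0, baselen,
# newlen). When delta[:12] matches that header, the new full text is just
# delta[12:], and the (possibly censored) base never needs decoding.

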
@attr.s(slots=True, frozen=True)
class _deltainfo:
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()


def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into no-compression without header

    This is useful for revlog formats that have a better compression method.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )


# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500

### stage of the search, used for debugging and to select and adjust some logic.
# initial stage, next step is unknown
_STAGE_UNSPECIFIED = "unspecified"
# trying the cached delta
_STAGE_CACHED = "cached"
# trying delta based on parents
_STAGE_PARENTS = "parents"
# trying to build a valid snapshot of any level
_STAGE_SNAPSHOT = "snapshot"
# trying to build a delta based on the previous revision
_STAGE_PREV = "prev"
# trying to build a full snapshot
_STAGE_FULL = "full"


class _BaseDeltaSearch(abc.ABC):
    """perform the search of a good delta for a single revlog revision

    note: some of the deltacomputer.finddeltainfo logic should probably move
    here.
    """

    def __init__(
        self,
        revlog,
        revinfo,
        p1,
        p2,
        cachedelta,
        excluded_bases=None,
        target_rev=None,
        snapshot_cache=None,
    ):
        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
        # so we should never end up asking such a question. Adding the assert
        # as a safe-guard to detect anything that would be fishy in this
        # regard.
        assert (
            cachedelta is None
            or cachedelta[2] != DELTA_BASE_REUSE_FORCE
            or not revlog.delta_config.general_delta
        )
        self.revlog = revlog
        self.revinfo = revinfo
        self.textlen = revinfo.textlen
        self.p1 = p1
        self.p2 = p2
        self.cachedelta = cachedelta
        self.excluded_bases = excluded_bases
        if target_rev is None:
            target_rev = len(self.revlog)
        self.target_rev = target_rev
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        self.snapshot_cache = snapshot_cache

        self.tested = {nullrev}

        self.current_stage = _STAGE_UNSPECIFIED
        self.current_group = None
        self._init_group()

    def is_good_delta_info(self, deltainfo):
        """Returns True if the given delta is good.

        Good means that it is within the disk span, disk size, and chain length
        bounds that we know to be performant.
        """
        if not self._is_good_delta_info_universal(deltainfo):
            return False
        if not self._is_good_delta_info_chain_quality(deltainfo):
            return False
        return True

    def _is_good_delta_info_universal(self, deltainfo):
        """Returns True if the given delta is good.

        This performs generic checks needed by all format variants.

        This is used by is_good_delta_info.
        """

        if deltainfo is None:
            return False

        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
        # so we should never end up asking such a question. Adding the assert
        # as a safe-guard to detect anything that would be fishy in this
        # regard.
        assert (
            self.revinfo.cachedelta is None
            or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
            or not self.revlog.delta_config.general_delta
        )

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the delta
        # will be inefficient.
        if self.revinfo.textlen < deltainfo.deltalen:
            return False

        return True

    def _is_good_delta_info_chain_quality(self, deltainfo):
        """Returns True if the chain associated with the delta is good.

        This performs checks for formats that use delta chains.

        This is used by is_good_delta_info.
        """
        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.

        defaultmax = self.revinfo.textlen * 4
        maxdist = self.revlog.delta_config.max_deltachain_span
        if not maxdist:
            maxdist = deltainfo.distance  # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        #
        # In the sparse-revlog case, we rely on the associated "sparse
        # reading" to avoid issues related to the span of data. In theory, it
        # would be possible to build a pathological revlog where delta
        # patterns would lead to too many reads. However, they do not happen
        # in practice at all. So we skip the span check entirely.
        if (
            not self.revlog.delta_config.sparse_revlog
            and maxdist < deltainfo.distance
        ):
            return False

        # Bad delta from cumulated payload size:
        #
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.
        max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
        # If the sum of the deltas gets larger than K * the target text length.
        if max_chain_data < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if (
            self.revlog.delta_config.max_chain_len
            and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
        ):
            return False
        return True

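    # Worked example (sketch, not part of the changeset): for a 1000-byte
    # revision, the read span may not exceed max(max_deltachain_span,
    # 4 * 1000) bytes (and is not checked at all for sparse revlogs), the
    # cumulated delta payload may not exceed 2 * 1000 bytes
    # (LIMIT_DELTA2TEXT), and the chain length may not exceed
    # delta_config.max_chain_len when one is configured.
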
    @property
    def done(self):
        """True when all possible candidates have been tested"""
        return self.current_group is None

    @abc.abstractmethod
    def next_group(self, good_delta=None):
        """move to the next group to test

        The group of revisions to test will be available in
        `self.current_group`. If the previous group had any good delta, the
        best one can be passed as the `good_delta` parameter to help selecting
        the next group.

        If no revision remains to be tested, `self.done` will be True and
        `self.current_group` will be None.
        """
        pass

    @abc.abstractmethod
    def _init_group(self):
        pass


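# Sketch (an assumed driving loop, inferred from the API above rather than
# taken from this changeset): deltacomputer.finddeltainfo walks the
# candidate groups roughly like this:
#
#     search = _GeneralDeltaSearch(revlog, revinfo, p1, p2, cachedelta)
#     best = None
#     while not search.done:
#         for candidate_rev in search.current_group:
#             # build a delta against candidate_rev and keep it when
#             # search.is_good_delta_info(...) accepts it
#             ...
#         search.next_group(best)

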
class _NoDeltaSearch(_BaseDeltaSearch):
    """Search for no delta.

    This search variant is to be used in cases where we should not store
    deltas.
    """

    def _init_group(self):
        self.current_stage = _STAGE_FULL

    def next_group(self, good_delta=None):
        pass


class _PrevDeltaSearch(_BaseDeltaSearch):
    """Search for a delta against the previous revision only

    This search variant is to be used when the format does not allow for
    deltas against arbitrary bases.
    """

    def _init_group(self):
        self.current_stage = _STAGE_PREV
        self.current_group = [self.target_rev - 1]
        self.tested.update(self.current_group)

    def next_group(self, good_delta=None):
        self.current_stage = _STAGE_FULL
        self.current_group = None


class _GeneralDeltaSearch(_BaseDeltaSearch):
    """Delta search variant for general-delta repository"""

    def _init_group(self):
        # Why search for a delta base if we cannot use a delta base?
        # also see issue6056
        assert self.revlog.delta_config.general_delta
        self._candidates_iterator = self._iter_groups()
        self._last_good = None
        if (
            self.cachedelta is not None
            and self.cachedelta[2] > DELTA_BASE_REUSE_NO
            and self._pre_filter_rev(self.cachedelta[0])
        ):
            # First we try to reuse the delta contained in the bundle (or
            # from the source revlog).
            #
            # This logic only applies to general delta repositories and can be
            # disabled through configuration. Disabling reuse of a source
            # delta is useful when we want to make sure we recomputed
            # "optimal" deltas.
            self.current_stage = _STAGE_CACHED
            self._internal_group = (self.cachedelta[0],)
            self._internal_idx = 0
            self.current_group = self._internal_group
            self.tested.update(self.current_group)
        else:
            self._next_internal_group()

    def _next_internal_group(self):
        # self._internal_group can be larger than self.current_group
        self._internal_idx = 0
        group = self._candidates_iterator.send(self._last_good)
        if group is not None:
            group = self._pre_filter_candidate_revs(group)
        self._internal_group = group
        if self._internal_group is None:
            self.current_group = None
        elif len(self._internal_group) == 0:
            self.next_group()
        else:
            chunk_size = self.revlog.delta_config.candidate_group_chunk_size
            if chunk_size > 0:
                self.current_group = self._internal_group[:chunk_size]
                self._internal_idx += chunk_size
            else:
                self.current_group = self._internal_group
                self._internal_idx += len(self.current_group)

            self.tested.update(self.current_group)

    def next_group(self, good_delta=None):
        old_good = self._last_good
        if good_delta is not None:
            self._last_good = good_delta.base
        if self.current_stage == _STAGE_CACHED and good_delta is not None:
            # the cache is good, let us use the cache as requested
            self._candidates_iterator = None
            self._internal_group = None
            self._internal_idx = None
            self.current_group = None
            return

        if (self._internal_idx < len(self._internal_group)) and (
            old_good != good_delta
        ):
            # When the size of the candidate group is big, it can result in
            # a quite significant performance impact. To reduce this, we
            # can send them in smaller batches until the new batch does not
            # provide any improvements.
            #
            # This might reduce the overall efficiency of the compression
            # in some corner cases, but that should also prevent very
            # pathological cases from being an issue. (e.g. 20,000
            # candidates).
            #
            # XXX note that the ordering of the group becomes important as
            # it now impacts the final result. The current order is
            # unprocessed and can be improved.
            next_idx = self._internal_idx + self._group_chunk_size
            self.current_group = self._internal_group[
                self._internal_idx : next_idx
            ]
            self.tested.update(self.current_group)
            self._internal_idx = next_idx
        else:
            self._next_internal_group()

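    # Illustrative note (sketch, not part of the changeset): with
    # candidate_group_chunk_size = 16 and an internal group of 40
    # candidates, next_group() serves slices [0:16], [16:32] and [32:40],
    # and falls through to the next internal group as soon as a batch fails
    # to improve on the previous best delta.
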
    def _pre_filter_candidate_revs(self, temptative):
        """filter possible candidates before computing a delta

        This function uses various criteria to pre-filter candidate delta
        bases before we compute a delta and evaluate its quality.

        Such pre-filtering limits the number of computed deltas, an expensive
        operation.

        return the updated list of revisions to test
        """
        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        tested = self.tested
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            if self._pre_filter_rev(rev):
                group.append(rev)
            else:
                self.tested.add(rev)
        return group

    def _pre_filter_rev_universal(self, rev):
        """pre filtering that is needed in all cases.

        return True if it seems okay to test a rev, False otherwise.

        used by _pre_filter_rev.
        """
        # no need to try a delta against nullrev, this will be done as
        # a last resort.
        if rev == nullrev:
            return False
        # filter out revisions we tested already
        if rev in self.tested:
            return False

        # a higher authority deemed the base unworthy (e.g. censored)
        if self.excluded_bases is not None and rev in self.excluded_bases:
            return False
        # We are in some recomputation cases and that rev is too high
        # in the revlog
        if self.target_rev is not None and rev >= self.target_rev:
            return False
        # no delta for rawtext-changing revs (see "candelta" for why)
        if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
        return True

941 def _pre_filter_rev_sparse(self, rev):
942 """pre filtering that is needed in sparse revlog cases
943
944 return True if it seems okay to test a rev, False otherwise.
945
946 used by _pre_filter_rev.
947 """
948 assert self.revlog.delta_config.sparse_revlog
949 # if the revision we test against is too small, the resulting delta
950 # will be large anyway as the amount of data to be added is big
951 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
952 return False
953
954 if self.revlog.delta_config.upper_bound_comp is not None:
955 maxcomp = self.revlog.delta_config.upper_bound_comp
956 basenotsnap = (self.p1, self.p2, nullrev)
957 if rev not in basenotsnap and self.revlog.issnapshot(rev):
958 snapshotdepth = self.revlog.snapshotdepth(rev)
959 # If text is significantly larger than the base, we can
960 # expect the resulting delta to be proportional to the size
961 # difference
962 revsize = self.revlog.rawsize(rev)
963 rawsizedistance = max(self.textlen - revsize, 0)
964 # use an estimate of the compression upper bound.
965 lowestrealisticdeltalen = rawsizedistance // maxcomp
966
967 # check the absolute constraint on the delta size
968 snapshotlimit = self.textlen >> snapshotdepth
969 if snapshotlimit < lowestrealisticdeltalen:
970 # delta lower bound is larger than accepted upper
971 # bound
972 return False
973
974 # check the relative constraint on the delta size
975 revlength = self.revlog.length(rev)
976 if revlength < lowestrealisticdeltalen:
977 # delta probable lower bound is larger than target
978 # base
979 return False
980 return True
981
982 def _pre_filter_rev_delta_chain(self, rev):
941 def _pre_filter_rev_delta_chain(self, rev):
983 """pre filtering that is needed in sparse revlog cases
942 """pre filtering that is needed in sparse revlog cases
984
943
985 return True if it seems okay to test a rev, False otherwise.
944 return True if it seems okay to test a rev, False otherwise.
986
945
987 used by _pre_filter_rev.
946 used by _pre_filter_rev.
988 """
947 """
989 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
948 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
990 # filter out delta base that will never produce good delta
949 # filter out delta base that will never produce good delta
991 #
950 #
992 # if the delta of that base is already bigger than the limit
951 # if the delta of that base is already bigger than the limit
993 # for the delta chain size, doing a delta is hopeless.
952 # for the delta chain size, doing a delta is hopeless.
994 if deltas_limit < self.revlog.length(rev):
953 if deltas_limit < self.revlog.length(rev):
995 return False
954 return False
996
955
997 # If we reach here, we are about to build and test a delta.
956 # If we reach here, we are about to build and test a delta.
998 # The delta building process will compute the chaininfo in all
957 # The delta building process will compute the chaininfo in all
999 # cases; since that computation is cached, it is fine to access
958 # cases; since that computation is cached, it is fine to access
1000 # it here too.
959 # it here too.
1001 chainlen, chainsize = self.revlog._chaininfo(rev)
960 chainlen, chainsize = self.revlog._chaininfo(rev)
1002 # if chain will be too long, skip base
961 # if chain will be too long, skip base
1003 if (
962 if (
1004 self.revlog.delta_config.max_chain_len
963 self.revlog.delta_config.max_chain_len
1005 and chainlen >= self.revlog.delta_config.max_chain_len
964 and chainlen >= self.revlog.delta_config.max_chain_len
1006 ):
965 ):
1007 return False
966 return False
1008 # if the chain already has too much data, skip base
967 # if the chain already has too much data, skip base
1009 if deltas_limit < chainsize:
968 if deltas_limit < chainsize:
1010 return False
969 return False
1011 return True
970 return True
1012
971
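The chain checks above combine one per-candidate bound and two cumulative ones. A small numeric sketch of how the three tests interact, with every value invented:

LIMIT_DELTA2TEXT = 2
textlen = 1_000                    # fulltext size of the new revision
deltas_limit = textlen * LIMIT_DELTA2TEXT   # 2_000 bytes of chain budget

candidate_delta_len = 600          # revlog.length(rev) for the candidate
chainlen, chainsize = 3, 1_500     # revlog._chaininfo(rev) (cached)
max_chain_len = 4                  # assumed delta_config.max_chain_len

acceptable = (
    candidate_delta_len <= deltas_limit   # its own delta fits the budget
    and chainlen < max_chain_len          # the chain stays short enough
    and chainsize <= deltas_limit         # accumulated chain data fits too
)
assert acceptable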
1013 def _pre_filter_rev(self, rev):
972 def _pre_filter_rev(self, rev):
1014 """return True if it seems okay to test a rev, False otherwise"""
973 """return True if it seems okay to test a rev, False otherwise"""
1015 if not self._pre_filter_rev_universal(rev):
974 if not self._pre_filter_rev_universal(rev):
1016 return False
975 return False
1017 if not self._pre_filter_rev_delta_chain(rev):
976 if not self._pre_filter_rev_delta_chain(rev):
1018 return False
977 return False
1019 if self.revlog.delta_config.sparse_revlog:
1020 if not self._pre_filter_rev_sparse(rev):
1021 return False
1022
1023 return True
978 return True
1024
979
1025 def _iter_parents(self):
980 def _iter_parents(self):
1026 # exclude bases that were already lazily tested, if any
981 # exclude bases that were already lazily tested, if any
1027 parents = [p for p in (self.p1, self.p2) if p != nullrev]
982 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1028
983
1029 self.current_stage = _STAGE_PARENTS
984 self.current_stage = _STAGE_PARENTS
1030 if (
985 if (
1031 not self.revlog.delta_config.delta_both_parents
986 not self.revlog.delta_config.delta_both_parents
1032 and len(parents) == 2
987 and len(parents) == 2
1033 ):
988 ):
1034 parents.sort()
989 parents.sort()
1035 # To minimize the chance of having to build a fulltext,
990 # To minimize the chance of having to build a fulltext,
1036 # pick first whichever parent is closest to us (max rev)
991 # pick first whichever parent is closest to us (max rev)
1037 yield (parents[1],)
992 yield (parents[1],)
1038 # then the other one (min rev) if the first did not fit
993 # then the other one (min rev) if the first did not fit
1039 yield (parents[0],)
994 yield (parents[0],)
1040 elif len(parents) > 0:
995 elif len(parents) > 0:
1041 # Test all parents (1 or 2), and keep the best candidate
996 # Test all parents (1 or 2), and keep the best candidate
1042 yield parents
997 yield parents
1043
998
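Restating the parent-ordering policy above as a runnable snippet (rev numbers invented; `delta_both_parents` mirrors the config flag):

nullrev = -1
p1, p2 = 8, 3
parents = sorted(p for p in (p1, p2) if p != nullrev)

delta_both_parents = False
if not delta_both_parents and len(parents) == 2:
    groups = [(parents[1],), (parents[0],)]   # closest parent (rev 8) first
else:
    groups = [tuple(parents)]                 # test all parents in one round

assert groups == [(8,), (3,)]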
1044 def _iter_prev(self):
999 def _iter_prev(self):
1045 # other approaches failed; try against prev to hopefully save us a
1000 # other approaches failed; try against prev to hopefully save us a
1046 # fulltext.
1001 # fulltext.
1047 self.current_stage = _STAGE_PREV
1002 self.current_stage = _STAGE_PREV
1048 yield (self.target_rev - 1,)
1003 yield (self.target_rev - 1,)
1049
1004
1050 def _iter_groups(self):
1005 def _iter_groups(self):
1051 good = None
1006 good = None
1052 for group in self._iter_parents():
1007 for group in self._iter_parents():
1053 good = yield group
1008 good = yield group
1054 if good is not None:
1009 if good is not None:
1055 break
1010 break
1056 else:
1011 else:
1057 assert good is None
1012 assert good is None
1058 yield from self._iter_prev()
1013 yield from self._iter_prev()
1059 yield None
1014 yield None
1060
1015
1061
1016
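`_iter_groups` is driven as a coroutine: the caller `send()`s back the base it accepted, and the generator uses that answer to decide what to offer next. A stripped-down model of that protocol, not the real search code (rev numbers invented):

def iter_groups():
    # first round: both parents (invented revs 5 and 4)
    good = yield (5, 4)
    if good is None:
        # parents failed: fall back to `prev` (invented rev 6)
        good = yield (6,)
    yield None  # signal exhaustion to the caller

search = iter_groups()
group = search.send(None)      # priming call returns the first group
while group is not None:
    # pretend only rev 4 produced an acceptable delta
    good = 4 if 4 in group else None
    group = search.send(good)  # the answer steers the generator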
1062 class _SparseDeltaSearch(_GeneralDeltaSearch):
1017 class _SparseDeltaSearch(_GeneralDeltaSearch):
1063 """Delta search variants for sparse-revlog"""
1018 """Delta search variants for sparse-revlog"""
1064
1019
1065 def is_good_delta_info(self, deltainfo):
1020 def is_good_delta_info(self, deltainfo):
1066 """Returns True if the given delta is good.
1021 """Returns True if the given delta is good.
1067
1022
1068 Good means that it is within the disk span, disk size, and chain length
1023 Good means that it is within the disk span, disk size, and chain length
1069 bounds that we know to be performant.
1024 bounds that we know to be performant.
1070 """
1025 """
1071 if not self._is_good_delta_info_universal(deltainfo):
1026 if not self._is_good_delta_info_universal(deltainfo):
1072 return False
1027 return False
1073 if not self._is_good_delta_info_chain_quality(deltainfo):
1028 if not self._is_good_delta_info_chain_quality(deltainfo):
1074 return False
1029 return False
1075 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1030 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1076 return False
1031 return False
1077 return True
1032 return True
1078
1033
1079 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1034 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1080 """Returns True if the chain associated with snapshots
1035 """Returns True if the chain associated with snapshots
1081
1036
1082 This performs checks for formats that use sparse-revlog and intermediate
1037 This performs checks for formats that use sparse-revlog and intermediate
1083 snapshots.
1038 snapshots.
1084
1039
1085 This is used by is_good_delta_info.
1040 This is used by is_good_delta_info.
1086 """
1041 """
1087 # if not a snapshot, this method has no filtering to do
1042 # if not a snapshot, this method has no filtering to do
1088 if deltainfo.snapshotdepth is None:
1043 if deltainfo.snapshotdepth is None:
1089 return True
1044 return True
1090 # bad delta from intermediate snapshot size limit
1045 # bad delta from intermediate snapshot size limit
1091 #
1046 #
1092 # If an intermediate snapshot size is higher than the limit, reject
1047 # If an intermediate snapshot size is higher than the limit, reject
1093 # it. The limit exists to prevent endless chains of intermediate
1048 # it. The limit exists to prevent endless chains of intermediate
1094 # deltas from being created.
1049 # deltas from being created.
1095 if (
1050 if (
1096 self.revinfo.textlen >> deltainfo.snapshotdepth
1051 self.revinfo.textlen >> deltainfo.snapshotdepth
1097 ) < deltainfo.deltalen:
1052 ) < deltainfo.deltalen:
1098 return False
1053 return False
1099
1054
1100 # bad delta if new intermediate snapshot is larger than the previous
1055 # bad delta if new intermediate snapshot is larger than the previous
1101 # snapshot
1056 # snapshot
1102 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1057 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1103 return False
1058 return False
1104
1059
1105 return True
1060 return True
1106
1061
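The two snapshot constraints above reduce to simple arithmetic. A worked example with invented sizes:

textlen = 64_000        # fulltext size of the incoming revision (invented)
snapshotdepth = 2       # depth the new intermediate snapshot would have
deltalen = 9_000        # compressed length of the candidate delta (invented)
base_length = 12_000    # revlog.length(deltainfo.base) (invented)

# a depth-d intermediate snapshot may use at most textlen >> d bytes
assert (textlen >> snapshotdepth) >= deltalen   # 16_000 >= 9_000
# and it must stay smaller than the snapshot it is built upon
assert base_length >= deltalen                  # 12_000 >= 9_000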
1062 def _pre_filter_rev(self, rev):
1063 """return True if it seems okay to test a rev, False otherwise"""
1064 if not self._pre_filter_rev_universal(rev):
1065 return False
1066 if not self._pre_filter_rev_delta_chain(rev):
1067 return False
1068 if not self._pre_filter_rev_sparse(rev):
1069 return False
1070 return True
1071
1072 def _pre_filter_rev_sparse(self, rev):
1073 """pre filtering that is needed in sparse revlog cases
1074
1075 return True if it seems okay to test a rev, False otherwise.
1076
1077 used by _pre_filter_rev.
1078 """
1079 assert self.revlog.delta_config.sparse_revlog
1080 # if the revision we test against is too small, the resulting delta
1081 # will be large anyway as the amount of data to be added is big
1082 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
1083 return False
1084
1085 if self.revlog.delta_config.upper_bound_comp is not None:
1086 maxcomp = self.revlog.delta_config.upper_bound_comp
1087 basenotsnap = (self.p1, self.p2, nullrev)
1088 if rev not in basenotsnap and self.revlog.issnapshot(rev):
1089 snapshotdepth = self.revlog.snapshotdepth(rev)
1090 # If text is significantly larger than the base, we can
1091 # expect the resulting delta to be proportional to the size
1092 # difference
1093 revsize = self.revlog.rawsize(rev)
1094 rawsizedistance = max(self.textlen - revsize, 0)
1095 # use an estimate of the compression upper bound.
1096 lowestrealisticdeltalen = rawsizedistance // maxcomp
1097
1098 # check the absolute constraint on the delta size
1099 snapshotlimit = self.textlen >> snapshotdepth
1100 if snapshotlimit < lowestrealisticdeltalen:
1101 # delta lower bound is larger than accepted upper
1102 # bound
1103 return False
1104
1105 # check the relative constraint on the delta size
1106 revlength = self.revlog.length(rev)
1107 if revlength < lowestrealisticdeltalen:
1108 # delta probable lower bound is larger than target
1109 # base
1110 return False
1111 return True
1112
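The compression upper bound used above can be made concrete with numbers. A hedged sketch, assuming `upper_bound_comp` is set to 10 and all sizes invented:

textlen = 200_000           # size of the revision being stored (invented)
revsize = 120_000           # rawsize of the candidate base (invented)
maxcomp = 10                # assumed delta_config.upper_bound_comp
snapshotdepth = 3           # depth of the candidate snapshot (invented)

rawsizedistance = max(textlen - revsize, 0)           # 80_000
# an optimistic lower bound on the delta size, assuming maxcomp-to-1
# compression of the raw size difference
lowestrealisticdeltalen = rawsizedistance // maxcomp  # 8_000

# absolute constraint: a depth-d snapshot may use at most textlen >> d
snapshotlimit = textlen >> snapshotdepth              # 25_000
assert snapshotlimit >= lowestrealisticdeltalen       # candidate survives

# relative constraint: the delta should not outgrow the candidate base
revlength = 9_000           # revlog.length(rev) for the candidate (invented)
assert revlength >= lowestrealisticdeltalen           # candidate survives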
1107 def _iter_snapshots_base(self):
1113 def _iter_snapshots_base(self):
1108 assert self.revlog.delta_config.sparse_revlog
1114 assert self.revlog.delta_config.sparse_revlog
1109 assert self.current_stage == _STAGE_SNAPSHOT
1115 assert self.current_stage == _STAGE_SNAPSHOT
1110 prev = self.target_rev - 1
1116 prev = self.target_rev - 1
1111 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1117 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1112
1118
1113 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1119 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1114 if not parents:
1120 if not parents:
1115 return
1121 return
1116 # See if an existing snapshot in the parent chains can serve as
1122 # See if an existing snapshot in the parent chains can serve as
1117 # the base for a new intermediate snapshot
1123 # the base for a new intermediate snapshot
1118 #
1124 #
1119 # search for snapshots in the parents' delta chains, building a
1125 # search for snapshots in the parents' delta chains, building a
1120 # map: snapshot-level -> snapshot-revs
1126 # map: snapshot-level -> snapshot-revs
1121 parents_snaps = collections.defaultdict(set)
1127 parents_snaps = collections.defaultdict(set)
1122 candidate_chains = [deltachain(p) for p in parents]
1128 candidate_chains = [deltachain(p) for p in parents]
1123 for chain in candidate_chains:
1129 for chain in candidate_chains:
1124 for idx, s in enumerate(chain):
1130 for idx, s in enumerate(chain):
1125 if not self.revlog.issnapshot(s):
1131 if not self.revlog.issnapshot(s):
1126 break
1132 break
1127 parents_snaps[idx].add(s)
1133 parents_snaps[idx].add(s)
1128 snapfloor = min(parents_snaps[0]) + 1
1134 snapfloor = min(parents_snaps[0]) + 1
1129 self.snapshot_cache.update(self.revlog, snapfloor)
1135 self.snapshot_cache.update(self.revlog, snapfloor)
1130 # search for the highest "unrelated" revision
1136 # search for the highest "unrelated" revision
1131 #
1137 #
1132 # Adding snapshots used by "unrelated" revisions increases the odds
1138 # Adding snapshots used by "unrelated" revisions increases the odds
1133 # that we reuse an independent, yet better snapshot chain.
1139 # that we reuse an independent, yet better snapshot chain.
1134 #
1140 #
1135 # XXX instead of building a set of revisions, we could lazily
1141 # XXX instead of building a set of revisions, we could lazily
1136 # enumerate over the chains. That would be more efficient, however
1142 # enumerate over the chains. That would be more efficient, however
1137 # we stick to simple code for now.
1143 # we stick to simple code for now.
1138 all_revs = set()
1144 all_revs = set()
1139 for chain in candidate_chains:
1145 for chain in candidate_chains:
1140 all_revs.update(chain)
1146 all_revs.update(chain)
1141 other = None
1147 other = None
1142 for r in self.revlog.revs(prev, snapfloor):
1148 for r in self.revlog.revs(prev, snapfloor):
1143 if r not in all_revs:
1149 if r not in all_revs:
1144 other = r
1150 other = r
1145 break
1151 break
1146 if other is not None:
1152 if other is not None:
1147 # To avoid unfair competition, we won't use unrelated
1153 # To avoid unfair competition, we won't use unrelated
1148 # intermediate snapshots that are deeper than the ones from the
1154 # intermediate snapshots that are deeper than the ones from the
1149 # parent delta chain.
1155 # parent delta chain.
1150 max_depth = max(parents_snaps.keys())
1156 max_depth = max(parents_snaps.keys())
1151 chain = deltachain(other)
1157 chain = deltachain(other)
1152 for depth, s in enumerate(chain):
1158 for depth, s in enumerate(chain):
1153 if s < snapfloor:
1159 if s < snapfloor:
1154 continue
1160 continue
1155 if max_depth < depth:
1161 if max_depth < depth:
1156 break
1162 break
1157 if not self.revlog.issnapshot(s):
1163 if not self.revlog.issnapshot(s):
1158 break
1164 break
1159 parents_snaps[depth].add(s)
1165 parents_snaps[depth].add(s)
1160 # Test them as possible intermediate snapshot bases. We test them
1166 # Test them as possible intermediate snapshot bases. We test them
1161 # from highest to lowest level; high-level ones are more likely to
1167 # from highest to lowest level; high-level ones are more likely to
1162 # result in a small delta
1168 # result in a small delta
1163 floor = None
1169 floor = None
1164 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1170 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1165 siblings = set()
1171 siblings = set()
1166 for s in snaps:
1172 for s in snaps:
1167 siblings.update(self.snapshot_cache.snapshots[s])
1173 siblings.update(self.snapshot_cache.snapshots[s])
1168 # Before considering making a new intermediate snapshot, we
1174 # Before considering making a new intermediate snapshot, we
1169 # check if an existing snapshot, a child of the base we consider,
1175 # check if an existing snapshot, a child of the base we consider,
1170 # would be suitable.
1176 # would be suitable.
1171 #
1177 #
1172 # It gives a chance to reuse a delta chain "unrelated" to the
1178 # It gives a chance to reuse a delta chain "unrelated" to the
1173 # current revision instead of starting our own. Without such
1179 # current revision instead of starting our own. Without such
1174 # re-use, topological branches would keep reopening new chains,
1180 # re-use, topological branches would keep reopening new chains,
1175 # creating more and more snapshots as the repository grows.
1181 # creating more and more snapshots as the repository grows.
1176
1182
1177 if floor is not None:
1183 if floor is not None:
1178 # We only do this for siblings created after the one in our
1184 # We only do this for siblings created after the one in our
1179 # parent's delta chain. Those created before have less
1185 # parent's delta chain. Those created before have less
1180 # chance to be a valid base since our ancestors had to
1186 # chance to be a valid base since our ancestors had to
1181 # create a new snapshot.
1187 # create a new snapshot.
1182 siblings = [r for r in siblings if floor < r]
1188 siblings = [r for r in siblings if floor < r]
1183 yield tuple(sorted(siblings))
1189 yield tuple(sorted(siblings))
1184 # then test the base from our parent's delta chain.
1190 # then test the base from our parent's delta chain.
1185 yield tuple(sorted(snaps))
1191 yield tuple(sorted(snaps))
1186 floor = min(snaps)
1192 floor = min(snaps)
1187 # No suitable base found in the parent chain; check whether any full
1193 # No suitable base found in the parent chain; check whether any full
1188 # snapshot emitted since the parent's base would be a suitable base
1194 # snapshot emitted since the parent's base would be a suitable base
1189 # for an intermediate snapshot.
1195 # for an intermediate snapshot.
1190 #
1196 #
1191 # It gives a chance to reuse a delta chain unrelated to the current
1197 # It gives a chance to reuse a delta chain unrelated to the current
1192 # revision instead of starting our own. Without such re-use,
1198 # revision instead of starting our own. Without such re-use,
1193 # topological branches would keep reopening new full chains,
1199 # topological branches would keep reopening new full chains,
1194 # creating more and more snapshots as the repository grows.
1200 # creating more and more snapshots as the repository grows.
1195 full = [
1201 full = [
1196 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1202 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1197 ]
1203 ]
1198 yield tuple(sorted(full))
1204 yield tuple(sorted(full))
1199
1205
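A toy illustration of the `parents_snaps` map built at the start of the method above, using hand-written chains instead of revlog data (all rev numbers invented):

import collections

# delta chains for two parents; leading entries are snapshots by construction
chain_p1 = [0, 7, 12]     # full snapshot 0, then intermediate snapshots
chain_p2 = [0, 9]
is_snapshot = {0, 7, 9, 12}.__contains__

parents_snaps = collections.defaultdict(set)
for chain in (chain_p1, chain_p2):
    for depth, rev in enumerate(chain):
        if not is_snapshot(rev):
            break
        parents_snaps[depth].add(rev)

assert parents_snaps == {0: {0}, 1: {7, 9}, 2: {12}}
snapfloor = min(parents_snaps[0]) + 1     # only consider revs above this
assert snapfloor == 1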
1200 def _iter_snapshots(self):
1206 def _iter_snapshots(self):
1201 assert self.revlog.delta_config.sparse_revlog
1207 assert self.revlog.delta_config.sparse_revlog
1202 self.current_stage = _STAGE_SNAPSHOT
1208 self.current_stage = _STAGE_SNAPSHOT
1203 good = None
1209 good = None
1204 groups = self._iter_snapshots_base()
1210 groups = self._iter_snapshots_base()
1205 for candidates in groups:
1211 for candidates in groups:
1206 good = yield candidates
1212 good = yield candidates
1207 if good is not None:
1213 if good is not None:
1208 break
1214 break
1209 # if we have a refinable value, try to refine it
1215 # if we have a refinable value, try to refine it
1210 if (
1216 if (
1211 good is not None
1217 good is not None
1212 and good not in (self.p1, self.p2)
1218 and good not in (self.p1, self.p2)
1213 and self.revlog.issnapshot(good)
1219 and self.revlog.issnapshot(good)
1214 ):
1220 ):
1215 assert self.current_stage == _STAGE_SNAPSHOT
1221 assert self.current_stage == _STAGE_SNAPSHOT
1216 # refine snapshot down
1222 # refine snapshot down
1217 previous = None
1223 previous = None
1218 while previous != good:
1224 while previous != good:
1219 previous = good
1225 previous = good
1220 base = self.revlog.deltaparent(good)
1226 base = self.revlog.deltaparent(good)
1221 if base == nullrev:
1227 if base == nullrev:
1222 break
1228 break
1223 good = yield (base,)
1229 good = yield (base,)
1224 # refine snapshot up
1230 # refine snapshot up
1225 if not self.snapshot_cache.snapshots:
1231 if not self.snapshot_cache.snapshots:
1226 self.snapshot_cache.update(self.revlog, good + 1)
1232 self.snapshot_cache.update(self.revlog, good + 1)
1227 previous = None
1233 previous = None
1228 while good != previous:
1234 while good != previous:
1229 previous = good
1235 previous = good
1230 children = tuple(
1236 children = tuple(
1231 sorted(c for c in self.snapshot_cache.snapshots[good])
1237 sorted(c for c in self.snapshot_cache.snapshots[good])
1232 )
1238 )
1233 good = yield children
1239 good = yield children
1234 yield None
1240 yield None
1235
1241
1236 def _iter_groups(self):
1242 def _iter_groups(self):
1237 good = None
1243 good = None
1238 for group in self._iter_parents():
1244 for group in self._iter_parents():
1239 good = yield group
1245 good = yield group
1240 if good is not None:
1246 if good is not None:
1241 break
1247 break
1242 else:
1248 else:
1243 assert good is None
1249 assert good is None
1244 assert self.revlog.delta_config.sparse_revlog
1250 assert self.revlog.delta_config.sparse_revlog
1245 # If sparse revlog is enabled, we can try to refine the
1251 # If sparse revlog is enabled, we can try to refine the
1246 # available deltas
1252 # available deltas
1247 iter_snap = self._iter_snapshots()
1253 iter_snap = self._iter_snapshots()
1248 group = iter_snap.send(None)
1254 group = iter_snap.send(None)
1249 while group is not None:
1255 while group is not None:
1250 good = yield group
1256 good = yield group
1251 group = iter_snap.send(good)
1257 group = iter_snap.send(good)
1252 yield None
1258 yield None
1253
1259
1254
1260
1255 class SnapshotCache:
1261 class SnapshotCache:
1256 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1262 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1257
1263
1258 def __init__(self):
1264 def __init__(self):
1259 self.snapshots = collections.defaultdict(set)
1265 self.snapshots = collections.defaultdict(set)
1260 self._start_rev = None
1266 self._start_rev = None
1261 self._end_rev = None
1267 self._end_rev = None
1262
1268
1263 def update(self, revlog, start_rev=0):
1269 def update(self, revlog, start_rev=0):
1264 """find snapshots from start_rev to tip"""
1270 """find snapshots from start_rev to tip"""
1265 nb_revs = len(revlog)
1271 nb_revs = len(revlog)
1266 end_rev = nb_revs - 1
1272 end_rev = nb_revs - 1
1267 if start_rev > end_rev:
1273 if start_rev > end_rev:
1268 return # range is empty
1274 return # range is empty
1269
1275
1270 if self._start_rev is None:
1276 if self._start_rev is None:
1271 assert self._end_rev is None
1277 assert self._end_rev is None
1272 self._update(revlog, start_rev, end_rev)
1278 self._update(revlog, start_rev, end_rev)
1273 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1279 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1274 if start_rev < self._start_rev:
1280 if start_rev < self._start_rev:
1275 self._update(revlog, start_rev, self._start_rev - 1)
1281 self._update(revlog, start_rev, self._start_rev - 1)
1276 if self._end_rev < end_rev:
1282 if self._end_rev < end_rev:
1277 self._update(revlog, self._end_rev + 1, end_rev)
1283 self._update(revlog, self._end_rev + 1, end_rev)
1278
1284
1279 if self._start_rev is None:
1285 if self._start_rev is None:
1280 assert self._end_rev is None
1286 assert self._end_rev is None
1281 self._end_rev = end_rev
1287 self._end_rev = end_rev
1282 self._start_rev = start_rev
1288 self._start_rev = start_rev
1283 else:
1289 else:
1284 self._start_rev = min(self._start_rev, start_rev)
1290 self._start_rev = min(self._start_rev, start_rev)
1285 self._end_rev = max(self._end_rev, end_rev)
1291 self._end_rev = max(self._end_rev, end_rev)
1286 assert self._start_rev <= self._end_rev, (
1292 assert self._start_rev <= self._end_rev, (
1287 self._start_rev,
1293 self._start_rev,
1288 self._end_rev,
1294 self._end_rev,
1289 )
1295 )
1290
1296
1291 def _update(self, revlog, start_rev, end_rev):
1297 def _update(self, revlog, start_rev, end_rev):
1292 """internal method that actually do update content"""
1298 """internal method that actually do update content"""
1293 assert self._start_rev is None or (
1299 assert self._start_rev is None or (
1294 start_rev < self._start_rev or start_rev > self._end_rev
1300 start_rev < self._start_rev or start_rev > self._end_rev
1295 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1301 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1296 assert self._start_rev is None or (
1302 assert self._start_rev is None or (
1297 end_rev < self._start_rev or end_rev > self._end_rev
1303 end_rev < self._start_rev or end_rev > self._end_rev
1298 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1304 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1299 cache = self.snapshots
1305 cache = self.snapshots
1300 if hasattr(revlog.index, 'findsnapshots'):
1306 if hasattr(revlog.index, 'findsnapshots'):
1301 revlog.index.findsnapshots(cache, start_rev, end_rev)
1307 revlog.index.findsnapshots(cache, start_rev, end_rev)
1302 else:
1308 else:
1303 deltaparent = revlog.deltaparent
1309 deltaparent = revlog.deltaparent
1304 issnapshot = revlog.issnapshot
1310 issnapshot = revlog.issnapshot
1305 for rev in revlog.revs(start_rev, end_rev):
1311 for rev in revlog.revs(start_rev, end_rev):
1306 if issnapshot(rev):
1312 if issnapshot(rev):
1307 cache[deltaparent(rev)].add(rev)
1313 cache[deltaparent(rev)].add(rev)
1308
1314
1309
1315
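The range bookkeeping in `update`/`_update` above only rescans the uncovered edges of the requested interval. A stand-in class, not the real cache, showing just that behaviour:

class RangeCache:
    """Tracks which [start, end] rev range has been scanned already."""
    def __init__(self):
        self._start = self._end = None

    def update(self, start, end):
        scanned = []
        if self._start is None:
            scanned.append((start, end))            # first scan: whole range
        else:
            if start < self._start:                 # extend to the left
                scanned.append((start, self._start - 1))
            if self._end < end:                     # extend to the right
                scanned.append((self._end + 1, end))
        self._start = start if self._start is None else min(self._start, start)
        self._end = end if self._end is None else max(self._end, end)
        return scanned

c = RangeCache()
assert c.update(5, 9) == [(5, 9)]              # initial scan
assert c.update(3, 11) == [(3, 4), (10, 11)]   # only the new edges rescanned
assert c.update(6, 8) == []                    # fully covered: nothing to do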
1310 class deltacomputer:
1316 class deltacomputer:
1311 """object capable of computing delta and finding delta for multiple revision
1317 """object capable of computing delta and finding delta for multiple revision
1312
1318
1313 This object is meant to compute and find multiple deltas applied to the same
1319 This object is meant to compute and find multiple deltas applied to the same
1314 revlog.
1320 revlog.
1315 """
1321 """
1316
1322
1317 def __init__(
1323 def __init__(
1318 self,
1324 self,
1319 revlog,
1325 revlog,
1320 write_debug=None,
1326 write_debug=None,
1321 debug_search=False,
1327 debug_search=False,
1322 debug_info=None,
1328 debug_info=None,
1323 ):
1329 ):
1324 self.revlog = revlog
1330 self.revlog = revlog
1325 self._write_debug = write_debug
1331 self._write_debug = write_debug
1326 if write_debug is None:
1332 if write_debug is None:
1327 self._debug_search = False
1333 self._debug_search = False
1328 else:
1334 else:
1329 self._debug_search = debug_search
1335 self._debug_search = debug_search
1330 self._debug_info = debug_info
1336 self._debug_info = debug_info
1331 self._snapshot_cache = SnapshotCache()
1337 self._snapshot_cache = SnapshotCache()
1332
1338
1333 @property
1339 @property
1334 def _gather_debug(self):
1340 def _gather_debug(self):
1335 return self._write_debug is not None or self._debug_info is not None
1341 return self._write_debug is not None or self._debug_info is not None
1336
1342
1337 def buildtext(self, revinfo):
1343 def buildtext(self, revinfo):
1338 """Builds a fulltext version of a revision
1344 """Builds a fulltext version of a revision
1339
1345
1340 revinfo: revisioninfo instance that contains all needed info
1346 revinfo: revisioninfo instance that contains all needed info
1341 """
1347 """
1342 btext = revinfo.btext
1348 btext = revinfo.btext
1343 if btext[0] is not None:
1349 if btext[0] is not None:
1344 return btext[0]
1350 return btext[0]
1345
1351
1346 revlog = self.revlog
1352 revlog = self.revlog
1347 cachedelta = revinfo.cachedelta
1353 cachedelta = revinfo.cachedelta
1348 baserev = cachedelta[0]
1354 baserev = cachedelta[0]
1349 delta = cachedelta[1]
1355 delta = cachedelta[1]
1350
1356
1351 fulltext = btext[0] = _textfromdelta(
1357 fulltext = btext[0] = _textfromdelta(
1352 revlog,
1358 revlog,
1353 baserev,
1359 baserev,
1354 delta,
1360 delta,
1355 revinfo.p1,
1361 revinfo.p1,
1356 revinfo.p2,
1362 revinfo.p2,
1357 revinfo.flags,
1363 revinfo.flags,
1358 revinfo.node,
1364 revinfo.node,
1359 )
1365 )
1360 return fulltext
1366 return fulltext
1361
1367
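`buildtext` reconstructs a fulltext by applying the cached delta to its base (via `_textfromdelta`, not shown here). A conceptual toy of that operation, using explicit (start, end, replacement) operations instead of mdiff's binary patch format:

def apply_delta(base: bytes, ops):
    out, pos = [], 0
    for start, end, data in ops:        # ops must be sorted, non-overlapping
        out.append(base[pos:start])     # copy unchanged span
        out.append(data)                # insert replacement bytes
        pos = end
    out.append(base[pos:])
    return b''.join(out)

base = b'line one\nline two\nline three\n'
ops = [(9, 17, b'line 2')]              # replace "line two" with "line 2"
assert apply_delta(base, ops) == b'line one\nline 2\nline three\n'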
1362 def _builddeltadiff(self, base, revinfo):
1368 def _builddeltadiff(self, base, revinfo):
1363 revlog = self.revlog
1369 revlog = self.revlog
1364 t = self.buildtext(revinfo)
1370 t = self.buildtext(revinfo)
1365 if revlog.iscensored(base):
1371 if revlog.iscensored(base):
1366 # deltas based on a censored revision must replace the
1372 # deltas based on a censored revision must replace the
1367 # full content in one patch, so delta works everywhere
1373 # full content in one patch, so delta works everywhere
1368 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1374 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1369 delta = header + t
1375 delta = header + t
1370 else:
1376 else:
1371 ptext = revlog.rawdata(base)
1377 ptext = revlog.rawdata(base)
1372 delta = mdiff.textdiff(ptext, t)
1378 delta = mdiff.textdiff(ptext, t)
1373
1379
1374 return delta
1380 return delta
1375
1381
1376 def _builddeltainfo(
1382 def _builddeltainfo(
1377 self, revinfo, base, target_rev=None, as_snapshot=False
1383 self, revinfo, base, target_rev=None, as_snapshot=False
1378 ):
1384 ):
1379 # can we use the cached delta?
1385 # can we use the cached delta?
1380 revlog = self.revlog
1386 revlog = self.revlog
1381 chainbase = revlog.chainbase(base)
1387 chainbase = revlog.chainbase(base)
1382 if revlog.delta_config.general_delta:
1388 if revlog.delta_config.general_delta:
1383 deltabase = base
1389 deltabase = base
1384 else:
1390 else:
1385 if target_rev is not None and base != target_rev - 1:
1391 if target_rev is not None and base != target_rev - 1:
1386 msg = (
1392 msg = (
1387 b'general delta cannot use delta for something else '
1393 b'general delta cannot use delta for something else '
1388 b'than `prev`: %d<-%d'
1394 b'than `prev`: %d<-%d'
1389 )
1395 )
1390 msg %= (base, target_rev)
1396 msg %= (base, target_rev)
1391 raise error.ProgrammingError(msg)
1397 raise error.ProgrammingError(msg)
1392 deltabase = chainbase
1398 deltabase = chainbase
1393 snapshotdepth = None
1399 snapshotdepth = None
1394 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1400 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1395 snapshotdepth = 0
1401 snapshotdepth = 0
1396 elif revlog.delta_config.sparse_revlog and as_snapshot:
1402 elif revlog.delta_config.sparse_revlog and as_snapshot:
1397 assert revlog.issnapshot(deltabase)
1403 assert revlog.issnapshot(deltabase)
1398 # A delta chain should always be one full snapshot,
1404 # A delta chain should always be one full snapshot,
1399 # zero or more semi-snapshots, and zero or more deltas
1405 # zero or more semi-snapshots, and zero or more deltas
1400 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1406 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1401 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1407 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1402 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1408 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1403 delta = None
1409 delta = None
1404 if revinfo.cachedelta:
1410 if revinfo.cachedelta:
1405 cachebase = revinfo.cachedelta[0]
1411 cachebase = revinfo.cachedelta[0]
1406 # check if the diff still applies
1412 # check if the diff still applies
1407 currentbase = cachebase
1413 currentbase = cachebase
1408 while (
1414 while (
1409 currentbase != nullrev
1415 currentbase != nullrev
1410 and currentbase != base
1416 and currentbase != base
1411 and self.revlog.length(currentbase) == 0
1417 and self.revlog.length(currentbase) == 0
1412 ):
1418 ):
1413 currentbase = self.revlog.deltaparent(currentbase)
1419 currentbase = self.revlog.deltaparent(currentbase)
1414 if self.revlog.delta_config.lazy_delta and currentbase == base:
1420 if self.revlog.delta_config.lazy_delta and currentbase == base:
1415 delta = revinfo.cachedelta[1]
1421 delta = revinfo.cachedelta[1]
1416 if delta is None:
1422 if delta is None:
1417 delta = self._builddeltadiff(base, revinfo)
1423 delta = self._builddeltadiff(base, revinfo)
1418 if self._debug_search:
1424 if self._debug_search:
1419 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1425 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1420 msg %= len(delta)
1426 msg %= len(delta)
1421 self._write_debug(msg)
1427 self._write_debug(msg)
1422 # snapshotdepth needs to be neither None nor a 0-level snapshot
1428 # snapshotdepth needs to be neither None nor a 0-level snapshot
1423 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1429 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1424 lowestrealisticdeltalen = (
1430 lowestrealisticdeltalen = (
1425 len(delta) // revlog.delta_config.upper_bound_comp
1431 len(delta) // revlog.delta_config.upper_bound_comp
1426 )
1432 )
1427 snapshotlimit = revinfo.textlen >> snapshotdepth
1433 snapshotlimit = revinfo.textlen >> snapshotdepth
1428 if self._debug_search:
1434 if self._debug_search:
1429 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1435 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1430 msg %= lowestrealisticdeltalen
1436 msg %= lowestrealisticdeltalen
1431 self._write_debug(msg)
1437 self._write_debug(msg)
1432 if snapshotlimit < lowestrealisticdeltalen:
1438 if snapshotlimit < lowestrealisticdeltalen:
1433 if self._debug_search:
1439 if self._debug_search:
1434 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1440 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1435 self._write_debug(msg)
1441 self._write_debug(msg)
1436 return None
1442 return None
1437 if revlog.length(base) < lowestrealisticdeltalen:
1443 if revlog.length(base) < lowestrealisticdeltalen:
1438 if self._debug_search:
1444 if self._debug_search:
1439 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1445 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1440 self._write_debug(msg)
1446 self._write_debug(msg)
1441 return None
1447 return None
1442 header, data = revlog._inner.compress(delta)
1448 header, data = revlog._inner.compress(delta)
1443 deltalen = len(header) + len(data)
1449 deltalen = len(header) + len(data)
1444 offset = revlog.end(len(revlog) - 1)
1450 offset = revlog.end(len(revlog) - 1)
1445 dist = deltalen + offset - revlog.start(chainbase)
1451 dist = deltalen + offset - revlog.start(chainbase)
1446 chainlen, compresseddeltalen = revlog._chaininfo(base)
1452 chainlen, compresseddeltalen = revlog._chaininfo(base)
1447 chainlen += 1
1453 chainlen += 1
1448 compresseddeltalen += deltalen
1454 compresseddeltalen += deltalen
1449
1455
1450 return _deltainfo(
1456 return _deltainfo(
1451 dist,
1457 dist,
1452 deltalen,
1458 deltalen,
1453 (header, data),
1459 (header, data),
1454 deltabase,
1460 deltabase,
1455 chainbase,
1461 chainbase,
1456 chainlen,
1462 chainlen,
1457 compresseddeltalen,
1463 compresseddeltalen,
1458 snapshotdepth,
1464 snapshotdepth,
1459 )
1465 )
1460
1466
1461 def _fullsnapshotinfo(self, revinfo, curr):
1467 def _fullsnapshotinfo(self, revinfo, curr):
1462 rawtext = self.buildtext(revinfo)
1468 rawtext = self.buildtext(revinfo)
1463 data = self.revlog._inner.compress(rawtext)
1469 data = self.revlog._inner.compress(rawtext)
1464 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1470 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1465 deltabase = chainbase = curr
1471 deltabase = chainbase = curr
1466 snapshotdepth = 0
1472 snapshotdepth = 0
1467 chainlen = 1
1473 chainlen = 1
1468
1474
1469 return _deltainfo(
1475 return _deltainfo(
1470 dist,
1476 dist,
1471 deltalen,
1477 deltalen,
1472 data,
1478 data,
1473 deltabase,
1479 deltabase,
1474 chainbase,
1480 chainbase,
1475 chainlen,
1481 chainlen,
1476 compresseddeltalen,
1482 compresseddeltalen,
1477 snapshotdepth,
1483 snapshotdepth,
1478 )
1484 )
1479
1485
1480 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1486 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1481 """Find an acceptable delta against a candidate revision
1487 """Find an acceptable delta against a candidate revision
1482
1488
1483 revinfo: information about the revision (instance of _revisioninfo)
1489 revinfo: information about the revision (instance of _revisioninfo)
1484
1490
1485 Returns the first acceptable candidate revision, as ordered by
1491 Returns the first acceptable candidate revision, as ordered by
1486 _candidategroups
1492 _candidategroups
1487
1493
1488 If no suitable deltabase is found, we return delta info for a full
1494 If no suitable deltabase is found, we return delta info for a full
1489 snapshot.
1495 snapshot.
1490
1496
1491 `excluded_bases` is an optional set of revisions that cannot be used as
1497 `excluded_bases` is an optional set of revisions that cannot be used as
1492 a delta base. Use this to recompute deltas suitable in censor or strip
1498 a delta base. Use this to recompute deltas suitable in censor or strip
1493 contexts.
1499 contexts.
1494 """
1500 """
1495 if target_rev is None:
1501 if target_rev is None:
1496 target_rev = len(self.revlog)
1502 target_rev = len(self.revlog)
1497
1503
1498 gather_debug = self._gather_debug
1504 gather_debug = self._gather_debug
1499 cachedelta = revinfo.cachedelta
1505 cachedelta = revinfo.cachedelta
1500 revlog = self.revlog
1506 revlog = self.revlog
1501 p1r = p2r = None
1507 p1r = p2r = None
1502
1508
1503 if excluded_bases is None:
1509 if excluded_bases is None:
1504 excluded_bases = set()
1510 excluded_bases = set()
1505
1511
1506 if gather_debug:
1512 if gather_debug:
1507 start = util.timer()
1513 start = util.timer()
1508 dbg = self._one_dbg_data()
1514 dbg = self._one_dbg_data()
1509 dbg['revision'] = target_rev
1515 dbg['revision'] = target_rev
1510 p1r = revlog.rev(revinfo.p1)
1516 p1r = revlog.rev(revinfo.p1)
1511 p2r = revlog.rev(revinfo.p2)
1517 p2r = revlog.rev(revinfo.p2)
1512 if p1r != nullrev:
1518 if p1r != nullrev:
1513 p1_chain_len = revlog._chaininfo(p1r)[0]
1519 p1_chain_len = revlog._chaininfo(p1r)[0]
1514 else:
1520 else:
1515 p1_chain_len = -1
1521 p1_chain_len = -1
1516 if p2r != nullrev:
1522 if p2r != nullrev:
1517 p2_chain_len = revlog._chaininfo(p2r)[0]
1523 p2_chain_len = revlog._chaininfo(p2r)[0]
1518 else:
1524 else:
1519 p2_chain_len = -1
1525 p2_chain_len = -1
1520 dbg['p1-chain-len'] = p1_chain_len
1526 dbg['p1-chain-len'] = p1_chain_len
1521 dbg['p2-chain-len'] = p2_chain_len
1527 dbg['p2-chain-len'] = p2_chain_len
1522
1528
1523 # 1) if the revision is empty, no amount of delta can beat it
1529 # 1) if the revision is empty, no amount of delta can beat it
1524 #
1530 #
1525 # 2) no delta for flag processor revision (see "candelta" for why)
1531 # 2) no delta for flag processor revision (see "candelta" for why)
1526 # not calling candelta since only one revision needs testing, also to
1532 # not calling candelta since only one revision needs testing, also to
1527 # avoid the overhead of fetching flags again.
1533 # avoid the overhead of fetching flags again.
1528 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1534 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1529 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1535 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1530 if gather_debug:
1536 if gather_debug:
1531 end = util.timer()
1537 end = util.timer()
1532 dbg['duration'] = end - start
1538 dbg['duration'] = end - start
1533 dbg[
1539 dbg[
1534 'delta-base'
1540 'delta-base'
1535 ] = deltainfo.base # pytype: disable=attribute-error
1541 ] = deltainfo.base # pytype: disable=attribute-error
1536 dbg['search_round_count'] = 0
1542 dbg['search_round_count'] = 0
1537 dbg['using-cached-base'] = False
1543 dbg['using-cached-base'] = False
1538 dbg['delta_try_count'] = 0
1544 dbg['delta_try_count'] = 0
1539 dbg['type'] = b"full"
1545 dbg['type'] = b"full"
1540 dbg['snapshot-depth'] = 0
1546 dbg['snapshot-depth'] = 0
1541 self._dbg_process_data(dbg)
1547 self._dbg_process_data(dbg)
1542 return deltainfo
1548 return deltainfo
1543
1549
1544 deltainfo = None
1550 deltainfo = None
1545
1551
1546 # If this source delta is to be forcibly reused, let us comply early.
1552 # If this source delta is to be forcibly reused, let us comply early.
1547 if (
1553 if (
1548 revlog.delta_config.general_delta
1554 revlog.delta_config.general_delta
1549 and revinfo.cachedelta is not None
1555 and revinfo.cachedelta is not None
1550 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1556 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1551 ):
1557 ):
1552 base = revinfo.cachedelta[0]
1558 base = revinfo.cachedelta[0]
1553 if base == nullrev:
1559 if base == nullrev:
1554 dbg_type = b"full"
1560 dbg_type = b"full"
1555 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1561 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1556 if gather_debug:
1562 if gather_debug:
1557 snapshotdepth = 0
1563 snapshotdepth = 0
1558 elif base not in excluded_bases:
1564 elif base not in excluded_bases:
1559 delta = revinfo.cachedelta[1]
1565 delta = revinfo.cachedelta[1]
1560 header, data = revlog.compress(delta)
1566 header, data = revlog.compress(delta)
1561 deltalen = len(header) + len(data)
1567 deltalen = len(header) + len(data)
1562 if gather_debug:
1568 if gather_debug:
1563 offset = revlog.end(len(revlog) - 1)
1569 offset = revlog.end(len(revlog) - 1)
1564 chainbase = revlog.chainbase(base)
1570 chainbase = revlog.chainbase(base)
1565 distance = deltalen + offset - revlog.start(chainbase)
1571 distance = deltalen + offset - revlog.start(chainbase)
1566 chainlen, compresseddeltalen = revlog._chaininfo(base)
1572 chainlen, compresseddeltalen = revlog._chaininfo(base)
1567 chainlen += 1
1573 chainlen += 1
1568 compresseddeltalen += deltalen
1574 compresseddeltalen += deltalen
1569 if base == p1r or base == p2r:
1575 if base == p1r or base == p2r:
1570 dbg_type = b"delta"
1576 dbg_type = b"delta"
1571 snapshotdepth = None
1577 snapshotdepth = None
1572 elif not revlog.issnapshot(base):
1578 elif not revlog.issnapshot(base):
1573 snapshotdepth = None
1579 snapshotdepth = None
1574 else:
1580 else:
1575 dbg_type = b"snapshot"
1581 dbg_type = b"snapshot"
1576 snapshotdepth = revlog.snapshotdepth(base) + 1
1582 snapshotdepth = revlog.snapshotdepth(base) + 1
1577 else:
1583 else:
1578 distance = None
1584 distance = None
1579 chainbase = None
1585 chainbase = None
1580 chainlen = None
1586 chainlen = None
1581 compresseddeltalen = None
1587 compresseddeltalen = None
1582 snapshotdepth = None
1588 snapshotdepth = None
1583 deltainfo = _deltainfo(
1589 deltainfo = _deltainfo(
1584 distance=distance,
1590 distance=distance,
1585 deltalen=deltalen,
1591 deltalen=deltalen,
1586 data=(header, data),
1592 data=(header, data),
1587 base=base,
1593 base=base,
1588 chainbase=chainbase,
1594 chainbase=chainbase,
1589 chainlen=chainlen,
1595 chainlen=chainlen,
1590 compresseddeltalen=compresseddeltalen,
1596 compresseddeltalen=compresseddeltalen,
1591 snapshotdepth=snapshotdepth,
1597 snapshotdepth=snapshotdepth,
1592 )
1598 )
1593
1599
1594 if deltainfo is not None:
1600 if deltainfo is not None:
1595 if gather_debug:
1601 if gather_debug:
1596 end = util.timer()
1602 end = util.timer()
1597 dbg['duration'] = end - start
1603 dbg['duration'] = end - start
1598 dbg[
1604 dbg[
1599 'delta-base'
1605 'delta-base'
1600 ] = deltainfo.base # pytype: disable=attribute-error
1606 ] = deltainfo.base # pytype: disable=attribute-error
1601 dbg['search_round_count'] = 0
1607 dbg['search_round_count'] = 0
1602 dbg['using-cached-base'] = True
1608 dbg['using-cached-base'] = True
1603 dbg['delta_try_count'] = 0
1609 dbg['delta_try_count'] = 0
1604 dbg['type'] = b"full"
1610 dbg['type'] = b"full"
1605 if snapshotdepth is None:
1611 if snapshotdepth is None:
1606 dbg['snapshot-depth'] = -1
1612 dbg['snapshot-depth'] = -1
1607 else:
1613 else:
1608 dbg['snapshot-depth'] = snapshotdepth
1614 dbg['snapshot-depth'] = snapshotdepth
1609 self._dbg_process_data(dbg)
1615 self._dbg_process_data(dbg)
1610 return deltainfo
1616 return deltainfo
1611
1617
1612 # count the number of different delta we tried (for debug purpose)
1618 # count the number of different delta we tried (for debug purpose)
1613 dbg_try_count = 0
1619 dbg_try_count = 0
1614 # count the number of "search round" we did. (for debug purpose)
1620 # count the number of "search round" we did. (for debug purpose)
1615 dbg_try_rounds = 0
1621 dbg_try_rounds = 0
1616 dbg_type = b'unknown'
1622 dbg_type = b'unknown'
1617
1623
1618 if p1r is None:
1624 if p1r is None:
1619 p1r = revlog.rev(revinfo.p1)
1625 p1r = revlog.rev(revinfo.p1)
1620 p2r = revlog.rev(revinfo.p2)
1626 p2r = revlog.rev(revinfo.p2)
1621
1627
1622 if self._debug_search:
1628 if self._debug_search:
1623 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1629 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1624 msg %= target_rev
1630 msg %= target_rev
1625 self._write_debug(msg)
1631 self._write_debug(msg)
1626
1632
1627 # should we try to build a delta?
1633 # should we try to build a delta?
1628 if not (len(self.revlog) and self.revlog._storedeltachains):
1634 if not (len(self.revlog) and self.revlog._storedeltachains):
1629 search_cls = _NoDeltaSearch
1635 search_cls = _NoDeltaSearch
1630 elif self.revlog.delta_config.sparse_revlog:
1636 elif self.revlog.delta_config.sparse_revlog:
1631 search_cls = _SparseDeltaSearch
1637 search_cls = _SparseDeltaSearch
1632 elif self.revlog.delta_config.general_delta:
1638 elif self.revlog.delta_config.general_delta:
1633 search_cls = _GeneralDeltaSearch
1639 search_cls = _GeneralDeltaSearch
1634 else:
1640 else:
1635 # before general delta, there is only one possible delta base
1641 # before general delta, there is only one possible delta base
1636 search_cls = _PrevDeltaSearch
1642 search_cls = _PrevDeltaSearch
1637
1643
1638 search = search_cls(
1644 search = search_cls(
1639 self.revlog,
1645 self.revlog,
1640 revinfo,
1646 revinfo,
1641 p1r,
1647 p1r,
1642 p2r,
1648 p2r,
1643 cachedelta,
1649 cachedelta,
1644 excluded_bases,
1650 excluded_bases,
1645 target_rev,
1651 target_rev,
1646 snapshot_cache=self._snapshot_cache,
1652 snapshot_cache=self._snapshot_cache,
1647 )
1653 )
1648
1654
1649 while not search.done:
1655 while not search.done:
1650 current_group = search.current_group
1656 current_group = search.current_group
1651 # current_group can be `None`, but not when search.done is False.
1657 # current_group can be `None`, but not when search.done is False.
1652 # We add this assert to help pytype
1658 # We add this assert to help pytype
1653 assert current_group is not None
1659 assert current_group is not None
1654 candidaterevs = current_group
1660 candidaterevs = current_group
1655 dbg_try_rounds += 1
1661 dbg_try_rounds += 1
1656 if self._debug_search:
1662 if self._debug_search:
1657 prev = None
1663 prev = None
1658 if deltainfo is not None:
1664 if deltainfo is not None:
1659 prev = deltainfo.base
1665 prev = deltainfo.base
1660
1666
1661 if (
1667 if (
1662 cachedelta is not None
1668 cachedelta is not None
1663 and len(candidaterevs) == 1
1669 and len(candidaterevs) == 1
1664 and cachedelta[0] in candidaterevs
1670 and cachedelta[0] in candidaterevs
1665 ):
1671 ):
1666 round_type = b"cached-delta"
1672 round_type = b"cached-delta"
1667 elif p1r in candidaterevs or p2r in candidaterevs:
1673 elif p1r in candidaterevs or p2r in candidaterevs:
1668 round_type = b"parents"
1674 round_type = b"parents"
1669 elif prev is not None and all(c < prev for c in candidaterevs):
1675 elif prev is not None and all(c < prev for c in candidaterevs):
1670 round_type = b"refine-down"
1676 round_type = b"refine-down"
1671 elif prev is not None and all(c > prev for c in candidaterevs):
1677 elif prev is not None and all(c > prev for c in candidaterevs):
1672 round_type = b"refine-up"
1678 round_type = b"refine-up"
1673 else:
1679 else:
1674 round_type = b"search-down"
1680 round_type = b"search-down"
1675 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1681 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1676 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1682 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1677 self._write_debug(msg)
1683 self._write_debug(msg)
1678 nominateddeltas = []
1684 nominateddeltas = []
1679 if deltainfo is not None:
1685 if deltainfo is not None:
1680 if self._debug_search:
1686 if self._debug_search:
1681 msg = (
1687 msg = (
1682 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1688 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1683 )
1689 )
1684 msg %= (deltainfo.base, deltainfo.deltalen)
1690 msg %= (deltainfo.base, deltainfo.deltalen)
1685 self._write_debug(msg)
1691 self._write_debug(msg)
1686 # if we already found a good delta,
1692 # if we already found a good delta,
1687 # challenge it against refined candidates
1693 # challenge it against refined candidates
1688 nominateddeltas.append(deltainfo)
1694 nominateddeltas.append(deltainfo)
1689 for candidaterev in candidaterevs:
1695 for candidaterev in candidaterevs:
1690 if self._debug_search:
1696 if self._debug_search:
1691 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1697 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1692 msg %= candidaterev
1698 msg %= candidaterev
1693 self._write_debug(msg)
1699 self._write_debug(msg)
1694 candidate_type = None
1700 candidate_type = None
1695 if candidaterev == p1r:
1701 if candidaterev == p1r:
1696 candidate_type = b"p1"
1702 candidate_type = b"p1"
1697 elif candidaterev == p2r:
1703 elif candidaterev == p2r:
1698 candidate_type = b"p2"
1704 candidate_type = b"p2"
1699 elif self.revlog.issnapshot(candidaterev):
1705 elif self.revlog.issnapshot(candidaterev):
1700 candidate_type = b"snapshot-%d"
1706 candidate_type = b"snapshot-%d"
1701 candidate_type %= self.revlog.snapshotdepth(
1707 candidate_type %= self.revlog.snapshotdepth(
1702 candidaterev
1708 candidaterev
1703 )
1709 )
1704
1710
1705 if candidate_type is not None:
1711 if candidate_type is not None:
1706 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1712 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1707 msg %= candidate_type
1713 msg %= candidate_type
1708 self._write_debug(msg)
1714 self._write_debug(msg)
1709 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1715 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1710 msg %= self.revlog.length(candidaterev)
1716 msg %= self.revlog.length(candidaterev)
1711 self._write_debug(msg)
1717 self._write_debug(msg)
1712 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1718 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1713 msg %= self.revlog.deltaparent(candidaterev)
1719 msg %= self.revlog.deltaparent(candidaterev)
1714 self._write_debug(msg)
1720 self._write_debug(msg)
1715
1721
1716 dbg_try_count += 1
1722 dbg_try_count += 1
1717
1723
1718 if self._debug_search:
1724 if self._debug_search:
1719 delta_start = util.timer()
1725 delta_start = util.timer()
1720 candidatedelta = self._builddeltainfo(
1726 candidatedelta = self._builddeltainfo(
1721 revinfo,
1727 revinfo,
1722 candidaterev,
1728 candidaterev,
1723 target_rev=target_rev,
1729 target_rev=target_rev,
1724 as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
1730 as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
1725 )
1731 )
1726 if self._debug_search:
1732 if self._debug_search:
1727 delta_end = util.timer()
1733 delta_end = util.timer()
1728 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1734 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1729 msg %= delta_end - delta_start
1735 msg %= delta_end - delta_start
1730 self._write_debug(msg)
1736 self._write_debug(msg)
1731 if candidatedelta is not None:
1737 if candidatedelta is not None:
1732 if search.is_good_delta_info(candidatedelta):
1738 if search.is_good_delta_info(candidatedelta):
1733 if self._debug_search:
1739 if self._debug_search:
1734 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1740 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1735 msg %= candidatedelta.deltalen
1741 msg %= candidatedelta.deltalen
1736 self._write_debug(msg)
1742 self._write_debug(msg)
1737 nominateddeltas.append(candidatedelta)
1743 nominateddeltas.append(candidatedelta)
1738 elif self._debug_search:
1744 elif self._debug_search:
1739 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1745 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1740 msg %= candidatedelta.deltalen
1746 msg %= candidatedelta.deltalen
1741 self._write_debug(msg)
1747 self._write_debug(msg)
1742 elif self._debug_search:
1748 elif self._debug_search:
1743 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1749 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1744 self._write_debug(msg)
1750 self._write_debug(msg)
1745 if nominateddeltas:
1751 if nominateddeltas:
1746 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1752 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1747 search.next_group(deltainfo)
1753 search.next_group(deltainfo)
1748
1754
        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = -1
            self._dbg_process_data(dbg)
        return deltainfo
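
    # Worked illustration of the used_cached flag above (numbers are
    # hypothetical): if a client-supplied cachedelta against revision 41 is
    # accepted on the very first attempt, the search performed exactly one
    # round with one candidate tried and deltainfo.base == 41, so the result
    # is flagged as cached.  A cached *full* snapshot instead shows up as
    # zero rounds and zero tries with cachedelta[0] == nullrev.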
    def _one_dbg_data(self):
        dbg = {
            'duration': None,
            'revision': None,
            'delta-base': None,
            'search_round_count': None,
            'using-cached-base': None,
            'delta_try_count': None,
            'type': None,
            'p1-chain-len': None,
            'p2-chain-len': None,
            'snapshot-depth': None,
            'target-revlog': None,
        }
        target_revlog = b"UNKNOWN"
        target_type = self.revlog.target[0]
        target_key = self.revlog.target[1]
        if target_type == KIND_CHANGELOG:
            target_revlog = b'CHANGELOG:'
        elif target_type == KIND_MANIFESTLOG:
            target_revlog = b'MANIFESTLOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        elif target_type == KIND_FILELOG:
            target_revlog = b'FILELOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        dbg['target-revlog'] = target_revlog
        return dbg
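
    # For illustration (hypothetical path): a filelog for foo.txt reports
    # b'FILELOG:foo.txt:', a manifest log reports b'MANIFESTLOG:' optionally
    # followed by its key when one is set, the changelog reports
    # b'CHANGELOG:', and anything else falls back to b'UNKNOWN'.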
    def _dbg_process_data(self, dbg):
        if self._debug_info is not None:
            self._debug_info.append(dbg)

        if self._write_debug is not None:
            msg = (
                b"DBG-DELTAS:"
                b" %-12s"
                b" rev=%d:"
                b" delta-base=%d"
                b" is-cached=%d"
                b" - search-rounds=%d"
                b" try-count=%d"
                b" - delta-type=%-6s"
                b" snap-depth=%d"
                b" - p1-chain-length=%d"
                b" p2-chain-length=%d"
                b" - duration=%f"
                b"\n"
            )
            msg %= (
                dbg["target-revlog"],
                dbg["revision"],
                dbg["delta-base"],
                dbg["using-cached-base"],
                dbg["search_round_count"],
                dbg["delta_try_count"],
                dbg["type"],
                dbg["snapshot-depth"],
                dbg["p1-chain-len"],
                dbg["p2-chain-len"],
                dbg["duration"],
            )
            self._write_debug(msg)
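
    # For illustration, with hypothetical values the format above renders as
    # a single line (wrapped here for readability):
    #
    #   DBG-DELTAS: FILELOG:foo.txt: rev=4: delta-base=3 is-cached=0
    #     - search-rounds=1 try-count=1 - delta-type=delta  snap-depth=-1
    #     - p1-chain-length=2 p2-chain-length=-1 - duration=0.000542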


def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare it uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
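

# A minimal usage sketch of the dispatch above (the b'x' zlib header and the
# deltainfo contents are assumptions for illustration; real callers pass the
# revlog's configured default engine header):
#
#   mode, deltainfo = delta_compression(b'x', deltainfo)
#   # body starting with the engine header (b'x') -> COMP_MODE_DEFAULT
#   # body starting with b'\0'                    -> COMP_MODE_PLAIN
#   # header b'u' (explicitly uncompressed)       -> COMP_MODE_PLAIN,
#   #                                                b'u' header dropped
#   # anything else                               -> COMP_MODE_INLINE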