##// END OF EJS Templates
delta-find: move is_good_delta_info on the _DeltaSearch class...
marmoute -
r52224:7455cae6 default
parent child Browse files
Show More
@@ -1,1676 +1,1678 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        """return the start offset of a revision's payload"""
        # both the null revision and revision 0 begin at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        """return the end offset of a revision's payload"""
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        """return the payload size of a revision"""
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        """True if `rev` was declared a snapshot at construction time"""
        # the null revision is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    min_gap = revlog.data_config.sr_min_gap_size
    if targetsize is not None:
        targetsize = max(targetsize, min_gap)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_slicer = getattr(revlog.index, 'slicechunktodensity', None)
    if density_slicer is None:
        # no native implementation available, use the pure-python fallback
        def density_slicer(r, density, gap):
            return _slicechunktodensity(revlog, r, density, gap)

    density_groups = density_slicer(
        revs,
        revlog.data_config.sr_density_threshold,
        min_gap,
    )
    for group in density_groups:
        # further split each density-based group to honor `targetsize`
        yield from _slicechunktosize(revlog, group, targetsize)
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    start_data = revlog.start(revs[0])
    end_data = revlog.end(revs[-1])
    full_span = end_data - start_data
    # fast path: the whole segment already fits within the target size
    if targetsize is None or full_span <= targetsize:
        yield revs
        return

    start_idx = 0
    end_idx = 1
    rev_iter = enumerate(revs)
    next(rev_iter)  # the first rev always opens the current chunk
    # first step: get snapshots out of the way
    for idx, rev in rev_iter:
        span = revlog.end(rev) - start_data
        is_snapshot = revlog.issnapshot(rev)
        if span <= targetsize and is_snapshot:
            # still within budget: extend the current chunk
            end_idx = idx + 1
        else:
            # budget exceeded (or non-snapshot reached): flush what we have
            chunk = _trimchunk(revlog, revs, start_idx, end_idx)
            if chunk:
                yield chunk
            start_idx = idx
            start_data = revlog.start(rev)
            end_idx = idx + 1
        if not is_snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nb_item = len(revs)
    while (end_data - start_data) > targetsize:
        end_idx = nb_item
        if nb_item - start_idx <= 1:
            break  # protect against individual chunk larger than limit
        local_end_data = revlog.end(revs[end_idx - 1])
        span = local_end_data - start_data
        while span > targetsize:
            if end_idx - start_idx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until it fits
            end_idx -= (end_idx - start_idx) // 2
            local_end_data = revlog.end(revs[end_idx - 1])
            span = local_end_data - start_data
        chunk = _trimchunk(revlog, revs, start_idx, end_idx)
        if chunk:
            yield chunk
        start_idx = end_idx
        start_data = revlog.start(revs[start_idx])

    # flush whatever is left at the tail
    chunk = _trimchunk(revlog, revs, start_idx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions as
    # (size, index) pairs; only gaps above `mingapsize` are candidates.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # ascending sort, so popping from the end yields the largest gap first
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # dropping this gap shrinks the data we would actually read
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:
        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the start of the first rev to the end of the last one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    header_len = struct.calcsize(b">lll")
    full_replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_len
    )
    if delta[:header_len] == full_replace_header:
        # the delta body IS the new full text
        fulltext = delta[header_len:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta for a revision."""

    # distance from the delta base revision -- bounds the I/O needed to
    # restore the text (see is_good_delta_info)
    distance = attr.ib()
    # length of the (possibly compressed) delta payload
    deltalen = attr.ib()
    # the delta payload itself, as a (header, data) pair of bytes
    data = attr.ib()
    # revision number used as the delta base
    base = attr.ib()
    # revision number starting the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth when the delta is an intermediate snapshot, else None
    # (see the `snapshotdepth` checks in is_good_delta_info)
    snapshotdepth = attr.ib()
567
567
568
568
def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into no-compression without header

    This is useful for revlog formats that have a better compression method.

    Returns a new _deltainfo whose data drops the leading b'u' marker and
    whose deltalen is reduced accordingly; all other fields are unchanged.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
585
585
586
586
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog.delta_config.max_deltachain_span
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (
        revlog.delta_config.max_chain_len
        and revlog.delta_config.max_chain_len < deltainfo.chainlen
    ):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
670
671
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
676
591
677
592
678 class _DeltaSearch:
593 class _DeltaSearch:
679 """perform the search of a good delta for a single revlog revision
594 """perform the search of a good delta for a single revlog revision
680
595
681 note: some of the deltacomputer.finddeltainfo logic should probably move
596 note: some of the deltacomputer.finddeltainfo logic should probably move
682 here.
597 here.
683 """
598 """
684
599
685 def __init__(
600 def __init__(
686 self,
601 self,
687 revlog,
602 revlog,
688 revinfo,
603 revinfo,
689 p1,
604 p1,
690 p2,
605 p2,
691 cachedelta,
606 cachedelta,
692 excluded_bases=None,
607 excluded_bases=None,
693 target_rev=None,
608 target_rev=None,
694 snapshot_cache=None,
609 snapshot_cache=None,
695 ):
610 ):
696 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
697 # so we should never end up asking such question. Adding the assert as
612 # so we should never end up asking such question. Adding the assert as
698 # a safe-guard to detect anything that would be fishy in this regard.
613 # a safe-guard to detect anything that would be fishy in this regard.
699 assert (
614 assert (
700 cachedelta is None
615 cachedelta is None
701 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
702 or not revlog.delta_config.general_delta
617 or not revlog.delta_config.general_delta
703 )
618 )
704 self.revlog = revlog
619 self.revlog = revlog
705 self.revinfo = revinfo
620 self.revinfo = revinfo
706 self.textlen = revinfo.textlen
621 self.textlen = revinfo.textlen
707 self.p1 = p1
622 self.p1 = p1
708 self.p2 = p2
623 self.p2 = p2
709 self.cachedelta = cachedelta
624 self.cachedelta = cachedelta
710 self.excluded_bases = excluded_bases
625 self.excluded_bases = excluded_bases
711 if target_rev is None:
626 if target_rev is None:
712 self.target_rev = len(self.revlog)
627 self.target_rev = len(self.revlog)
713 self.target_rev = target_rev
628 self.target_rev = target_rev
714 if snapshot_cache is None:
629 if snapshot_cache is None:
715 # map: base-rev: [snapshot-revs]
630 # map: base-rev: [snapshot-revs]
716 snapshot_cache = SnapshotCache()
631 snapshot_cache = SnapshotCache()
717 self.snapshot_cache = snapshot_cache
632 self.snapshot_cache = snapshot_cache
718
633
719 self.tested = {nullrev}
634 self.tested = {nullrev}
720
635
721 def candidate_groups(self):
636 def candidate_groups(self):
722 """Provides group of revision to be tested as delta base
637 """Provides group of revision to be tested as delta base
723
638
724 This top level function focus on emitting groups with unique and
639 This top level function focus on emitting groups with unique and
725 worthwhile content. See _raw_candidate_groups for details about the
640 worthwhile content. See _raw_candidate_groups for details about the
726 group order.
641 group order.
727 """
642 """
728 # should we try to build a delta?
643 # should we try to build a delta?
729 if not (len(self.revlog) and self.revlog._storedeltachains):
644 if not (len(self.revlog) and self.revlog._storedeltachains):
730 yield None
645 yield None
731 return
646 return
732
647
733 if not self.revlog.delta_config.general_delta:
648 if not self.revlog.delta_config.general_delta:
734 # before general delta, there is only one possible delta base
649 # before general delta, there is only one possible delta base
735 yield (self.target_rev - 1,)
650 yield (self.target_rev - 1,)
736 yield None
651 yield None
737 return
652 return
738
653
739 deltalength = self.revlog.length
654 deltalength = self.revlog.length
740 deltaparent = self.revlog.deltaparent
655 deltaparent = self.revlog.deltaparent
741 sparse = self.revlog.delta_config.sparse_revlog
656 sparse = self.revlog.delta_config.sparse_revlog
742 good = None
657 good = None
743
658
744 deltas_limit = self.textlen * LIMIT_DELTA2TEXT
659 deltas_limit = self.textlen * LIMIT_DELTA2TEXT
745 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
660 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
746
661
747 tested = self.tested # prefetch for speed and code compactness
662 tested = self.tested # prefetch for speed and code compactness
748 candidates = self._refined_groups()
663 candidates = self._refined_groups()
749 while True:
664 while True:
750 temptative = candidates.send(good)
665 temptative = candidates.send(good)
751 if temptative is None:
666 if temptative is None:
752 break
667 break
753 group = []
668 group = []
754 for rev in temptative:
669 for rev in temptative:
755 # skip over empty delta (no need to include them in a chain)
670 # skip over empty delta (no need to include them in a chain)
756 while not (rev == nullrev or rev in tested or deltalength(rev)):
671 while not (rev == nullrev or rev in tested or deltalength(rev)):
757 tested.add(rev)
672 tested.add(rev)
758 rev = deltaparent(rev)
673 rev = deltaparent(rev)
759 # no need to try a delta against nullrev, this will be done as
674 # no need to try a delta against nullrev, this will be done as
760 # a last resort.
675 # a last resort.
761 if rev == nullrev:
676 if rev == nullrev:
762 continue
677 continue
763 # filter out revision we tested already
678 # filter out revision we tested already
764 if rev in tested:
679 if rev in tested:
765 continue
680 continue
766
681
767 # an higher authority deamed the base unworthy (e.g. censored)
682 # an higher authority deamed the base unworthy (e.g. censored)
768 if (
683 if (
769 self.excluded_bases is not None
684 self.excluded_bases is not None
770 and rev in self.excluded_bases
685 and rev in self.excluded_bases
771 ):
686 ):
772 tested.add(rev)
687 tested.add(rev)
773 continue
688 continue
774 # We are in some recomputation cases and that rev is too high
689 # We are in some recomputation cases and that rev is too high
775 # in the revlog
690 # in the revlog
776 if self.target_rev is not None and rev >= self.target_rev:
691 if self.target_rev is not None and rev >= self.target_rev:
777 tested.add(rev)
692 tested.add(rev)
778 continue
693 continue
779 # filter out delta base that will never produce good delta
694 # filter out delta base that will never produce good delta
780 #
695 #
781 # if the delta of that base is already bigger than the limit
696 # if the delta of that base is already bigger than the limit
782 # for the delta chain size, doing a delta is hopeless.
697 # for the delta chain size, doing a delta is hopeless.
783 if deltas_limit < self.revlog.length(rev):
698 if deltas_limit < self.revlog.length(rev):
784 tested.add(rev)
699 tested.add(rev)
785 continue
700 continue
786 if sparse and self.revlog.rawsize(rev) < (
701 if sparse and self.revlog.rawsize(rev) < (
787 self.textlen // LIMIT_BASE2TEXT
702 self.textlen // LIMIT_BASE2TEXT
788 ):
703 ):
789 tested.add(rev)
704 tested.add(rev)
790 continue
705 continue
791 # no delta for rawtext-changing revs (see "candelta" for why)
706 # no delta for rawtext-changing revs (see "candelta" for why)
792 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
707 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
793 tested.add(rev)
708 tested.add(rev)
794 continue
709 continue
795
710
796 # If we reach here, we are about to build and test a delta.
711 # If we reach here, we are about to build and test a delta.
797 # The delta building process will compute the chaininfo in all
712 # The delta building process will compute the chaininfo in all
798 # case, since that computation is cached, it is fine to access
713 # case, since that computation is cached, it is fine to access
799 # it here too.
714 # it here too.
800 chainlen, chainsize = self.revlog._chaininfo(rev)
715 chainlen, chainsize = self.revlog._chaininfo(rev)
801 # if chain will be too long, skip base
716 # if chain will be too long, skip base
802 if (
717 if (
803 self.revlog.delta_config.max_chain_len
718 self.revlog.delta_config.max_chain_len
804 and chainlen >= self.revlog.delta_config.max_chain_len
719 and chainlen >= self.revlog.delta_config.max_chain_len
805 ):
720 ):
806 tested.add(rev)
721 tested.add(rev)
807 continue
722 continue
808 # if chain already have too much data, skip base
723 # if chain already have too much data, skip base
809 if deltas_limit < chainsize:
724 if deltas_limit < chainsize:
810 tested.add(rev)
725 tested.add(rev)
811 continue
726 continue
812 if (
727 if (
813 sparse
728 sparse
814 and self.revlog.delta_config.upper_bound_comp is not None
729 and self.revlog.delta_config.upper_bound_comp is not None
815 ):
730 ):
816 maxcomp = self.revlog.delta_config.upper_bound_comp
731 maxcomp = self.revlog.delta_config.upper_bound_comp
817 basenotsnap = (self.p1, self.p2, nullrev)
732 basenotsnap = (self.p1, self.p2, nullrev)
818 if rev not in basenotsnap and self.revlog.issnapshot(rev):
733 if rev not in basenotsnap and self.revlog.issnapshot(rev):
819 snapshotdepth = self.revlog.snapshotdepth(rev)
734 snapshotdepth = self.revlog.snapshotdepth(rev)
820 # If text is significantly larger than the base, we can
735 # If text is significantly larger than the base, we can
821 # expect the resulting delta to be proportional to the size
736 # expect the resulting delta to be proportional to the size
822 # difference
737 # difference
823 revsize = self.revlog.rawsize(rev)
738 revsize = self.revlog.rawsize(rev)
824 rawsizedistance = max(self.textlen - revsize, 0)
739 rawsizedistance = max(self.textlen - revsize, 0)
825 # use an estimate of the compression upper bound.
740 # use an estimate of the compression upper bound.
826 lowestrealisticdeltalen = rawsizedistance // maxcomp
741 lowestrealisticdeltalen = rawsizedistance // maxcomp
827
742
828 # check the absolute constraint on the delta size
743 # check the absolute constraint on the delta size
829 snapshotlimit = self.textlen >> snapshotdepth
744 snapshotlimit = self.textlen >> snapshotdepth
830 if snapshotlimit < lowestrealisticdeltalen:
745 if snapshotlimit < lowestrealisticdeltalen:
831 # delta lower bound is larger than accepted upper
746 # delta lower bound is larger than accepted upper
832 # bound
747 # bound
833 tested.add(rev)
748 tested.add(rev)
834 continue
749 continue
835
750
836 # check the relative constraint on the delta size
751 # check the relative constraint on the delta size
837 revlength = self.revlog.length(rev)
752 revlength = self.revlog.length(rev)
838 if revlength < lowestrealisticdeltalen:
753 if revlength < lowestrealisticdeltalen:
839 # delta probable lower bound is larger than target
754 # delta probable lower bound is larger than target
840 # base
755 # base
841 tested.add(rev)
756 tested.add(rev)
842 continue
757 continue
843
758
844 group.append(rev)
759 group.append(rev)
845 if group:
760 if group:
846 # When the size of the candidate group is big, it can result in
761 # When the size of the candidate group is big, it can result in
847 # a quite significant performance impact. To reduce this, we
762 # a quite significant performance impact. To reduce this, we
848 # can send them in smaller batches until the new batch does not
763 # can send them in smaller batches until the new batch does not
849 # provide any improvements.
764 # provide any improvements.
850 #
765 #
851 # This might reduce the overall efficiency of the compression
766 # This might reduce the overall efficiency of the compression
852 # in some corner cases, but that should also prevent very
767 # in some corner cases, but that should also prevent very
853 # pathological cases from being an issue. (eg. 20 000
768 # pathological cases from being an issue. (eg. 20 000
854 # candidates).
769 # candidates).
855 #
770 #
856 # XXX note that the ordering of the group becomes important as
771 # XXX note that the ordering of the group becomes important as
857 # it now impacts the final result. The current order is
772 # it now impacts the final result. The current order is
858 # unprocessed and can be improved.
773 # unprocessed and can be improved.
859 if group_chunk_size == 0:
774 if group_chunk_size == 0:
860 tested.update(group)
775 tested.update(group)
861 good = yield tuple(group)
776 good = yield tuple(group)
862 else:
777 else:
863 prev_good = good
778 prev_good = good
864 for start in range(0, len(group), group_chunk_size):
779 for start in range(0, len(group), group_chunk_size):
865 sub_group = group[start : start + group_chunk_size]
780 sub_group = group[start : start + group_chunk_size]
866 tested.update(sub_group)
781 tested.update(sub_group)
867 good = yield tuple(sub_group)
782 good = yield tuple(sub_group)
868 if prev_good == good:
783 if prev_good == good:
869 break
784 break
870
785
871 yield None
786 yield None
872
787
873 def _refined_groups(self):
788 def _refined_groups(self):
874 good = None
789 good = None
875 # First we try to reuse a the delta contained in the bundle. (or from
790 # First we try to reuse a the delta contained in the bundle. (or from
876 # the source revlog)
791 # the source revlog)
877 #
792 #
878 # This logic only applies to general delta repositories and can be
793 # This logic only applies to general delta repositories and can be
879 # disabled through configuration. Disabling reuse source delta is
794 # disabled through configuration. Disabling reuse source delta is
880 # useful when we want to make sure we recomputed "optimal" deltas.
795 # useful when we want to make sure we recomputed "optimal" deltas.
881 debug_info = None
796 debug_info = None
882 if (
797 if (
883 self.cachedelta is not None
798 self.cachedelta is not None
884 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
799 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
885 ):
800 ):
886 # Assume what we received from the server is a good choice
801 # Assume what we received from the server is a good choice
887 # build delta will reuse the cache
802 # build delta will reuse the cache
888 if debug_info is not None:
803 if debug_info is not None:
889 debug_info['cached-delta.tested'] += 1
804 debug_info['cached-delta.tested'] += 1
890 good = yield (self.cachedelta[0],)
805 good = yield (self.cachedelta[0],)
891 if good is not None:
806 if good is not None:
892 if debug_info is not None:
807 if debug_info is not None:
893 debug_info['cached-delta.accepted'] += 1
808 debug_info['cached-delta.accepted'] += 1
894 yield None
809 yield None
895 return
810 return
896 groups = self._raw_groups()
811 groups = self._raw_groups()
897 for candidates in groups:
812 for candidates in groups:
898 good = yield candidates
813 good = yield candidates
899 if good is not None:
814 if good is not None:
900 break
815 break
901
816
902 # If sparse revlog is enabled, we can try to refine the available
817 # If sparse revlog is enabled, we can try to refine the available
903 # deltas
818 # deltas
904 if not self.revlog.delta_config.sparse_revlog:
819 if not self.revlog.delta_config.sparse_revlog:
905 yield None
820 yield None
906 return
821 return
907
822
908 # if we have a refinable value, try to refine it
823 # if we have a refinable value, try to refine it
909 if (
824 if (
910 good is not None
825 good is not None
911 and good not in (self.p1, self.p2)
826 and good not in (self.p1, self.p2)
912 and self.revlog.issnapshot(good)
827 and self.revlog.issnapshot(good)
913 ):
828 ):
914 # refine snapshot down
829 # refine snapshot down
915 previous = None
830 previous = None
916 while previous != good:
831 while previous != good:
917 previous = good
832 previous = good
918 base = self.revlog.deltaparent(good)
833 base = self.revlog.deltaparent(good)
919 if base == nullrev:
834 if base == nullrev:
920 break
835 break
921 good = yield (base,)
836 good = yield (base,)
922 # refine snapshot up
837 # refine snapshot up
923 if not self.snapshot_cache.snapshots:
838 if not self.snapshot_cache.snapshots:
924 self.snapshot_cache.update(self.revlog, good + 1)
839 self.snapshot_cache.update(self.revlog, good + 1)
925 previous = None
840 previous = None
926 while good != previous:
841 while good != previous:
927 previous = good
842 previous = good
928 children = tuple(
843 children = tuple(
929 sorted(c for c in self.snapshot_cache.snapshots[good])
844 sorted(c for c in self.snapshot_cache.snapshots[good])
930 )
845 )
931 good = yield children
846 good = yield children
932
847
933 if debug_info is not None:
848 if debug_info is not None:
934 if good is None:
849 if good is None:
935 debug_info['no-solution'] += 1
850 debug_info['no-solution'] += 1
936
851
937 yield None
852 yield None
938
853
939 def _raw_groups(self):
854 def _raw_groups(self):
940 """Provides group of revision to be tested as delta base
855 """Provides group of revision to be tested as delta base
941
856
942 This lower level function focus on emitting delta theorically
857 This lower level function focus on emitting delta theorically
943 interresting without looking it any practical details.
858 interresting without looking it any practical details.
944
859
945 The group order aims at providing fast or small candidates first.
860 The group order aims at providing fast or small candidates first.
946 """
861 """
947 # Why search for delta base if we cannot use a delta base ?
862 # Why search for delta base if we cannot use a delta base ?
948 assert self.revlog.delta_config.general_delta
863 assert self.revlog.delta_config.general_delta
949 # also see issue6056
864 # also see issue6056
950 sparse = self.revlog.delta_config.sparse_revlog
865 sparse = self.revlog.delta_config.sparse_revlog
951 curr = len(self.revlog)
866 curr = len(self.revlog)
952 prev = curr - 1
867 prev = curr - 1
953 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
868 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
954
869
955 # exclude already lazy tested base if any
870 # exclude already lazy tested base if any
956 parents = [p for p in (self.p1, self.p2) if p != nullrev]
871 parents = [p for p in (self.p1, self.p2) if p != nullrev]
957
872
958 if (
873 if (
959 not self.revlog.delta_config.delta_both_parents
874 not self.revlog.delta_config.delta_both_parents
960 and len(parents) == 2
875 and len(parents) == 2
961 ):
876 ):
962 parents.sort()
877 parents.sort()
963 # To minimize the chance of having to build a fulltext,
878 # To minimize the chance of having to build a fulltext,
964 # pick first whichever parent is closest to us (max rev)
879 # pick first whichever parent is closest to us (max rev)
965 yield (parents[1],)
880 yield (parents[1],)
966 # then the other one (min rev) if the first did not fit
881 # then the other one (min rev) if the first did not fit
967 yield (parents[0],)
882 yield (parents[0],)
968 elif len(parents) > 0:
883 elif len(parents) > 0:
969 # Test all parents (1 or 2), and keep the best candidate
884 # Test all parents (1 or 2), and keep the best candidate
970 yield parents
885 yield parents
971
886
972 if sparse and parents:
887 if sparse and parents:
973 # See if we can use an existing snapshot in the parent chains to
888 # See if we can use an existing snapshot in the parent chains to
974 # use as a base for a new intermediate-snapshot
889 # use as a base for a new intermediate-snapshot
975 #
890 #
976 # search for snapshot in parents delta chain map: snapshot-level:
891 # search for snapshot in parents delta chain map: snapshot-level:
977 # snapshot-rev
892 # snapshot-rev
978 parents_snaps = collections.defaultdict(set)
893 parents_snaps = collections.defaultdict(set)
979 candidate_chains = [deltachain(p) for p in parents]
894 candidate_chains = [deltachain(p) for p in parents]
980 for chain in candidate_chains:
895 for chain in candidate_chains:
981 for idx, s in enumerate(chain):
896 for idx, s in enumerate(chain):
982 if not self.revlog.issnapshot(s):
897 if not self.revlog.issnapshot(s):
983 break
898 break
984 parents_snaps[idx].add(s)
899 parents_snaps[idx].add(s)
985 snapfloor = min(parents_snaps[0]) + 1
900 snapfloor = min(parents_snaps[0]) + 1
986 self.snapshot_cache.update(self.revlog, snapfloor)
901 self.snapshot_cache.update(self.revlog, snapfloor)
987 # search for the highest "unrelated" revision
902 # search for the highest "unrelated" revision
988 #
903 #
989 # Adding snapshots used by "unrelated" revision increase the odd we
904 # Adding snapshots used by "unrelated" revision increase the odd we
990 # reuse an independant, yet better snapshot chain.
905 # reuse an independant, yet better snapshot chain.
991 #
906 #
992 # XXX instead of building a set of revisions, we could lazily
907 # XXX instead of building a set of revisions, we could lazily
993 # enumerate over the chains. That would be more efficient, however
908 # enumerate over the chains. That would be more efficient, however
994 # we stick to simple code for now.
909 # we stick to simple code for now.
995 all_revs = set()
910 all_revs = set()
996 for chain in candidate_chains:
911 for chain in candidate_chains:
997 all_revs.update(chain)
912 all_revs.update(chain)
998 other = None
913 other = None
999 for r in self.revlog.revs(prev, snapfloor):
914 for r in self.revlog.revs(prev, snapfloor):
1000 if r not in all_revs:
915 if r not in all_revs:
1001 other = r
916 other = r
1002 break
917 break
1003 if other is not None:
918 if other is not None:
1004 # To avoid unfair competition, we won't use unrelated
919 # To avoid unfair competition, we won't use unrelated
1005 # intermediate snapshot that are deeper than the ones from the
920 # intermediate snapshot that are deeper than the ones from the
1006 # parent delta chain.
921 # parent delta chain.
1007 max_depth = max(parents_snaps.keys())
922 max_depth = max(parents_snaps.keys())
1008 chain = deltachain(other)
923 chain = deltachain(other)
1009 for depth, s in enumerate(chain):
924 for depth, s in enumerate(chain):
1010 if s < snapfloor:
925 if s < snapfloor:
1011 continue
926 continue
1012 if max_depth < depth:
927 if max_depth < depth:
1013 break
928 break
1014 if not self.revlog.issnapshot(s):
929 if not self.revlog.issnapshot(s):
1015 break
930 break
1016 parents_snaps[depth].add(s)
931 parents_snaps[depth].add(s)
1017 # Test them as possible intermediate snapshot base We test them
932 # Test them as possible intermediate snapshot base We test them
1018 # from highest to lowest level. High level one are more likely to
933 # from highest to lowest level. High level one are more likely to
1019 # result in small delta
934 # result in small delta
1020 floor = None
935 floor = None
1021 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
936 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1022 siblings = set()
937 siblings = set()
1023 for s in snaps:
938 for s in snaps:
1024 siblings.update(self.snapshot_cache.snapshots[s])
939 siblings.update(self.snapshot_cache.snapshots[s])
1025 # Before considering making a new intermediate snapshot, we
940 # Before considering making a new intermediate snapshot, we
1026 # check if an existing snapshot, children of base we consider,
941 # check if an existing snapshot, children of base we consider,
1027 # would be suitable.
942 # would be suitable.
1028 #
943 #
1029 # It give a change to reuse a delta chain "unrelated" to the
944 # It give a change to reuse a delta chain "unrelated" to the
1030 # current revision instead of starting our own. Without such
945 # current revision instead of starting our own. Without such
1031 # re-use, topological branches would keep reopening new chains.
946 # re-use, topological branches would keep reopening new chains.
1032 # Creating more and more snapshot as the repository grow.
947 # Creating more and more snapshot as the repository grow.
1033
948
1034 if floor is not None:
949 if floor is not None:
1035 # We only do this for siblings created after the one in our
950 # We only do this for siblings created after the one in our
1036 # parent's delta chain. Those created before has less
951 # parent's delta chain. Those created before has less
1037 # chances to be valid base since our ancestors had to
952 # chances to be valid base since our ancestors had to
1038 # create a new snapshot.
953 # create a new snapshot.
1039 siblings = [r for r in siblings if floor < r]
954 siblings = [r for r in siblings if floor < r]
1040 yield tuple(sorted(siblings))
955 yield tuple(sorted(siblings))
1041 # then test the base from our parent's delta chain.
956 # then test the base from our parent's delta chain.
1042 yield tuple(sorted(snaps))
957 yield tuple(sorted(snaps))
1043 floor = min(snaps)
958 floor = min(snaps)
1044 # No suitable base found in the parent chain, search if any full
959 # No suitable base found in the parent chain, search if any full
1045 # snapshots emitted since parent's base would be a suitable base
960 # snapshots emitted since parent's base would be a suitable base
1046 # for an intermediate snapshot.
961 # for an intermediate snapshot.
1047 #
962 #
1048 # It give a chance to reuse a delta chain unrelated to the current
963 # It give a chance to reuse a delta chain unrelated to the current
1049 # revisions instead of starting our own. Without such re-use,
964 # revisions instead of starting our own. Without such re-use,
1050 # topological branches would keep reopening new full chains.
965 # topological branches would keep reopening new full chains.
1051 # Creating more and more snapshot as the repository grow.
966 # Creating more and more snapshot as the repository grow.
1052 full = [
967 full = [
1053 r
968 r
1054 for r in self.snapshot_cache.snapshots[nullrev]
969 for r in self.snapshot_cache.snapshots[nullrev]
1055 if snapfloor <= r
970 if snapfloor <= r
1056 ]
971 ]
1057 yield tuple(sorted(full))
972 yield tuple(sorted(full))
1058
973
1059 if not sparse:
974 if not sparse:
1060 # other approach failed try against prev to hopefully save us a
975 # other approach failed try against prev to hopefully save us a
1061 # fulltext.
976 # fulltext.
1062 yield (prev,)
977 yield (prev,)
1063
978
979 def is_good_delta_info(self, deltainfo):
980 """Returns True if the given delta is good. Good means that it is
981 within the disk span, disk size, and chain length bounds that we know
982 to be performant."""
983 if deltainfo is None:
984 return False
985
986 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
987 # so we should never end up asking such question. Adding the assert as
988 # a safe-guard to detect anything that would be fishy in this regard.
989 assert (
990 self.revinfo.cachedelta is None
991 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
992 or not self.revlog.delta_config.general_delta
993 )
994
995 # - 'deltainfo.distance' is the distance from the base revision --
996 # bounding it limits the amount of I/O we need to do.
997 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
998 # deltas we need to apply -- bounding it limits the amount of CPU
999 # we consume.
1000
1001 textlen = self.revinfo.textlen
1002 defaultmax = textlen * 4
1003 maxdist = self.revlog.delta_config.max_deltachain_span
1004 if not maxdist:
1005 maxdist = deltainfo.distance # ensure the conditional pass
1006 maxdist = max(maxdist, defaultmax)
1007
1008 # Bad delta from read span:
1009 #
1010 # If the span of data read is larger than the maximum allowed.
1011 #
1012 # In the sparse-revlog case, we rely on the associated "sparse
1013 # reading" to avoid issue related to the span of data. In theory, it
1014 # would be possible to build pathological revlog where delta pattern
1015 # would lead to too many reads. However, they do not happen in
1016 # practice at all. So we skip the span check entirely.
1017 if (
1018 not self.revlog.delta_config.sparse_revlog
1019 and maxdist < deltainfo.distance
1020 ):
1021 return False
1022
1023 # Bad delta from new delta size:
1024 #
1025 # If the delta size is larger than the target text, storing the delta
1026 # will be inefficient.
1027 if textlen < deltainfo.deltalen:
1028 return False
1029
1030 # Bad delta from cumulated payload size:
1031 #
1032 # If the sum of delta get larger than K * target text length.
1033 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1034 return False
1035
1036 # Bad delta from chain length:
1037 #
1038 # If the number of delta in the chain gets too high.
1039 if (
1040 self.revlog.delta_config.max_chain_len
1041 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1042 ):
1043 return False
1044
1045 # bad delta from intermediate snapshot size limit
1046 #
1047 # If an intermediate snapshot size is higher than the limit. The
1048 # limit exist to prevent endless chain of intermediate delta to be
1049 # created.
1050 if (
1051 deltainfo.snapshotdepth is not None
1052 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1053 ):
1054 return False
1055
1056 # bad delta if new intermediate snapshot is larger than the previous
1057 # snapshot
1058 if (
1059 deltainfo.snapshotdepth
1060 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1061 ):
1062 return False
1063
1064 return True
1065
1064
1066
1065 class SnapshotCache:
1067 class SnapshotCache:
1066 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1068 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1067
1069
1068 def __init__(self):
1070 def __init__(self):
1069 self.snapshots = collections.defaultdict(set)
1071 self.snapshots = collections.defaultdict(set)
1070 self._start_rev = None
1072 self._start_rev = None
1071 self._end_rev = None
1073 self._end_rev = None
1072
1074
1073 def update(self, revlog, start_rev=0):
1075 def update(self, revlog, start_rev=0):
1074 """find snapshots from start_rev to tip"""
1076 """find snapshots from start_rev to tip"""
1075 nb_revs = len(revlog)
1077 nb_revs = len(revlog)
1076 end_rev = nb_revs - 1
1078 end_rev = nb_revs - 1
1077 if start_rev > end_rev:
1079 if start_rev > end_rev:
1078 return # range is empty
1080 return # range is empty
1079
1081
1080 if self._start_rev is None:
1082 if self._start_rev is None:
1081 assert self._end_rev is None
1083 assert self._end_rev is None
1082 self._update(revlog, start_rev, end_rev)
1084 self._update(revlog, start_rev, end_rev)
1083 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1085 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1084 if start_rev < self._start_rev:
1086 if start_rev < self._start_rev:
1085 self._update(revlog, start_rev, self._start_rev - 1)
1087 self._update(revlog, start_rev, self._start_rev - 1)
1086 if self._end_rev < end_rev:
1088 if self._end_rev < end_rev:
1087 self._update(revlog, self._end_rev + 1, end_rev)
1089 self._update(revlog, self._end_rev + 1, end_rev)
1088
1090
1089 if self._start_rev is None:
1091 if self._start_rev is None:
1090 assert self._end_rev is None
1092 assert self._end_rev is None
1091 self._end_rev = end_rev
1093 self._end_rev = end_rev
1092 self._start_rev = start_rev
1094 self._start_rev = start_rev
1093 else:
1095 else:
1094 self._start_rev = min(self._start_rev, start_rev)
1096 self._start_rev = min(self._start_rev, start_rev)
1095 self._end_rev = max(self._end_rev, end_rev)
1097 self._end_rev = max(self._end_rev, end_rev)
1096 assert self._start_rev <= self._end_rev, (
1098 assert self._start_rev <= self._end_rev, (
1097 self._start_rev,
1099 self._start_rev,
1098 self._end_rev,
1100 self._end_rev,
1099 )
1101 )
1100
1102
1101 def _update(self, revlog, start_rev, end_rev):
1103 def _update(self, revlog, start_rev, end_rev):
1102 """internal method that actually do update content"""
1104 """internal method that actually do update content"""
1103 assert self._start_rev is None or (
1105 assert self._start_rev is None or (
1104 start_rev < self._start_rev or start_rev > self._end_rev
1106 start_rev < self._start_rev or start_rev > self._end_rev
1105 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1107 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1106 assert self._start_rev is None or (
1108 assert self._start_rev is None or (
1107 end_rev < self._start_rev or end_rev > self._end_rev
1109 end_rev < self._start_rev or end_rev > self._end_rev
1108 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1110 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1109 cache = self.snapshots
1111 cache = self.snapshots
1110 if hasattr(revlog.index, 'findsnapshots'):
1112 if hasattr(revlog.index, 'findsnapshots'):
1111 revlog.index.findsnapshots(cache, start_rev, end_rev)
1113 revlog.index.findsnapshots(cache, start_rev, end_rev)
1112 else:
1114 else:
1113 deltaparent = revlog.deltaparent
1115 deltaparent = revlog.deltaparent
1114 issnapshot = revlog.issnapshot
1116 issnapshot = revlog.issnapshot
1115 for rev in revlog.revs(start_rev, end_rev):
1117 for rev in revlog.revs(start_rev, end_rev):
1116 if issnapshot(rev):
1118 if issnapshot(rev):
1117 cache[deltaparent(rev)].add(rev)
1119 cache[deltaparent(rev)].add(rev)
1118
1120
1119
1121
1120 class deltacomputer:
1122 class deltacomputer:
1121 """object capable of computing delta and finding delta for multiple revision
1123 """object capable of computing delta and finding delta for multiple revision
1122
1124
1123 This object is meant to compute and find multiple delta applied to the same
1125 This object is meant to compute and find multiple delta applied to the same
1124 revlog.
1126 revlog.
1125 """
1127 """
1126
1128
1127 def __init__(
1129 def __init__(
1128 self,
1130 self,
1129 revlog,
1131 revlog,
1130 write_debug=None,
1132 write_debug=None,
1131 debug_search=False,
1133 debug_search=False,
1132 debug_info=None,
1134 debug_info=None,
1133 ):
1135 ):
1134 self.revlog = revlog
1136 self.revlog = revlog
1135 self._write_debug = write_debug
1137 self._write_debug = write_debug
1136 if write_debug is None:
1138 if write_debug is None:
1137 self._debug_search = False
1139 self._debug_search = False
1138 else:
1140 else:
1139 self._debug_search = debug_search
1141 self._debug_search = debug_search
1140 self._debug_info = debug_info
1142 self._debug_info = debug_info
1141 self._snapshot_cache = SnapshotCache()
1143 self._snapshot_cache = SnapshotCache()
1142
1144
1143 @property
1145 @property
1144 def _gather_debug(self):
1146 def _gather_debug(self):
1145 return self._write_debug is not None or self._debug_info is not None
1147 return self._write_debug is not None or self._debug_info is not None
1146
1148
1147 def buildtext(self, revinfo):
1149 def buildtext(self, revinfo):
1148 """Builds a fulltext version of a revision
1150 """Builds a fulltext version of a revision
1149
1151
1150 revinfo: revisioninfo instance that contains all needed info
1152 revinfo: revisioninfo instance that contains all needed info
1151 """
1153 """
1152 btext = revinfo.btext
1154 btext = revinfo.btext
1153 if btext[0] is not None:
1155 if btext[0] is not None:
1154 return btext[0]
1156 return btext[0]
1155
1157
1156 revlog = self.revlog
1158 revlog = self.revlog
1157 cachedelta = revinfo.cachedelta
1159 cachedelta = revinfo.cachedelta
1158 baserev = cachedelta[0]
1160 baserev = cachedelta[0]
1159 delta = cachedelta[1]
1161 delta = cachedelta[1]
1160
1162
1161 fulltext = btext[0] = _textfromdelta(
1163 fulltext = btext[0] = _textfromdelta(
1162 revlog,
1164 revlog,
1163 baserev,
1165 baserev,
1164 delta,
1166 delta,
1165 revinfo.p1,
1167 revinfo.p1,
1166 revinfo.p2,
1168 revinfo.p2,
1167 revinfo.flags,
1169 revinfo.flags,
1168 revinfo.node,
1170 revinfo.node,
1169 )
1171 )
1170 return fulltext
1172 return fulltext
1171
1173
1172 def _builddeltadiff(self, base, revinfo):
1174 def _builddeltadiff(self, base, revinfo):
1173 revlog = self.revlog
1175 revlog = self.revlog
1174 t = self.buildtext(revinfo)
1176 t = self.buildtext(revinfo)
1175 if revlog.iscensored(base):
1177 if revlog.iscensored(base):
1176 # deltas based on a censored revision must replace the
1178 # deltas based on a censored revision must replace the
1177 # full content in one patch, so delta works everywhere
1179 # full content in one patch, so delta works everywhere
1178 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1180 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1179 delta = header + t
1181 delta = header + t
1180 else:
1182 else:
1181 ptext = revlog.rawdata(base)
1183 ptext = revlog.rawdata(base)
1182 delta = mdiff.textdiff(ptext, t)
1184 delta = mdiff.textdiff(ptext, t)
1183
1185
1184 return delta
1186 return delta
1185
1187
1186 def _builddeltainfo(self, revinfo, base, target_rev=None):
1188 def _builddeltainfo(self, revinfo, base, target_rev=None):
1187 # can we use the cached delta?
1189 # can we use the cached delta?
1188 revlog = self.revlog
1190 revlog = self.revlog
1189 chainbase = revlog.chainbase(base)
1191 chainbase = revlog.chainbase(base)
1190 if revlog.delta_config.general_delta:
1192 if revlog.delta_config.general_delta:
1191 deltabase = base
1193 deltabase = base
1192 else:
1194 else:
1193 if target_rev is not None and base != target_rev - 1:
1195 if target_rev is not None and base != target_rev - 1:
1194 msg = (
1196 msg = (
1195 b'general delta cannot use delta for something else '
1197 b'general delta cannot use delta for something else '
1196 b'than `prev`: %d<-%d'
1198 b'than `prev`: %d<-%d'
1197 )
1199 )
1198 msg %= (base, target_rev)
1200 msg %= (base, target_rev)
1199 raise error.ProgrammingError(msg)
1201 raise error.ProgrammingError(msg)
1200 deltabase = chainbase
1202 deltabase = chainbase
1201 snapshotdepth = None
1203 snapshotdepth = None
1202 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1204 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1203 snapshotdepth = 0
1205 snapshotdepth = 0
1204 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1206 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1205 # A delta chain should always be one full snapshot,
1207 # A delta chain should always be one full snapshot,
1206 # zero or more semi-snapshots, and zero or more deltas
1208 # zero or more semi-snapshots, and zero or more deltas
1207 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1209 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1208 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1210 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1209 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1211 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1210 delta = None
1212 delta = None
1211 if revinfo.cachedelta:
1213 if revinfo.cachedelta:
1212 cachebase = revinfo.cachedelta[0]
1214 cachebase = revinfo.cachedelta[0]
1213 # check if the diff still apply
1215 # check if the diff still apply
1214 currentbase = cachebase
1216 currentbase = cachebase
1215 while (
1217 while (
1216 currentbase != nullrev
1218 currentbase != nullrev
1217 and currentbase != base
1219 and currentbase != base
1218 and self.revlog.length(currentbase) == 0
1220 and self.revlog.length(currentbase) == 0
1219 ):
1221 ):
1220 currentbase = self.revlog.deltaparent(currentbase)
1222 currentbase = self.revlog.deltaparent(currentbase)
1221 if self.revlog.delta_config.lazy_delta and currentbase == base:
1223 if self.revlog.delta_config.lazy_delta and currentbase == base:
1222 delta = revinfo.cachedelta[1]
1224 delta = revinfo.cachedelta[1]
1223 if delta is None:
1225 if delta is None:
1224 delta = self._builddeltadiff(base, revinfo)
1226 delta = self._builddeltadiff(base, revinfo)
1225 if self._debug_search:
1227 if self._debug_search:
1226 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1228 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1227 msg %= len(delta)
1229 msg %= len(delta)
1228 self._write_debug(msg)
1230 self._write_debug(msg)
1229 # snapshotdept need to be neither None nor 0 level snapshot
1231 # snapshotdept need to be neither None nor 0 level snapshot
1230 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1232 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1231 lowestrealisticdeltalen = (
1233 lowestrealisticdeltalen = (
1232 len(delta) // revlog.delta_config.upper_bound_comp
1234 len(delta) // revlog.delta_config.upper_bound_comp
1233 )
1235 )
1234 snapshotlimit = revinfo.textlen >> snapshotdepth
1236 snapshotlimit = revinfo.textlen >> snapshotdepth
1235 if self._debug_search:
1237 if self._debug_search:
1236 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1238 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1237 msg %= lowestrealisticdeltalen
1239 msg %= lowestrealisticdeltalen
1238 self._write_debug(msg)
1240 self._write_debug(msg)
1239 if snapshotlimit < lowestrealisticdeltalen:
1241 if snapshotlimit < lowestrealisticdeltalen:
1240 if self._debug_search:
1242 if self._debug_search:
1241 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1243 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1242 self._write_debug(msg)
1244 self._write_debug(msg)
1243 return None
1245 return None
1244 if revlog.length(base) < lowestrealisticdeltalen:
1246 if revlog.length(base) < lowestrealisticdeltalen:
1245 if self._debug_search:
1247 if self._debug_search:
1246 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1248 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1247 self._write_debug(msg)
1249 self._write_debug(msg)
1248 return None
1250 return None
1249 header, data = revlog._inner.compress(delta)
1251 header, data = revlog._inner.compress(delta)
1250 deltalen = len(header) + len(data)
1252 deltalen = len(header) + len(data)
1251 offset = revlog.end(len(revlog) - 1)
1253 offset = revlog.end(len(revlog) - 1)
1252 dist = deltalen + offset - revlog.start(chainbase)
1254 dist = deltalen + offset - revlog.start(chainbase)
1253 chainlen, compresseddeltalen = revlog._chaininfo(base)
1255 chainlen, compresseddeltalen = revlog._chaininfo(base)
1254 chainlen += 1
1256 chainlen += 1
1255 compresseddeltalen += deltalen
1257 compresseddeltalen += deltalen
1256
1258
1257 return _deltainfo(
1259 return _deltainfo(
1258 dist,
1260 dist,
1259 deltalen,
1261 deltalen,
1260 (header, data),
1262 (header, data),
1261 deltabase,
1263 deltabase,
1262 chainbase,
1264 chainbase,
1263 chainlen,
1265 chainlen,
1264 compresseddeltalen,
1266 compresseddeltalen,
1265 snapshotdepth,
1267 snapshotdepth,
1266 )
1268 )
1267
1269
1268 def _fullsnapshotinfo(self, revinfo, curr):
1270 def _fullsnapshotinfo(self, revinfo, curr):
1269 rawtext = self.buildtext(revinfo)
1271 rawtext = self.buildtext(revinfo)
1270 data = self.revlog._inner.compress(rawtext)
1272 data = self.revlog._inner.compress(rawtext)
1271 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1273 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1272 deltabase = chainbase = curr
1274 deltabase = chainbase = curr
1273 snapshotdepth = 0
1275 snapshotdepth = 0
1274 chainlen = 1
1276 chainlen = 1
1275
1277
1276 return _deltainfo(
1278 return _deltainfo(
1277 dist,
1279 dist,
1278 deltalen,
1280 deltalen,
1279 data,
1281 data,
1280 deltabase,
1282 deltabase,
1281 chainbase,
1283 chainbase,
1282 chainlen,
1284 chainlen,
1283 compresseddeltalen,
1285 compresseddeltalen,
1284 snapshotdepth,
1286 snapshotdepth,
1285 )
1287 )
1286
1288
1287 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1289 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1288 """Find an acceptable delta against a candidate revision
1290 """Find an acceptable delta against a candidate revision
1289
1291
1290 revinfo: information about the revision (instance of _revisioninfo)
1292 revinfo: information about the revision (instance of _revisioninfo)
1291
1293
1292 Returns the first acceptable candidate revision, as ordered by
1294 Returns the first acceptable candidate revision, as ordered by
1293 _candidategroups
1295 _candidategroups
1294
1296
1295 If no suitable deltabase is found, we return delta info for a full
1297 If no suitable deltabase is found, we return delta info for a full
1296 snapshot.
1298 snapshot.
1297
1299
1298 `excluded_bases` is an optional set of revision that cannot be used as
1300 `excluded_bases` is an optional set of revision that cannot be used as
1299 a delta base. Use this to recompute delta suitable in censor or strip
1301 a delta base. Use this to recompute delta suitable in censor or strip
1300 context.
1302 context.
1301 """
1303 """
1302 if target_rev is None:
1304 if target_rev is None:
1303 target_rev = len(self.revlog)
1305 target_rev = len(self.revlog)
1304
1306
1305 gather_debug = self._gather_debug
1307 gather_debug = self._gather_debug
1306 cachedelta = revinfo.cachedelta
1308 cachedelta = revinfo.cachedelta
1307 revlog = self.revlog
1309 revlog = self.revlog
1308 p1r = p2r = None
1310 p1r = p2r = None
1309
1311
1310 if excluded_bases is None:
1312 if excluded_bases is None:
1311 excluded_bases = set()
1313 excluded_bases = set()
1312
1314
1313 if gather_debug:
1315 if gather_debug:
1314 start = util.timer()
1316 start = util.timer()
1315 dbg = self._one_dbg_data()
1317 dbg = self._one_dbg_data()
1316 dbg['revision'] = target_rev
1318 dbg['revision'] = target_rev
1317 p1r = revlog.rev(revinfo.p1)
1319 p1r = revlog.rev(revinfo.p1)
1318 p2r = revlog.rev(revinfo.p2)
1320 p2r = revlog.rev(revinfo.p2)
1319 if p1r != nullrev:
1321 if p1r != nullrev:
1320 p1_chain_len = revlog._chaininfo(p1r)[0]
1322 p1_chain_len = revlog._chaininfo(p1r)[0]
1321 else:
1323 else:
1322 p1_chain_len = -1
1324 p1_chain_len = -1
1323 if p2r != nullrev:
1325 if p2r != nullrev:
1324 p2_chain_len = revlog._chaininfo(p2r)[0]
1326 p2_chain_len = revlog._chaininfo(p2r)[0]
1325 else:
1327 else:
1326 p2_chain_len = -1
1328 p2_chain_len = -1
1327 dbg['p1-chain-len'] = p1_chain_len
1329 dbg['p1-chain-len'] = p1_chain_len
1328 dbg['p2-chain-len'] = p2_chain_len
1330 dbg['p2-chain-len'] = p2_chain_len
1329
1331
1330 # 1) if the revision is empty, no amount of delta can beat it
1332 # 1) if the revision is empty, no amount of delta can beat it
1331 #
1333 #
1332 # 2) no delta for flag processor revision (see "candelta" for why)
1334 # 2) no delta for flag processor revision (see "candelta" for why)
1333 # not calling candelta since only one revision needs test, also to
1335 # not calling candelta since only one revision needs test, also to
1334 # avoid overhead fetching flags again.
1336 # avoid overhead fetching flags again.
1335 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1337 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1336 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1338 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1337 if gather_debug:
1339 if gather_debug:
1338 end = util.timer()
1340 end = util.timer()
1339 dbg['duration'] = end - start
1341 dbg['duration'] = end - start
1340 dbg[
1342 dbg[
1341 'delta-base'
1343 'delta-base'
1342 ] = deltainfo.base # pytype: disable=attribute-error
1344 ] = deltainfo.base # pytype: disable=attribute-error
1343 dbg['search_round_count'] = 0
1345 dbg['search_round_count'] = 0
1344 dbg['using-cached-base'] = False
1346 dbg['using-cached-base'] = False
1345 dbg['delta_try_count'] = 0
1347 dbg['delta_try_count'] = 0
1346 dbg['type'] = b"full"
1348 dbg['type'] = b"full"
1347 dbg['snapshot-depth'] = 0
1349 dbg['snapshot-depth'] = 0
1348 self._dbg_process_data(dbg)
1350 self._dbg_process_data(dbg)
1349 return deltainfo
1351 return deltainfo
1350
1352
1351 deltainfo = None
1353 deltainfo = None
1352
1354
1353 # If this source delta are to be forcibly reuse, let us comply early.
1355 # If this source delta are to be forcibly reuse, let us comply early.
1354 if (
1356 if (
1355 revlog.delta_config.general_delta
1357 revlog.delta_config.general_delta
1356 and revinfo.cachedelta is not None
1358 and revinfo.cachedelta is not None
1357 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1359 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1358 ):
1360 ):
1359 base = revinfo.cachedelta[0]
1361 base = revinfo.cachedelta[0]
1360 if base == nullrev:
1362 if base == nullrev:
1361 dbg_type = b"full"
1363 dbg_type = b"full"
1362 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1364 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1363 if gather_debug:
1365 if gather_debug:
1364 snapshotdepth = 0
1366 snapshotdepth = 0
1365 elif base not in excluded_bases:
1367 elif base not in excluded_bases:
1366 delta = revinfo.cachedelta[1]
1368 delta = revinfo.cachedelta[1]
1367 header, data = revlog.compress(delta)
1369 header, data = revlog.compress(delta)
1368 deltalen = len(header) + len(data)
1370 deltalen = len(header) + len(data)
1369 if gather_debug:
1371 if gather_debug:
1370 offset = revlog.end(len(revlog) - 1)
1372 offset = revlog.end(len(revlog) - 1)
1371 chainbase = revlog.chainbase(base)
1373 chainbase = revlog.chainbase(base)
1372 distance = deltalen + offset - revlog.start(chainbase)
1374 distance = deltalen + offset - revlog.start(chainbase)
1373 chainlen, compresseddeltalen = revlog._chaininfo(base)
1375 chainlen, compresseddeltalen = revlog._chaininfo(base)
1374 chainlen += 1
1376 chainlen += 1
1375 compresseddeltalen += deltalen
1377 compresseddeltalen += deltalen
1376 if base == p1r or base == p2r:
1378 if base == p1r or base == p2r:
1377 dbg_type = b"delta"
1379 dbg_type = b"delta"
1378 snapshotdepth = None
1380 snapshotdepth = None
1379 elif not revlog.issnapshot(base):
1381 elif not revlog.issnapshot(base):
1380 snapshotdepth = None
1382 snapshotdepth = None
1381 else:
1383 else:
1382 dbg_type = b"snapshot"
1384 dbg_type = b"snapshot"
1383 snapshotdepth = revlog.snapshotdepth(base) + 1
1385 snapshotdepth = revlog.snapshotdepth(base) + 1
1384 else:
1386 else:
1385 distance = None
1387 distance = None
1386 chainbase = None
1388 chainbase = None
1387 chainlen = None
1389 chainlen = None
1388 compresseddeltalen = None
1390 compresseddeltalen = None
1389 snapshotdepth = None
1391 snapshotdepth = None
1390 deltainfo = _deltainfo(
1392 deltainfo = _deltainfo(
1391 distance=distance,
1393 distance=distance,
1392 deltalen=deltalen,
1394 deltalen=deltalen,
1393 data=(header, data),
1395 data=(header, data),
1394 base=base,
1396 base=base,
1395 chainbase=chainbase,
1397 chainbase=chainbase,
1396 chainlen=chainlen,
1398 chainlen=chainlen,
1397 compresseddeltalen=compresseddeltalen,
1399 compresseddeltalen=compresseddeltalen,
1398 snapshotdepth=snapshotdepth,
1400 snapshotdepth=snapshotdepth,
1399 )
1401 )
1400
1402
1401 if deltainfo is not None:
1403 if deltainfo is not None:
1402 if gather_debug:
1404 if gather_debug:
1403 end = util.timer()
1405 end = util.timer()
1404 dbg['duration'] = end - start
1406 dbg['duration'] = end - start
1405 dbg[
1407 dbg[
1406 'delta-base'
1408 'delta-base'
1407 ] = deltainfo.base # pytype: disable=attribute-error
1409 ] = deltainfo.base # pytype: disable=attribute-error
1408 dbg['search_round_count'] = 0
1410 dbg['search_round_count'] = 0
1409 dbg['using-cached-base'] = True
1411 dbg['using-cached-base'] = True
1410 dbg['delta_try_count'] = 0
1412 dbg['delta_try_count'] = 0
1411 dbg['type'] = b"full"
1413 dbg['type'] = b"full"
1412 if snapshotdepth is None:
1414 if snapshotdepth is None:
1413 dbg['snapshot-depth'] = 0
1415 dbg['snapshot-depth'] = 0
1414 else:
1416 else:
1415 dbg['snapshot-depth'] = snapshotdepth
1417 dbg['snapshot-depth'] = snapshotdepth
1416 self._dbg_process_data(dbg)
1418 self._dbg_process_data(dbg)
1417 return deltainfo
1419 return deltainfo
1418
1420
1419 # count the number of different delta we tried (for debug purpose)
1421 # count the number of different delta we tried (for debug purpose)
1420 dbg_try_count = 0
1422 dbg_try_count = 0
1421 # count the number of "search round" we did. (for debug purpose)
1423 # count the number of "search round" we did. (for debug purpose)
1422 dbg_try_rounds = 0
1424 dbg_try_rounds = 0
1423 dbg_type = b'unknown'
1425 dbg_type = b'unknown'
1424
1426
1425 if p1r is None:
1427 if p1r is None:
1426 p1r = revlog.rev(revinfo.p1)
1428 p1r = revlog.rev(revinfo.p1)
1427 p2r = revlog.rev(revinfo.p2)
1429 p2r = revlog.rev(revinfo.p2)
1428
1430
1429 if self._debug_search:
1431 if self._debug_search:
1430 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1432 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1431 msg %= target_rev
1433 msg %= target_rev
1432 self._write_debug(msg)
1434 self._write_debug(msg)
1433
1435
1434 search = _DeltaSearch(
1436 search = _DeltaSearch(
1435 self.revlog,
1437 self.revlog,
1436 revinfo,
1438 revinfo,
1437 p1r,
1439 p1r,
1438 p2r,
1440 p2r,
1439 cachedelta,
1441 cachedelta,
1440 excluded_bases,
1442 excluded_bases,
1441 target_rev,
1443 target_rev,
1442 snapshot_cache=self._snapshot_cache,
1444 snapshot_cache=self._snapshot_cache,
1443 )
1445 )
1444
1446
1445 groups = search.candidate_groups()
1447 groups = search.candidate_groups()
1446 candidaterevs = next(groups)
1448 candidaterevs = next(groups)
1447 while candidaterevs is not None:
1449 while candidaterevs is not None:
1448 dbg_try_rounds += 1
1450 dbg_try_rounds += 1
1449 if self._debug_search:
1451 if self._debug_search:
1450 prev = None
1452 prev = None
1451 if deltainfo is not None:
1453 if deltainfo is not None:
1452 prev = deltainfo.base
1454 prev = deltainfo.base
1453
1455
1454 if (
1456 if (
1455 cachedelta is not None
1457 cachedelta is not None
1456 and len(candidaterevs) == 1
1458 and len(candidaterevs) == 1
1457 and cachedelta[0] in candidaterevs
1459 and cachedelta[0] in candidaterevs
1458 ):
1460 ):
1459 round_type = b"cached-delta"
1461 round_type = b"cached-delta"
1460 elif p1r in candidaterevs or p2r in candidaterevs:
1462 elif p1r in candidaterevs or p2r in candidaterevs:
1461 round_type = b"parents"
1463 round_type = b"parents"
1462 elif prev is not None and all(c < prev for c in candidaterevs):
1464 elif prev is not None and all(c < prev for c in candidaterevs):
1463 round_type = b"refine-down"
1465 round_type = b"refine-down"
1464 elif prev is not None and all(c > prev for c in candidaterevs):
1466 elif prev is not None and all(c > prev for c in candidaterevs):
1465 round_type = b"refine-up"
1467 round_type = b"refine-up"
1466 else:
1468 else:
1467 round_type = b"search-down"
1469 round_type = b"search-down"
1468 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1470 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1469 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1471 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1470 self._write_debug(msg)
1472 self._write_debug(msg)
1471 nominateddeltas = []
1473 nominateddeltas = []
1472 if deltainfo is not None:
1474 if deltainfo is not None:
1473 if self._debug_search:
1475 if self._debug_search:
1474 msg = (
1476 msg = (
1475 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1477 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1476 )
1478 )
1477 msg %= (deltainfo.base, deltainfo.deltalen)
1479 msg %= (deltainfo.base, deltainfo.deltalen)
1478 self._write_debug(msg)
1480 self._write_debug(msg)
1479 # if we already found a good delta,
1481 # if we already found a good delta,
1480 # challenge it against refined candidates
1482 # challenge it against refined candidates
1481 nominateddeltas.append(deltainfo)
1483 nominateddeltas.append(deltainfo)
1482 for candidaterev in candidaterevs:
1484 for candidaterev in candidaterevs:
1483 if self._debug_search:
1485 if self._debug_search:
1484 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1486 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1485 msg %= candidaterev
1487 msg %= candidaterev
1486 self._write_debug(msg)
1488 self._write_debug(msg)
1487 candidate_type = None
1489 candidate_type = None
1488 if candidaterev == p1r:
1490 if candidaterev == p1r:
1489 candidate_type = b"p1"
1491 candidate_type = b"p1"
1490 elif candidaterev == p2r:
1492 elif candidaterev == p2r:
1491 candidate_type = b"p2"
1493 candidate_type = b"p2"
1492 elif self.revlog.issnapshot(candidaterev):
1494 elif self.revlog.issnapshot(candidaterev):
1493 candidate_type = b"snapshot-%d"
1495 candidate_type = b"snapshot-%d"
1494 candidate_type %= self.revlog.snapshotdepth(
1496 candidate_type %= self.revlog.snapshotdepth(
1495 candidaterev
1497 candidaterev
1496 )
1498 )
1497
1499
1498 if candidate_type is not None:
1500 if candidate_type is not None:
1499 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1501 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1500 msg %= candidate_type
1502 msg %= candidate_type
1501 self._write_debug(msg)
1503 self._write_debug(msg)
1502 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1504 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1503 msg %= self.revlog.length(candidaterev)
1505 msg %= self.revlog.length(candidaterev)
1504 self._write_debug(msg)
1506 self._write_debug(msg)
1505 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1507 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1506 msg %= self.revlog.deltaparent(candidaterev)
1508 msg %= self.revlog.deltaparent(candidaterev)
1507 self._write_debug(msg)
1509 self._write_debug(msg)
1508
1510
1509 dbg_try_count += 1
1511 dbg_try_count += 1
1510
1512
1511 if self._debug_search:
1513 if self._debug_search:
1512 delta_start = util.timer()
1514 delta_start = util.timer()
1513 candidatedelta = self._builddeltainfo(
1515 candidatedelta = self._builddeltainfo(
1514 revinfo,
1516 revinfo,
1515 candidaterev,
1517 candidaterev,
1516 target_rev=target_rev,
1518 target_rev=target_rev,
1517 )
1519 )
1518 if self._debug_search:
1520 if self._debug_search:
1519 delta_end = util.timer()
1521 delta_end = util.timer()
1520 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1522 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1521 msg %= delta_end - delta_start
1523 msg %= delta_end - delta_start
1522 self._write_debug(msg)
1524 self._write_debug(msg)
1523 if candidatedelta is not None:
1525 if candidatedelta is not None:
1524 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1526 if search.is_good_delta_info(candidatedelta):
1525 if self._debug_search:
1527 if self._debug_search:
1526 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1528 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1527 msg %= candidatedelta.deltalen
1529 msg %= candidatedelta.deltalen
1528 self._write_debug(msg)
1530 self._write_debug(msg)
1529 nominateddeltas.append(candidatedelta)
1531 nominateddeltas.append(candidatedelta)
1530 elif self._debug_search:
1532 elif self._debug_search:
1531 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1533 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1532 msg %= candidatedelta.deltalen
1534 msg %= candidatedelta.deltalen
1533 self._write_debug(msg)
1535 self._write_debug(msg)
1534 elif self._debug_search:
1536 elif self._debug_search:
1535 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1537 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1536 self._write_debug(msg)
1538 self._write_debug(msg)
1537 if nominateddeltas:
1539 if nominateddeltas:
1538 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1540 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1539 if deltainfo is not None:
1541 if deltainfo is not None:
1540 candidaterevs = groups.send(deltainfo.base)
1542 candidaterevs = groups.send(deltainfo.base)
1541 else:
1543 else:
1542 candidaterevs = next(groups)
1544 candidaterevs = next(groups)
1543
1545
1544 if deltainfo is None:
1546 if deltainfo is None:
1545 dbg_type = b"full"
1547 dbg_type = b"full"
1546 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1548 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1547 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1549 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1548 dbg_type = b"snapshot"
1550 dbg_type = b"snapshot"
1549 else:
1551 else:
1550 dbg_type = b"delta"
1552 dbg_type = b"delta"
1551
1553
1552 if gather_debug:
1554 if gather_debug:
1553 end = util.timer()
1555 end = util.timer()
1554 if dbg_type == b'full':
1556 if dbg_type == b'full':
1555 used_cached = (
1557 used_cached = (
1556 cachedelta is not None
1558 cachedelta is not None
1557 and dbg_try_rounds == 0
1559 and dbg_try_rounds == 0
1558 and dbg_try_count == 0
1560 and dbg_try_count == 0
1559 and cachedelta[0] == nullrev
1561 and cachedelta[0] == nullrev
1560 )
1562 )
1561 else:
1563 else:
1562 used_cached = (
1564 used_cached = (
1563 cachedelta is not None
1565 cachedelta is not None
1564 and dbg_try_rounds == 1
1566 and dbg_try_rounds == 1
1565 and dbg_try_count == 1
1567 and dbg_try_count == 1
1566 and deltainfo.base == cachedelta[0]
1568 and deltainfo.base == cachedelta[0]
1567 )
1569 )
1568 dbg['duration'] = end - start
1570 dbg['duration'] = end - start
1569 dbg[
1571 dbg[
1570 'delta-base'
1572 'delta-base'
1571 ] = deltainfo.base # pytype: disable=attribute-error
1573 ] = deltainfo.base # pytype: disable=attribute-error
1572 dbg['search_round_count'] = dbg_try_rounds
1574 dbg['search_round_count'] = dbg_try_rounds
1573 dbg['using-cached-base'] = used_cached
1575 dbg['using-cached-base'] = used_cached
1574 dbg['delta_try_count'] = dbg_try_count
1576 dbg['delta_try_count'] = dbg_try_count
1575 dbg['type'] = dbg_type
1577 dbg['type'] = dbg_type
1576 if (
1578 if (
1577 deltainfo.snapshotdepth # pytype: disable=attribute-error
1579 deltainfo.snapshotdepth # pytype: disable=attribute-error
1578 is not None
1580 is not None
1579 ):
1581 ):
1580 dbg[
1582 dbg[
1581 'snapshot-depth'
1583 'snapshot-depth'
1582 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1584 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1583 else:
1585 else:
1584 dbg['snapshot-depth'] = 0
1586 dbg['snapshot-depth'] = 0
1585 self._dbg_process_data(dbg)
1587 self._dbg_process_data(dbg)
1586 return deltainfo
1588 return deltainfo
1587
1589
1588 def _one_dbg_data(self):
1590 def _one_dbg_data(self):
1589 dbg = {
1591 dbg = {
1590 'duration': None,
1592 'duration': None,
1591 'revision': None,
1593 'revision': None,
1592 'delta-base': None,
1594 'delta-base': None,
1593 'search_round_count': None,
1595 'search_round_count': None,
1594 'using-cached-base': None,
1596 'using-cached-base': None,
1595 'delta_try_count': None,
1597 'delta_try_count': None,
1596 'type': None,
1598 'type': None,
1597 'p1-chain-len': None,
1599 'p1-chain-len': None,
1598 'p2-chain-len': None,
1600 'p2-chain-len': None,
1599 'snapshot-depth': None,
1601 'snapshot-depth': None,
1600 'target-revlog': None,
1602 'target-revlog': None,
1601 }
1603 }
1602 target_revlog = b"UNKNOWN"
1604 target_revlog = b"UNKNOWN"
1603 target_type = self.revlog.target[0]
1605 target_type = self.revlog.target[0]
1604 target_key = self.revlog.target[1]
1606 target_key = self.revlog.target[1]
1605 if target_type == KIND_CHANGELOG:
1607 if target_type == KIND_CHANGELOG:
1606 target_revlog = b'CHANGELOG:'
1608 target_revlog = b'CHANGELOG:'
1607 elif target_type == KIND_MANIFESTLOG:
1609 elif target_type == KIND_MANIFESTLOG:
1608 target_revlog = b'MANIFESTLOG:'
1610 target_revlog = b'MANIFESTLOG:'
1609 if target_key:
1611 if target_key:
1610 target_revlog += b'%s:' % target_key
1612 target_revlog += b'%s:' % target_key
1611 elif target_type == KIND_FILELOG:
1613 elif target_type == KIND_FILELOG:
1612 target_revlog = b'FILELOG:'
1614 target_revlog = b'FILELOG:'
1613 if target_key:
1615 if target_key:
1614 target_revlog += b'%s:' % target_key
1616 target_revlog += b'%s:' % target_key
1615 dbg['target-revlog'] = target_revlog
1617 dbg['target-revlog'] = target_revlog
1616 return dbg
1618 return dbg
1617
1619
1618 def _dbg_process_data(self, dbg):
1620 def _dbg_process_data(self, dbg):
1619 if self._debug_info is not None:
1621 if self._debug_info is not None:
1620 self._debug_info.append(dbg)
1622 self._debug_info.append(dbg)
1621
1623
1622 if self._write_debug is not None:
1624 if self._write_debug is not None:
1623 msg = (
1625 msg = (
1624 b"DBG-DELTAS:"
1626 b"DBG-DELTAS:"
1625 b" %-12s"
1627 b" %-12s"
1626 b" rev=%d:"
1628 b" rev=%d:"
1627 b" delta-base=%d"
1629 b" delta-base=%d"
1628 b" is-cached=%d"
1630 b" is-cached=%d"
1629 b" - search-rounds=%d"
1631 b" - search-rounds=%d"
1630 b" try-count=%d"
1632 b" try-count=%d"
1631 b" - delta-type=%-6s"
1633 b" - delta-type=%-6s"
1632 b" snap-depth=%d"
1634 b" snap-depth=%d"
1633 b" - p1-chain-length=%d"
1635 b" - p1-chain-length=%d"
1634 b" p2-chain-length=%d"
1636 b" p2-chain-length=%d"
1635 b" - duration=%f"
1637 b" - duration=%f"
1636 b"\n"
1638 b"\n"
1637 )
1639 )
1638 msg %= (
1640 msg %= (
1639 dbg["target-revlog"],
1641 dbg["target-revlog"],
1640 dbg["revision"],
1642 dbg["revision"],
1641 dbg["delta-base"],
1643 dbg["delta-base"],
1642 dbg["using-cached-base"],
1644 dbg["using-cached-base"],
1643 dbg["search_round_count"],
1645 dbg["search_round_count"],
1644 dbg["delta_try_count"],
1646 dbg["delta_try_count"],
1645 dbg["type"],
1647 dbg["type"],
1646 dbg["snapshot-depth"],
1648 dbg["snapshot-depth"],
1647 dbg["p1-chain-len"],
1649 dbg["p1-chain-len"],
1648 dbg["p2-chain-len"],
1650 dbg["p2-chain-len"],
1649 dbg["duration"],
1651 dbg["duration"],
1650 )
1652 )
1651 self._write_debug(msg)
1653 self._write_debug(msg)
1652
1654
1653
1655
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, body = deltainfo.data
    mode = COMP_MODE_INLINE
    if not header:
        if not body:
            # nothing to store at all... declare it uncompressed
            mode = COMP_MODE_PLAIN
        else:
            marker = body[0:1]
            if marker == b'\0':
                # leading NUL byte marks stored-as-plain data
                mode = COMP_MODE_PLAIN
            elif marker == default_compression_header:
                mode = COMP_MODE_DEFAULT
    elif header == b'u':
        # 'u' prefix means uncompressed; the explicit PLAIN mode is a more
        # efficient way to declare that, so strip the prefix byte.
        header = b''
        mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now