##// END OF EJS Templates
delta-find: move `_rawgroups` on the `_DeltaSearch` object...
marmoute -
r52215:a227e061 default
parent child Browse files
Show More
@@ -1,1673 +1,1673 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        data_config = revlog.DataConfig()
        data_config.sr_density_threshold = density
        data_config.sr_min_gap_size = mingap
        self.data_config = data_config
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and the first revision start at offset 0
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # the null revision always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    data_config = revlog.data_config
    if targetsize is not None:
        targetsize = max(targetsize, data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_slicer = getattr(revlog.index, 'slicechunktodensity', None)
    if density_slicer is None:
        # no native implementation available, fall back to the Python one
        def density_slicer(r, density, mingap):
            return _slicechunktodensity(revlog, r, density, mingap)

    chunks = density_slicer(
        revs,
        data_config.sr_density_threshold,
        data_config.sr_min_gap_size,
    )
    for chunk in chunks:
        yield from _slicechunktosize(revlog, chunk, targetsize)
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read guarantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    start_off = revlog.start(revs[0])
    end_off = revlog.end(revs[-1])
    if targetsize is None or (end_off - start_off) <= targetsize:
        # the whole chunk already fits within the limit
        yield revs
        return

    first = 0
    stop = 1
    walker = enumerate(revs)
    next(walker)  # the first rev is always part of the current chunk
    # first step: get snapshots out of the way
    for pos, rev in walker:
        is_snapshot = revlog.issnapshot(rev)
        if revlog.end(rev) - start_off <= targetsize and is_snapshot:
            # still within budget, extend the current chunk
            stop = pos + 1
        else:
            piece = _trimchunk(revlog, revs, first, stop)
            if piece:
                yield piece
            first = pos
            start_off = revlog.start(rev)
            stop = pos + 1
        if not is_snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nb_revs = len(revs)
    while (end_off - start_off) > targetsize:
        stop = nb_revs
        if nb_revs - first <= 1:
            break  # protect against individual chunk larger than limit
        span = revlog.end(revs[stop - 1]) - start_off
        while span > targetsize:
            if stop - first <= 1:
                break  # protect against individual chunk larger than limit
            stop -= (stop - first) // 2
            span = revlog.end(revs[stop - 1]) - start_off
        piece = _trimchunk(revlog, revs, first, stop)
        if piece:
            yield piece
        first = stop
        start_off = revlog.start(revs[first])

    tail = _trimchunk(revlog, revs, first)
    if tail:
        yield tail
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    rev_start = revlog.start
    rev_length = revlog.length

    if len(revs) <= 1:
        # nothing to slice
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)
    if deltachainspan < mingapsize:
        # the whole span is below the minimal gap size, read it in one go
        yield revs
        return

    chainpayload = sum(rev_length(r) for r in revs)
    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        # already dense enough
        yield revs
        return

    # collect the gaps between consecutive non-empty revisions
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = rev_start(rev)
        revlen = rev_length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps so that we can pop them from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    readdata = deltachainspan
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()
        selected.append(gapidx)
        # skipping this gap shrinks the amount of data actually read
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:
        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk
        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    get_len = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while endidx > max(1, startidx) and get_len(revs[endidx - 1]) == 0:
            endidx -= 1

    return revs[startidx:endidx]
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment obviously spans no bytes
    if not revs:
        return 0
    return revlog.end(revs[-1]) - revlog.start(revs[0])
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    full_replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == full_replace_header:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Result of a delta computation against a candidate base."""

    # distance from the base revision; bounds the amount of I/O needed to
    # read the chain (see is_good_delta_info)
    distance = attr.ib()
    # length of the delta payload itself (including compression header)
    deltalen = attr.ib()
    # the delta as a (header, payload) pair of bytes; header is b'u' for
    # uncompressed data (see drop_u_compression)
    data = attr.ib()
    # revision number of the delta base
    base = attr.ib()
    # revision number at the start of the delta chain
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated size of all deltas that must be applied to restore the text
    compresseddeltalen = attr.ib()
    # depth of the snapshot this delta would create, or None when the
    # result is not an intermediate snapshot
    snapshotdepth = attr.ib()
567
567
568
568
def drop_u_compression(delta):
    """Turn a "u" (no-compression) delta into no-compression without header.

    This is useful for revlog formats that have a better compression method.
    """
    assert delta.data[0] == b'u', delta.data[0]
    # same delta info, with the b'u' marker dropped from the payload header
    # (hence one byte shorter)
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
585
585
586
586
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good.

    Good means that it is within the disk span, disk size, and chain length
    bounds that we know to be performant. A ``None`` deltainfo is never good.
    """
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog.delta_config.max_deltachain_span
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the delta
    #   will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (
        revlog.delta_config.max_chain_len
        and revlog.delta_config.max_chain_len < deltainfo.chainlen
    ):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
670
670
671
671
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
676
676
677
677
678 class _DeltaSearch:
678 class _DeltaSearch:
679 """perform the search of a good delta for a single revlog revision
679 """perform the search of a good delta for a single revlog revision
680
680
681 note: some of the deltacomputer.finddeltainfo logic should probably move
681 note: some of the deltacomputer.finddeltainfo logic should probably move
682 here.
682 here.
683 """
683 """
684
684
def __init__(
    self,
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    # the revlog we are searching a delta base in
    self.revlog = revlog
    # length of the fulltext of the revision being stored
    self.textlen = textlen
    # parent revisions of the revision being stored
    self.p1 = p1
    self.p2 = p2
    # optional pre-computed delta; indexed as (base-rev, delta, reuse-policy)
    # judging by how candidate_groups/_refined_groups read it — TODO confirm
    self.cachedelta = cachedelta
    # revisions deemed unworthy as a base by a higher authority
    # (e.g. censored revisions), or None
    self.excluded_bases = excluded_bases
    # revision number the new delta will be stored at (filled in lazily by
    # candidate_groups when None)
    self.target_rev = target_rev
    # optional SnapshotCache instance, created on demand when None
    self.snapshot_cache = snapshot_cache
704
704
def candidate_groups(self):
    """Provides groups of revisions to be tested as delta base.

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _raw_candidate_groups for details about the
    group order.

    This is a generator driven through ``send()``: the caller sends back the
    revision it found to be a good base (or None), which steers refinement.
    """
    # should we try to build a delta?
    if not (len(self.revlog) and self.revlog._storedeltachains):
        yield None
        return

    if self.target_rev is None:
        self.target_rev = len(self.revlog)

    if not self.revlog.delta_config.general_delta:
        # before general delta, there is only one possible delta base
        yield (self.target_rev - 1,)
        yield None
        return

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
    # so we should never end up asking such question. Adding the assert as
    # a safe-guard to detect anything that would be fishy in this regard.
    assert (
        self.cachedelta is None
        or self.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not self.revlog.delta_config.general_delta
    )

    deltalength = self.revlog.length
    deltaparent = self.revlog.deltaparent
    sparse = self.revlog.delta_config.sparse_revlog
    good = None

    deltas_limit = self.textlen * LIMIT_DELTA2TEXT
    group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size

    tested = {nullrev}
    candidates = self._refined_groups()
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as
            # a last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            # a higher authority deemed the base unworthy (e.g. censored)
            if (
                self.excluded_bases is not None
                and rev in self.excluded_bases
            ):
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high
            # in the revlog
            if self.target_rev is not None and rev >= self.target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < self.revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and self.revlog.rawsize(rev) < (
                self.textlen // LIMIT_BASE2TEXT
            ):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access
            # it here too.
            chainlen, chainsize = self.revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if (
                self.revlog.delta_config.max_chain_len
                and chainlen >= self.revlog.delta_config.max_chain_len
            ):
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if (
                sparse
                and self.revlog.delta_config.upper_bound_comp is not None
            ):
                maxcomp = self.revlog.delta_config.upper_bound_comp
                basenotsnap = (self.p1, self.p2, nullrev)
                if rev not in basenotsnap and self.revlog.issnapshot(rev):
                    snapshotdepth = self.revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the
                    # size difference
                    revsize = self.revlog.rawsize(rev)
                    rawsizedistance = max(self.textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = self.textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper
                        # bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = self.revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target
                        # base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in
            # a quite significant performance impact. To reduce this, we
            # can send them in smaller batches until the new batch does not
            # provide any improvements.
            #
            # This might reduce the overall efficiency of the compression
            # in some corner cases, but that should also prevent very
            # pathological cases from being an issue. (eg. 20 000
            # candidates).
            #
            # XXX note that the ordering of the group becomes important as
            # it now impacts the final result. The current order is
            # unprocessed and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
865
865
def _refined_groups(self):
    """Emit delta-base candidate groups, refining around caller feedback.

    Generator driven through ``send()``: the caller sends back the chosen
    good base (or None) after testing each group, which this generator uses
    to decide what to try next (cached delta first, then the raw groups,
    then snapshot refinement when sparse-revlog is enabled).
    """
    good = None
    # First we try to reuse the delta contained in the bundle (or from
    # the source revlog).
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse source delta is
    # useful when we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if (
        self.cachedelta is not None
        and self.cachedelta[2] > DELTA_BASE_REUSE_NO
    ):
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (self.cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if self.snapshot_cache is None:
        self.snapshot_cache = SnapshotCache()
    groups = self._raw_groups()
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available
    # deltas
    if not self.revlog.delta_config.sparse_revlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if (
        good is not None
        and good not in (self.p1, self.p2)
        and self.revlog.issnapshot(good)
    ):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = self.revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not self.snapshot_cache.snapshots:
            self.snapshot_cache.update(self.revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(
                sorted(c for c in self.snapshot_cache.snapshots[good])
            )
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
939
933
def _raw_groups(self):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting theoretically
    interesting deltas without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for delta base if we cannot use a delta base ?
    assert self.revlog.delta_config.general_delta
    # also see issue6056
    sparse = self.revlog.delta_config.sparse_revlog
    curr = len(self.revlog)
    prev = curr - 1
    deltachain = lambda rev: self.revlog._deltachain(rev)[0]

    # exclude already lazy tested base if any
    parents = [p for p in (self.p1, self.p2) if p != nullrev]

    if (
        not self.revlog.delta_config.delta_both_parents
        and len(parents) == 2
    ):
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if self.snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            self.snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to
        # use as a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain map: snapshot-level:
        # snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not self.revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        self.snapshot_cache.update(self.revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in self.revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshot that are deeper than the ones from the
            # parent delta chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not self.revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base. We test them
        # from highest to lowest level. High level ones are more likely to
        # result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(self.snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we
            # check if an existing snapshot, children of base we consider,
            # would be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less
                # chances to be valid base since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base
        # for an intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains.
        # Creating more and more snapshot as the repository grow.
        full = [
            r
            for r in self.snapshot_cache.snapshots[nullrev]
            if snapfloor <= r
        ]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1049 #
1050 # It give a chance to reuse a delta chain unrelated to the current
1051 # revisions instead of starting our own. Without such re-use,
1052 # topological branches would keep reopening new full chains. Creating
1053 # more and more snapshot as the repository grow.
1054 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1055 yield tuple(sorted(full))
1056
1057 if not sparse:
1058 # other approach failed try against prev to hopefully save us a
1059 # fulltext.
1060 yield (prev,)
1061
1061
1062
1062
class SnapshotCache:
    """Cache of a revlog's snapshot revisions, keyed by delta parent.

    ``snapshots`` maps a base revision to the set of snapshot revisions
    whose delta parent is that base (``nullrev`` collects the full
    snapshots).  ``_start_rev``/``_end_rev`` track the contiguous range of
    revisions already scanned, so repeated ``update`` calls only scan the
    revisions not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        self.snapshots = collections.defaultdict(set)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # only scan the parts of [start_rev, end_rev] that fall outside
            # the range already covered by a previous call
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the requested range must lie entirely outside the already-scanned
        # range, otherwise some revisions would be scanned twice
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        # prefer the index's native implementation when one is available
        if hasattr(revlog.index, 'findsnapshots'):
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1118 class deltacomputer:
1118 class deltacomputer:
1119 """object capable of computing delta and finding delta for multiple revision
1119 """object capable of computing delta and finding delta for multiple revision
1120
1120
1121 This object is meant to compute and find multiple delta applied to the same
1121 This object is meant to compute and find multiple delta applied to the same
1122 revlog.
1122 revlog.
1123 """
1123 """
1124
1124
1125 def __init__(
1125 def __init__(
1126 self,
1126 self,
1127 revlog,
1127 revlog,
1128 write_debug=None,
1128 write_debug=None,
1129 debug_search=False,
1129 debug_search=False,
1130 debug_info=None,
1130 debug_info=None,
1131 ):
1131 ):
1132 self.revlog = revlog
1132 self.revlog = revlog
1133 self._write_debug = write_debug
1133 self._write_debug = write_debug
1134 if write_debug is None:
1134 if write_debug is None:
1135 self._debug_search = False
1135 self._debug_search = False
1136 else:
1136 else:
1137 self._debug_search = debug_search
1137 self._debug_search = debug_search
1138 self._debug_info = debug_info
1138 self._debug_info = debug_info
1139 self._snapshot_cache = SnapshotCache()
1139 self._snapshot_cache = SnapshotCache()
1140
1140
1141 @property
1141 @property
1142 def _gather_debug(self):
1142 def _gather_debug(self):
1143 return self._write_debug is not None or self._debug_info is not None
1143 return self._write_debug is not None or self._debug_info is not None
1144
1144
1145 def buildtext(self, revinfo):
1145 def buildtext(self, revinfo):
1146 """Builds a fulltext version of a revision
1146 """Builds a fulltext version of a revision
1147
1147
1148 revinfo: revisioninfo instance that contains all needed info
1148 revinfo: revisioninfo instance that contains all needed info
1149 """
1149 """
1150 btext = revinfo.btext
1150 btext = revinfo.btext
1151 if btext[0] is not None:
1151 if btext[0] is not None:
1152 return btext[0]
1152 return btext[0]
1153
1153
1154 revlog = self.revlog
1154 revlog = self.revlog
1155 cachedelta = revinfo.cachedelta
1155 cachedelta = revinfo.cachedelta
1156 baserev = cachedelta[0]
1156 baserev = cachedelta[0]
1157 delta = cachedelta[1]
1157 delta = cachedelta[1]
1158
1158
1159 fulltext = btext[0] = _textfromdelta(
1159 fulltext = btext[0] = _textfromdelta(
1160 revlog,
1160 revlog,
1161 baserev,
1161 baserev,
1162 delta,
1162 delta,
1163 revinfo.p1,
1163 revinfo.p1,
1164 revinfo.p2,
1164 revinfo.p2,
1165 revinfo.flags,
1165 revinfo.flags,
1166 revinfo.node,
1166 revinfo.node,
1167 )
1167 )
1168 return fulltext
1168 return fulltext
1169
1169
1170 def _builddeltadiff(self, base, revinfo):
1170 def _builddeltadiff(self, base, revinfo):
1171 revlog = self.revlog
1171 revlog = self.revlog
1172 t = self.buildtext(revinfo)
1172 t = self.buildtext(revinfo)
1173 if revlog.iscensored(base):
1173 if revlog.iscensored(base):
1174 # deltas based on a censored revision must replace the
1174 # deltas based on a censored revision must replace the
1175 # full content in one patch, so delta works everywhere
1175 # full content in one patch, so delta works everywhere
1176 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1176 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1177 delta = header + t
1177 delta = header + t
1178 else:
1178 else:
1179 ptext = revlog.rawdata(base)
1179 ptext = revlog.rawdata(base)
1180 delta = mdiff.textdiff(ptext, t)
1180 delta = mdiff.textdiff(ptext, t)
1181
1181
1182 return delta
1182 return delta
1183
1183
1184 def _builddeltainfo(self, revinfo, base, target_rev=None):
1184 def _builddeltainfo(self, revinfo, base, target_rev=None):
1185 # can we use the cached delta?
1185 # can we use the cached delta?
1186 revlog = self.revlog
1186 revlog = self.revlog
1187 chainbase = revlog.chainbase(base)
1187 chainbase = revlog.chainbase(base)
1188 if revlog.delta_config.general_delta:
1188 if revlog.delta_config.general_delta:
1189 deltabase = base
1189 deltabase = base
1190 else:
1190 else:
1191 if target_rev is not None and base != target_rev - 1:
1191 if target_rev is not None and base != target_rev - 1:
1192 msg = (
1192 msg = (
1193 b'general delta cannot use delta for something else '
1193 b'general delta cannot use delta for something else '
1194 b'than `prev`: %d<-%d'
1194 b'than `prev`: %d<-%d'
1195 )
1195 )
1196 msg %= (base, target_rev)
1196 msg %= (base, target_rev)
1197 raise error.ProgrammingError(msg)
1197 raise error.ProgrammingError(msg)
1198 deltabase = chainbase
1198 deltabase = chainbase
1199 snapshotdepth = None
1199 snapshotdepth = None
1200 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1200 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1201 snapshotdepth = 0
1201 snapshotdepth = 0
1202 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1202 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1203 # A delta chain should always be one full snapshot,
1203 # A delta chain should always be one full snapshot,
1204 # zero or more semi-snapshots, and zero or more deltas
1204 # zero or more semi-snapshots, and zero or more deltas
1205 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1205 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1206 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1206 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1207 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1207 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1208 delta = None
1208 delta = None
1209 if revinfo.cachedelta:
1209 if revinfo.cachedelta:
1210 cachebase = revinfo.cachedelta[0]
1210 cachebase = revinfo.cachedelta[0]
1211 # check if the diff still apply
1211 # check if the diff still apply
1212 currentbase = cachebase
1212 currentbase = cachebase
1213 while (
1213 while (
1214 currentbase != nullrev
1214 currentbase != nullrev
1215 and currentbase != base
1215 and currentbase != base
1216 and self.revlog.length(currentbase) == 0
1216 and self.revlog.length(currentbase) == 0
1217 ):
1217 ):
1218 currentbase = self.revlog.deltaparent(currentbase)
1218 currentbase = self.revlog.deltaparent(currentbase)
1219 if self.revlog.delta_config.lazy_delta and currentbase == base:
1219 if self.revlog.delta_config.lazy_delta and currentbase == base:
1220 delta = revinfo.cachedelta[1]
1220 delta = revinfo.cachedelta[1]
1221 if delta is None:
1221 if delta is None:
1222 delta = self._builddeltadiff(base, revinfo)
1222 delta = self._builddeltadiff(base, revinfo)
1223 if self._debug_search:
1223 if self._debug_search:
1224 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1224 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1225 msg %= len(delta)
1225 msg %= len(delta)
1226 self._write_debug(msg)
1226 self._write_debug(msg)
1227 # snapshotdept need to be neither None nor 0 level snapshot
1227 # snapshotdept need to be neither None nor 0 level snapshot
1228 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1228 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1229 lowestrealisticdeltalen = (
1229 lowestrealisticdeltalen = (
1230 len(delta) // revlog.delta_config.upper_bound_comp
1230 len(delta) // revlog.delta_config.upper_bound_comp
1231 )
1231 )
1232 snapshotlimit = revinfo.textlen >> snapshotdepth
1232 snapshotlimit = revinfo.textlen >> snapshotdepth
1233 if self._debug_search:
1233 if self._debug_search:
1234 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1234 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1235 msg %= lowestrealisticdeltalen
1235 msg %= lowestrealisticdeltalen
1236 self._write_debug(msg)
1236 self._write_debug(msg)
1237 if snapshotlimit < lowestrealisticdeltalen:
1237 if snapshotlimit < lowestrealisticdeltalen:
1238 if self._debug_search:
1238 if self._debug_search:
1239 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1239 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1240 self._write_debug(msg)
1240 self._write_debug(msg)
1241 return None
1241 return None
1242 if revlog.length(base) < lowestrealisticdeltalen:
1242 if revlog.length(base) < lowestrealisticdeltalen:
1243 if self._debug_search:
1243 if self._debug_search:
1244 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1244 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1245 self._write_debug(msg)
1245 self._write_debug(msg)
1246 return None
1246 return None
1247 header, data = revlog._inner.compress(delta)
1247 header, data = revlog._inner.compress(delta)
1248 deltalen = len(header) + len(data)
1248 deltalen = len(header) + len(data)
1249 offset = revlog.end(len(revlog) - 1)
1249 offset = revlog.end(len(revlog) - 1)
1250 dist = deltalen + offset - revlog.start(chainbase)
1250 dist = deltalen + offset - revlog.start(chainbase)
1251 chainlen, compresseddeltalen = revlog._chaininfo(base)
1251 chainlen, compresseddeltalen = revlog._chaininfo(base)
1252 chainlen += 1
1252 chainlen += 1
1253 compresseddeltalen += deltalen
1253 compresseddeltalen += deltalen
1254
1254
1255 return _deltainfo(
1255 return _deltainfo(
1256 dist,
1256 dist,
1257 deltalen,
1257 deltalen,
1258 (header, data),
1258 (header, data),
1259 deltabase,
1259 deltabase,
1260 chainbase,
1260 chainbase,
1261 chainlen,
1261 chainlen,
1262 compresseddeltalen,
1262 compresseddeltalen,
1263 snapshotdepth,
1263 snapshotdepth,
1264 )
1264 )
1265
1265
1266 def _fullsnapshotinfo(self, revinfo, curr):
1266 def _fullsnapshotinfo(self, revinfo, curr):
1267 rawtext = self.buildtext(revinfo)
1267 rawtext = self.buildtext(revinfo)
1268 data = self.revlog._inner.compress(rawtext)
1268 data = self.revlog._inner.compress(rawtext)
1269 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1269 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1270 deltabase = chainbase = curr
1270 deltabase = chainbase = curr
1271 snapshotdepth = 0
1271 snapshotdepth = 0
1272 chainlen = 1
1272 chainlen = 1
1273
1273
1274 return _deltainfo(
1274 return _deltainfo(
1275 dist,
1275 dist,
1276 deltalen,
1276 deltalen,
1277 data,
1277 data,
1278 deltabase,
1278 deltabase,
1279 chainbase,
1279 chainbase,
1280 chainlen,
1280 chainlen,
1281 compresseddeltalen,
1281 compresseddeltalen,
1282 snapshotdepth,
1282 snapshotdepth,
1283 )
1283 )
1284
1284
1285 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1285 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1286 """Find an acceptable delta against a candidate revision
1286 """Find an acceptable delta against a candidate revision
1287
1287
1288 revinfo: information about the revision (instance of _revisioninfo)
1288 revinfo: information about the revision (instance of _revisioninfo)
1289
1289
1290 Returns the first acceptable candidate revision, as ordered by
1290 Returns the first acceptable candidate revision, as ordered by
1291 _candidategroups
1291 _candidategroups
1292
1292
1293 If no suitable deltabase is found, we return delta info for a full
1293 If no suitable deltabase is found, we return delta info for a full
1294 snapshot.
1294 snapshot.
1295
1295
1296 `excluded_bases` is an optional set of revision that cannot be used as
1296 `excluded_bases` is an optional set of revision that cannot be used as
1297 a delta base. Use this to recompute delta suitable in censor or strip
1297 a delta base. Use this to recompute delta suitable in censor or strip
1298 context.
1298 context.
1299 """
1299 """
1300 if target_rev is None:
1300 if target_rev is None:
1301 target_rev = len(self.revlog)
1301 target_rev = len(self.revlog)
1302
1302
1303 gather_debug = self._gather_debug
1303 gather_debug = self._gather_debug
1304 cachedelta = revinfo.cachedelta
1304 cachedelta = revinfo.cachedelta
1305 revlog = self.revlog
1305 revlog = self.revlog
1306 p1r = p2r = None
1306 p1r = p2r = None
1307
1307
1308 if excluded_bases is None:
1308 if excluded_bases is None:
1309 excluded_bases = set()
1309 excluded_bases = set()
1310
1310
1311 if gather_debug:
1311 if gather_debug:
1312 start = util.timer()
1312 start = util.timer()
1313 dbg = self._one_dbg_data()
1313 dbg = self._one_dbg_data()
1314 dbg['revision'] = target_rev
1314 dbg['revision'] = target_rev
1315 target_revlog = b"UNKNOWN"
1315 target_revlog = b"UNKNOWN"
1316 target_type = self.revlog.target[0]
1316 target_type = self.revlog.target[0]
1317 target_key = self.revlog.target[1]
1317 target_key = self.revlog.target[1]
1318 if target_type == KIND_CHANGELOG:
1318 if target_type == KIND_CHANGELOG:
1319 target_revlog = b'CHANGELOG:'
1319 target_revlog = b'CHANGELOG:'
1320 elif target_type == KIND_MANIFESTLOG:
1320 elif target_type == KIND_MANIFESTLOG:
1321 target_revlog = b'MANIFESTLOG:'
1321 target_revlog = b'MANIFESTLOG:'
1322 if target_key:
1322 if target_key:
1323 target_revlog += b'%s:' % target_key
1323 target_revlog += b'%s:' % target_key
1324 elif target_type == KIND_FILELOG:
1324 elif target_type == KIND_FILELOG:
1325 target_revlog = b'FILELOG:'
1325 target_revlog = b'FILELOG:'
1326 if target_key:
1326 if target_key:
1327 target_revlog += b'%s:' % target_key
1327 target_revlog += b'%s:' % target_key
1328 dbg['target-revlog'] = target_revlog
1328 dbg['target-revlog'] = target_revlog
1329 p1r = revlog.rev(revinfo.p1)
1329 p1r = revlog.rev(revinfo.p1)
1330 p2r = revlog.rev(revinfo.p2)
1330 p2r = revlog.rev(revinfo.p2)
1331 if p1r != nullrev:
1331 if p1r != nullrev:
1332 p1_chain_len = revlog._chaininfo(p1r)[0]
1332 p1_chain_len = revlog._chaininfo(p1r)[0]
1333 else:
1333 else:
1334 p1_chain_len = -1
1334 p1_chain_len = -1
1335 if p2r != nullrev:
1335 if p2r != nullrev:
1336 p2_chain_len = revlog._chaininfo(p2r)[0]
1336 p2_chain_len = revlog._chaininfo(p2r)[0]
1337 else:
1337 else:
1338 p2_chain_len = -1
1338 p2_chain_len = -1
1339 dbg['p1-chain-len'] = p1_chain_len
1339 dbg['p1-chain-len'] = p1_chain_len
1340 dbg['p2-chain-len'] = p2_chain_len
1340 dbg['p2-chain-len'] = p2_chain_len
1341
1341
1342 # 1) if the revision is empty, no amount of delta can beat it
1342 # 1) if the revision is empty, no amount of delta can beat it
1343 #
1343 #
1344 # 2) no delta for flag processor revision (see "candelta" for why)
1344 # 2) no delta for flag processor revision (see "candelta" for why)
1345 # not calling candelta since only one revision needs test, also to
1345 # not calling candelta since only one revision needs test, also to
1346 # avoid overhead fetching flags again.
1346 # avoid overhead fetching flags again.
1347 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1347 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1348 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1348 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1349 if gather_debug:
1349 if gather_debug:
1350 end = util.timer()
1350 end = util.timer()
1351 dbg['duration'] = end - start
1351 dbg['duration'] = end - start
1352 dbg[
1352 dbg[
1353 'delta-base'
1353 'delta-base'
1354 ] = deltainfo.base # pytype: disable=attribute-error
1354 ] = deltainfo.base # pytype: disable=attribute-error
1355 dbg['search_round_count'] = 0
1355 dbg['search_round_count'] = 0
1356 dbg['using-cached-base'] = False
1356 dbg['using-cached-base'] = False
1357 dbg['delta_try_count'] = 0
1357 dbg['delta_try_count'] = 0
1358 dbg['type'] = b"full"
1358 dbg['type'] = b"full"
1359 dbg['snapshot-depth'] = 0
1359 dbg['snapshot-depth'] = 0
1360 self._dbg_process_data(dbg)
1360 self._dbg_process_data(dbg)
1361 return deltainfo
1361 return deltainfo
1362
1362
1363 deltainfo = None
1363 deltainfo = None
1364
1364
1365 # If this source delta are to be forcibly reuse, let us comply early.
1365 # If this source delta are to be forcibly reuse, let us comply early.
1366 if (
1366 if (
1367 revlog.delta_config.general_delta
1367 revlog.delta_config.general_delta
1368 and revinfo.cachedelta is not None
1368 and revinfo.cachedelta is not None
1369 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1369 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1370 ):
1370 ):
1371 base = revinfo.cachedelta[0]
1371 base = revinfo.cachedelta[0]
1372 if base == nullrev:
1372 if base == nullrev:
1373 dbg_type = b"full"
1373 dbg_type = b"full"
1374 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1374 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1375 if gather_debug:
1375 if gather_debug:
1376 snapshotdepth = 0
1376 snapshotdepth = 0
1377 elif base not in excluded_bases:
1377 elif base not in excluded_bases:
1378 delta = revinfo.cachedelta[1]
1378 delta = revinfo.cachedelta[1]
1379 header, data = revlog.compress(delta)
1379 header, data = revlog.compress(delta)
1380 deltalen = len(header) + len(data)
1380 deltalen = len(header) + len(data)
1381 if gather_debug:
1381 if gather_debug:
1382 offset = revlog.end(len(revlog) - 1)
1382 offset = revlog.end(len(revlog) - 1)
1383 chainbase = revlog.chainbase(base)
1383 chainbase = revlog.chainbase(base)
1384 distance = deltalen + offset - revlog.start(chainbase)
1384 distance = deltalen + offset - revlog.start(chainbase)
1385 chainlen, compresseddeltalen = revlog._chaininfo(base)
1385 chainlen, compresseddeltalen = revlog._chaininfo(base)
1386 chainlen += 1
1386 chainlen += 1
1387 compresseddeltalen += deltalen
1387 compresseddeltalen += deltalen
1388 if base == p1r or base == p2r:
1388 if base == p1r or base == p2r:
1389 dbg_type = b"delta"
1389 dbg_type = b"delta"
1390 snapshotdepth = None
1390 snapshotdepth = None
1391 elif not revlog.issnapshot(base):
1391 elif not revlog.issnapshot(base):
1392 snapshotdepth = None
1392 snapshotdepth = None
1393 else:
1393 else:
1394 dbg_type = b"snapshot"
1394 dbg_type = b"snapshot"
1395 snapshotdepth = revlog.snapshotdepth(base) + 1
1395 snapshotdepth = revlog.snapshotdepth(base) + 1
1396 else:
1396 else:
1397 distance = None
1397 distance = None
1398 chainbase = None
1398 chainbase = None
1399 chainlen = None
1399 chainlen = None
1400 compresseddeltalen = None
1400 compresseddeltalen = None
1401 snapshotdepth = None
1401 snapshotdepth = None
1402 deltainfo = _deltainfo(
1402 deltainfo = _deltainfo(
1403 distance=distance,
1403 distance=distance,
1404 deltalen=deltalen,
1404 deltalen=deltalen,
1405 data=(header, data),
1405 data=(header, data),
1406 base=base,
1406 base=base,
1407 chainbase=chainbase,
1407 chainbase=chainbase,
1408 chainlen=chainlen,
1408 chainlen=chainlen,
1409 compresseddeltalen=compresseddeltalen,
1409 compresseddeltalen=compresseddeltalen,
1410 snapshotdepth=snapshotdepth,
1410 snapshotdepth=snapshotdepth,
1411 )
1411 )
1412
1412
1413 if deltainfo is not None:
1413 if deltainfo is not None:
1414 if gather_debug:
1414 if gather_debug:
1415 end = util.timer()
1415 end = util.timer()
1416 dbg['duration'] = end - start
1416 dbg['duration'] = end - start
1417 dbg[
1417 dbg[
1418 'delta-base'
1418 'delta-base'
1419 ] = deltainfo.base # pytype: disable=attribute-error
1419 ] = deltainfo.base # pytype: disable=attribute-error
1420 dbg['search_round_count'] = 0
1420 dbg['search_round_count'] = 0
1421 dbg['using-cached-base'] = True
1421 dbg['using-cached-base'] = True
1422 dbg['delta_try_count'] = 0
1422 dbg['delta_try_count'] = 0
1423 dbg['type'] = b"full"
1423 dbg['type'] = b"full"
1424 if snapshotdepth is None:
1424 if snapshotdepth is None:
1425 dbg['snapshot-depth'] = 0
1425 dbg['snapshot-depth'] = 0
1426 else:
1426 else:
1427 dbg['snapshot-depth'] = snapshotdepth
1427 dbg['snapshot-depth'] = snapshotdepth
1428 self._dbg_process_data(dbg)
1428 self._dbg_process_data(dbg)
1429 return deltainfo
1429 return deltainfo
1430
1430
1431 # count the number of different delta we tried (for debug purpose)
1431 # count the number of different delta we tried (for debug purpose)
1432 dbg_try_count = 0
1432 dbg_try_count = 0
1433 # count the number of "search round" we did. (for debug purpose)
1433 # count the number of "search round" we did. (for debug purpose)
1434 dbg_try_rounds = 0
1434 dbg_try_rounds = 0
1435 dbg_type = b'unknown'
1435 dbg_type = b'unknown'
1436
1436
1437 if p1r is None:
1437 if p1r is None:
1438 p1r = revlog.rev(revinfo.p1)
1438 p1r = revlog.rev(revinfo.p1)
1439 p2r = revlog.rev(revinfo.p2)
1439 p2r = revlog.rev(revinfo.p2)
1440
1440
1441 if self._debug_search:
1441 if self._debug_search:
1442 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1442 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1443 msg %= target_rev
1443 msg %= target_rev
1444 self._write_debug(msg)
1444 self._write_debug(msg)
1445
1445
1446 search = _DeltaSearch(
1446 search = _DeltaSearch(
1447 self.revlog,
1447 self.revlog,
1448 revinfo.textlen,
1448 revinfo.textlen,
1449 p1r,
1449 p1r,
1450 p2r,
1450 p2r,
1451 cachedelta,
1451 cachedelta,
1452 excluded_bases,
1452 excluded_bases,
1453 target_rev,
1453 target_rev,
1454 snapshot_cache=self._snapshot_cache,
1454 snapshot_cache=self._snapshot_cache,
1455 )
1455 )
1456
1456
1457 groups = search.candidate_groups()
1457 groups = search.candidate_groups()
1458 candidaterevs = next(groups)
1458 candidaterevs = next(groups)
1459 while candidaterevs is not None:
1459 while candidaterevs is not None:
1460 dbg_try_rounds += 1
1460 dbg_try_rounds += 1
1461 if self._debug_search:
1461 if self._debug_search:
1462 prev = None
1462 prev = None
1463 if deltainfo is not None:
1463 if deltainfo is not None:
1464 prev = deltainfo.base
1464 prev = deltainfo.base
1465
1465
1466 if (
1466 if (
1467 cachedelta is not None
1467 cachedelta is not None
1468 and len(candidaterevs) == 1
1468 and len(candidaterevs) == 1
1469 and cachedelta[0] in candidaterevs
1469 and cachedelta[0] in candidaterevs
1470 ):
1470 ):
1471 round_type = b"cached-delta"
1471 round_type = b"cached-delta"
1472 elif p1r in candidaterevs or p2r in candidaterevs:
1472 elif p1r in candidaterevs or p2r in candidaterevs:
1473 round_type = b"parents"
1473 round_type = b"parents"
1474 elif prev is not None and all(c < prev for c in candidaterevs):
1474 elif prev is not None and all(c < prev for c in candidaterevs):
1475 round_type = b"refine-down"
1475 round_type = b"refine-down"
1476 elif prev is not None and all(c > prev for c in candidaterevs):
1476 elif prev is not None and all(c > prev for c in candidaterevs):
1477 round_type = b"refine-up"
1477 round_type = b"refine-up"
1478 else:
1478 else:
1479 round_type = b"search-down"
1479 round_type = b"search-down"
1480 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1480 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1481 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1481 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1482 self._write_debug(msg)
1482 self._write_debug(msg)
1483 nominateddeltas = []
1483 nominateddeltas = []
1484 if deltainfo is not None:
1484 if deltainfo is not None:
1485 if self._debug_search:
1485 if self._debug_search:
1486 msg = (
1486 msg = (
1487 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1487 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1488 )
1488 )
1489 msg %= (deltainfo.base, deltainfo.deltalen)
1489 msg %= (deltainfo.base, deltainfo.deltalen)
1490 self._write_debug(msg)
1490 self._write_debug(msg)
1491 # if we already found a good delta,
1491 # if we already found a good delta,
1492 # challenge it against refined candidates
1492 # challenge it against refined candidates
1493 nominateddeltas.append(deltainfo)
1493 nominateddeltas.append(deltainfo)
1494 for candidaterev in candidaterevs:
1494 for candidaterev in candidaterevs:
1495 if self._debug_search:
1495 if self._debug_search:
1496 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1496 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1497 msg %= candidaterev
1497 msg %= candidaterev
1498 self._write_debug(msg)
1498 self._write_debug(msg)
1499 candidate_type = None
1499 candidate_type = None
1500 if candidaterev == p1r:
1500 if candidaterev == p1r:
1501 candidate_type = b"p1"
1501 candidate_type = b"p1"
1502 elif candidaterev == p2r:
1502 elif candidaterev == p2r:
1503 candidate_type = b"p2"
1503 candidate_type = b"p2"
1504 elif self.revlog.issnapshot(candidaterev):
1504 elif self.revlog.issnapshot(candidaterev):
1505 candidate_type = b"snapshot-%d"
1505 candidate_type = b"snapshot-%d"
1506 candidate_type %= self.revlog.snapshotdepth(
1506 candidate_type %= self.revlog.snapshotdepth(
1507 candidaterev
1507 candidaterev
1508 )
1508 )
1509
1509
1510 if candidate_type is not None:
1510 if candidate_type is not None:
1511 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1511 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1512 msg %= candidate_type
1512 msg %= candidate_type
1513 self._write_debug(msg)
1513 self._write_debug(msg)
1514 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1514 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1515 msg %= self.revlog.length(candidaterev)
1515 msg %= self.revlog.length(candidaterev)
1516 self._write_debug(msg)
1516 self._write_debug(msg)
1517 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1517 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1518 msg %= self.revlog.deltaparent(candidaterev)
1518 msg %= self.revlog.deltaparent(candidaterev)
1519 self._write_debug(msg)
1519 self._write_debug(msg)
1520
1520
1521 dbg_try_count += 1
1521 dbg_try_count += 1
1522
1522
1523 if self._debug_search:
1523 if self._debug_search:
1524 delta_start = util.timer()
1524 delta_start = util.timer()
1525 candidatedelta = self._builddeltainfo(
1525 candidatedelta = self._builddeltainfo(
1526 revinfo,
1526 revinfo,
1527 candidaterev,
1527 candidaterev,
1528 target_rev=target_rev,
1528 target_rev=target_rev,
1529 )
1529 )
1530 if self._debug_search:
1530 if self._debug_search:
1531 delta_end = util.timer()
1531 delta_end = util.timer()
1532 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1532 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1533 msg %= delta_end - delta_start
1533 msg %= delta_end - delta_start
1534 self._write_debug(msg)
1534 self._write_debug(msg)
1535 if candidatedelta is not None:
1535 if candidatedelta is not None:
1536 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1536 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1537 if self._debug_search:
1537 if self._debug_search:
1538 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1538 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1539 msg %= candidatedelta.deltalen
1539 msg %= candidatedelta.deltalen
1540 self._write_debug(msg)
1540 self._write_debug(msg)
1541 nominateddeltas.append(candidatedelta)
1541 nominateddeltas.append(candidatedelta)
1542 elif self._debug_search:
1542 elif self._debug_search:
1543 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1543 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1544 msg %= candidatedelta.deltalen
1544 msg %= candidatedelta.deltalen
1545 self._write_debug(msg)
1545 self._write_debug(msg)
1546 elif self._debug_search:
1546 elif self._debug_search:
1547 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1547 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1548 self._write_debug(msg)
1548 self._write_debug(msg)
1549 if nominateddeltas:
1549 if nominateddeltas:
1550 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1550 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1551 if deltainfo is not None:
1551 if deltainfo is not None:
1552 candidaterevs = groups.send(deltainfo.base)
1552 candidaterevs = groups.send(deltainfo.base)
1553 else:
1553 else:
1554 candidaterevs = next(groups)
1554 candidaterevs = next(groups)
1555
1555
1556 if deltainfo is None:
1556 if deltainfo is None:
1557 dbg_type = b"full"
1557 dbg_type = b"full"
1558 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1558 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1559 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1559 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1560 dbg_type = b"snapshot"
1560 dbg_type = b"snapshot"
1561 else:
1561 else:
1562 dbg_type = b"delta"
1562 dbg_type = b"delta"
1563
1563
1564 if gather_debug:
1564 if gather_debug:
1565 end = util.timer()
1565 end = util.timer()
1566 if dbg_type == b'full':
1566 if dbg_type == b'full':
1567 used_cached = (
1567 used_cached = (
1568 cachedelta is not None
1568 cachedelta is not None
1569 and dbg_try_rounds == 0
1569 and dbg_try_rounds == 0
1570 and dbg_try_count == 0
1570 and dbg_try_count == 0
1571 and cachedelta[0] == nullrev
1571 and cachedelta[0] == nullrev
1572 )
1572 )
1573 else:
1573 else:
1574 used_cached = (
1574 used_cached = (
1575 cachedelta is not None
1575 cachedelta is not None
1576 and dbg_try_rounds == 1
1576 and dbg_try_rounds == 1
1577 and dbg_try_count == 1
1577 and dbg_try_count == 1
1578 and deltainfo.base == cachedelta[0]
1578 and deltainfo.base == cachedelta[0]
1579 )
1579 )
1580 dbg['duration'] = end - start
1580 dbg['duration'] = end - start
1581 dbg[
1581 dbg[
1582 'delta-base'
1582 'delta-base'
1583 ] = deltainfo.base # pytype: disable=attribute-error
1583 ] = deltainfo.base # pytype: disable=attribute-error
1584 dbg['search_round_count'] = dbg_try_rounds
1584 dbg['search_round_count'] = dbg_try_rounds
1585 dbg['using-cached-base'] = used_cached
1585 dbg['using-cached-base'] = used_cached
1586 dbg['delta_try_count'] = dbg_try_count
1586 dbg['delta_try_count'] = dbg_try_count
1587 dbg['type'] = dbg_type
1587 dbg['type'] = dbg_type
1588 if (
1588 if (
1589 deltainfo.snapshotdepth # pytype: disable=attribute-error
1589 deltainfo.snapshotdepth # pytype: disable=attribute-error
1590 is not None
1590 is not None
1591 ):
1591 ):
1592 dbg[
1592 dbg[
1593 'snapshot-depth'
1593 'snapshot-depth'
1594 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1594 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1595 else:
1595 else:
1596 dbg['snapshot-depth'] = 0
1596 dbg['snapshot-depth'] = 0
1597 self._dbg_process_data(dbg)
1597 self._dbg_process_data(dbg)
1598 return deltainfo
1598 return deltainfo
1599
1599
1600 def _one_dbg_data(self):
1600 def _one_dbg_data(self):
1601 return {
1601 return {
1602 'duration': None,
1602 'duration': None,
1603 'revision': None,
1603 'revision': None,
1604 'delta-base': None,
1604 'delta-base': None,
1605 'search_round_count': None,
1605 'search_round_count': None,
1606 'using-cached-base': None,
1606 'using-cached-base': None,
1607 'delta_try_count': None,
1607 'delta_try_count': None,
1608 'type': None,
1608 'type': None,
1609 'p1-chain-len': None,
1609 'p1-chain-len': None,
1610 'p2-chain-len': None,
1610 'p2-chain-len': None,
1611 'snapshot-depth': None,
1611 'snapshot-depth': None,
1612 'target-revlog': None,
1612 'target-revlog': None,
1613 }
1613 }
1614
1614
1615 def _dbg_process_data(self, dbg):
1615 def _dbg_process_data(self, dbg):
1616 if self._debug_info is not None:
1616 if self._debug_info is not None:
1617 self._debug_info.append(dbg)
1617 self._debug_info.append(dbg)
1618
1618
1619 if self._write_debug is not None:
1619 if self._write_debug is not None:
1620 msg = (
1620 msg = (
1621 b"DBG-DELTAS:"
1621 b"DBG-DELTAS:"
1622 b" %-12s"
1622 b" %-12s"
1623 b" rev=%d:"
1623 b" rev=%d:"
1624 b" delta-base=%d"
1624 b" delta-base=%d"
1625 b" is-cached=%d"
1625 b" is-cached=%d"
1626 b" - search-rounds=%d"
1626 b" - search-rounds=%d"
1627 b" try-count=%d"
1627 b" try-count=%d"
1628 b" - delta-type=%-6s"
1628 b" - delta-type=%-6s"
1629 b" snap-depth=%d"
1629 b" snap-depth=%d"
1630 b" - p1-chain-length=%d"
1630 b" - p1-chain-length=%d"
1631 b" p2-chain-length=%d"
1631 b" p2-chain-length=%d"
1632 b" - duration=%f"
1632 b" - duration=%f"
1633 b"\n"
1633 b"\n"
1634 )
1634 )
1635 msg %= (
1635 msg %= (
1636 dbg["target-revlog"],
1636 dbg["target-revlog"],
1637 dbg["revision"],
1637 dbg["revision"],
1638 dbg["delta-base"],
1638 dbg["delta-base"],
1639 dbg["using-cached-base"],
1639 dbg["using-cached-base"],
1640 dbg["search_round_count"],
1640 dbg["search_round_count"],
1641 dbg["delta_try_count"],
1641 dbg["delta_try_count"],
1642 dbg["type"],
1642 dbg["type"],
1643 dbg["snapshot-depth"],
1643 dbg["snapshot-depth"],
1644 dbg["p1-chain-len"],
1644 dbg["p1-chain-len"],
1645 dbg["p2-chain-len"],
1645 dbg["p2-chain-len"],
1646 dbg["duration"],
1646 dbg["duration"],
1647 )
1647 )
1648 self._write_debug(msg)
1648 self._write_debug(msg)
1649
1649
1650
1650
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, body = deltainfo.data
    if not header:
        # No compression header: decide from the payload itself.
        if not body:
            # no data to store at all... declare it uncompressed
            return COMP_MODE_PLAIN, deltainfo
        first_byte = body[0:1]
        if first_byte == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if first_byte == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # we have a more efficient way to declare uncompressed data: drop
        # the b'u' marker and flag the entry as PLAIN.
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now