##// END OF EJS Templates
delta-find: drop the temporary indent...
marmoute -
r52230:ac8b798e default
parent child Browse files
Show More
@@ -1,1713 +1,1712 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is an list of revision payload boundaries"""
50 from .. import revlog
50 from .. import revlog
51
51
52 self._data = data
52 self._data = data
53 self.data_config = revlog.DataConfig()
53 self.data_config = revlog.DataConfig()
54 self.data_config.sr_density_threshold = density
54 self.data_config.sr_density_threshold = density
55 self.data_config.sr_min_gap_size = mingap
55 self.data_config.sr_min_gap_size = mingap
56 self.delta_config = revlog.DeltaConfig()
56 self.delta_config = revlog.DeltaConfig()
57 self.feature_config = revlog.FeatureConfig()
57 self.feature_config = revlog.FeatureConfig()
58 self._snapshot = set(snapshot)
58 self._snapshot = set(snapshot)
59 self.index = None
59 self.index = None
60
60
61 def start(self, rev):
61 def start(self, rev):
62 if rev == nullrev:
62 if rev == nullrev:
63 return 0
63 return 0
64 if rev == 0:
64 if rev == 0:
65 return 0
65 return 0
66 return self._data[rev - 1]
66 return self._data[rev - 1]
67
67
68 def end(self, rev):
68 def end(self, rev):
69 if rev == nullrev:
69 if rev == nullrev:
70 return 0
70 return 0
71 return self._data[rev]
71 return self._data[rev]
72
72
73 def length(self, rev):
73 def length(self, rev):
74 return self.end(rev) - self.start(rev)
74 return self.end(rev) - self.start(rev)
75
75
76 def __len__(self):
76 def __len__(self):
77 return len(self._data)
77 return len(self._data)
78
78
79 def issnapshot(self, rev):
79 def issnapshot(self, rev):
80 if rev == nullrev:
80 if rev == nullrev:
81 return True
81 return True
82 return rev in self._snapshot
82 return rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    min_gap = revlog.data_config.sr_min_gap_size
    if targetsize is not None:
        targetsize = max(targetsize, min_gap)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available, fall back to the Python one

        def densityslicing(rs, density, gap):
            return _slicechunktodensity(revlog, rs, density, gap)

    density_threshold = revlog.data_config.sr_density_threshold
    for chunk in densityslicing(revs, density_threshold, min_gap):
        # enforce the size limit on each density-selected chunk
        yield from _slicechunktosize(revlog, chunk, targetsize)
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole span already fits within the limit (or there is no limit):
    # nothing to slice
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    # (grow the current chunk while the next rev is a snapshot that still
    # fits; flush the chunk as soon as either condition fails)
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            # restart a fresh chunk at the current revision
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate chunk until it fits (or is a single rev)
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # flush whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) is always dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data actually read;
        # recompute the density on the remaining read size
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # the span runs from the start of the first rev to the end of the last
    return revlog.end(revs[-1]) - revlog.start(revs[0])
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == replace_header:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        fulltext = mdiff.patch(revlog.revision(baserev), delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            msg = _(b'node %s is not censored') % expectednode
            raise error.StorageError(msg)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
557 @attr.s(slots=True, frozen=True)
557 @attr.s(slots=True, frozen=True)
558 class _deltainfo:
558 class _deltainfo:
559 distance = attr.ib()
559 distance = attr.ib()
560 deltalen = attr.ib()
560 deltalen = attr.ib()
561 data = attr.ib()
561 data = attr.ib()
562 base = attr.ib()
562 base = attr.ib()
563 chainbase = attr.ib()
563 chainbase = attr.ib()
564 chainlen = attr.ib()
564 chainlen = attr.ib()
565 compresseddeltalen = attr.ib()
565 compresseddeltalen = attr.ib()
566 snapshotdepth = attr.ib()
566 snapshotdepth = attr.ib()
567
567
568
568
569 def drop_u_compression(delta):
569 def drop_u_compression(delta):
570 """turn into a "u" (no-compression) into no-compression without header
570 """turn into a "u" (no-compression) into no-compression without header
571
571
572 This is useful for revlog format that has better compression method.
572 This is useful for revlog format that has better compression method.
573 """
573 """
574 assert delta.data[0] == b'u', delta.data[0]
574 assert delta.data[0] == b'u', delta.data[0]
575 return _deltainfo(
575 return _deltainfo(
576 delta.distance,
576 delta.distance,
577 delta.deltalen - 1,
577 delta.deltalen - 1,
578 (b'', delta.data[1]),
578 (b'', delta.data[1]),
579 delta.base,
579 delta.base,
580 delta.chainbase,
580 delta.chainbase,
581 delta.chainlen,
581 delta.chainlen,
582 delta.compresseddeltalen,
582 delta.compresseddeltalen,
583 delta.snapshotdepth,
583 delta.snapshotdepth,
584 )
584 )
585
585
586
586
587 # If a revision's full text is that much bigger than a base candidate full
587 # If a revision's full text is that much bigger than a base candidate full
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
589 # consider these candidates.
589 # consider these candidates.
590 LIMIT_BASE2TEXT = 500
590 LIMIT_BASE2TEXT = 500
591
591
592
592
593 class _DeltaSearch:
593 class _DeltaSearch:
594 """perform the search of a good delta for a single revlog revision
594 """perform the search of a good delta for a single revlog revision
595
595
596 note: some of the deltacomputer.finddeltainfo logic should probably move
596 note: some of the deltacomputer.finddeltainfo logic should probably move
597 here.
597 here.
598 """
598 """
599
599
600 def __init__(
600 def __init__(
601 self,
601 self,
602 revlog,
602 revlog,
603 revinfo,
603 revinfo,
604 p1,
604 p1,
605 p2,
605 p2,
606 cachedelta,
606 cachedelta,
607 excluded_bases=None,
607 excluded_bases=None,
608 target_rev=None,
608 target_rev=None,
609 snapshot_cache=None,
609 snapshot_cache=None,
610 ):
610 ):
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
612 # so we should never end up asking such question. Adding the assert as
612 # so we should never end up asking such question. Adding the assert as
613 # a safe-guard to detect anything that would be fishy in this regard.
613 # a safe-guard to detect anything that would be fishy in this regard.
614 assert (
614 assert (
615 cachedelta is None
615 cachedelta is None
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
617 or not revlog.delta_config.general_delta
617 or not revlog.delta_config.general_delta
618 )
618 )
619 self.revlog = revlog
619 self.revlog = revlog
620 self.revinfo = revinfo
620 self.revinfo = revinfo
621 self.textlen = revinfo.textlen
621 self.textlen = revinfo.textlen
622 self.p1 = p1
622 self.p1 = p1
623 self.p2 = p2
623 self.p2 = p2
624 self.cachedelta = cachedelta
624 self.cachedelta = cachedelta
625 self.excluded_bases = excluded_bases
625 self.excluded_bases = excluded_bases
626 if target_rev is None:
626 if target_rev is None:
627 self.target_rev = len(self.revlog)
627 self.target_rev = len(self.revlog)
628 self.target_rev = target_rev
628 self.target_rev = target_rev
629 if snapshot_cache is None:
629 if snapshot_cache is None:
630 # map: base-rev: [snapshot-revs]
630 # map: base-rev: [snapshot-revs]
631 snapshot_cache = SnapshotCache()
631 snapshot_cache = SnapshotCache()
632 self.snapshot_cache = snapshot_cache
632 self.snapshot_cache = snapshot_cache
633
633
634 self.tested = {nullrev}
634 self.tested = {nullrev}
635
635
636 self._candidates_iterator = self._candidate_groups()
636 self._candidates_iterator = self._candidate_groups()
637 self._last_good = None
637 self._last_good = None
638 self.current_group = self._candidates_iterator.send(self._last_good)
638 self.current_group = self._candidates_iterator.send(self._last_good)
639
639
640 @property
640 @property
641 def done(self):
641 def done(self):
642 """True when all possible candidate have been tested"""
642 """True when all possible candidate have been tested"""
643 return self.current_group is None
643 return self.current_group is None
644
644
645 def next_group(self, good_delta=None):
645 def next_group(self, good_delta=None):
646 """move to the next group to test
646 """move to the next group to test
647
647
648 The group of revision to test will be available in
648 The group of revision to test will be available in
649 `self.current_group`. If the previous group had any good delta, the
649 `self.current_group`. If the previous group had any good delta, the
650 best one can be passed as the `good_delta` parameter to help selecting
650 best one can be passed as the `good_delta` parameter to help selecting
651 the next group.
651 the next group.
652
652
653 If not revision remains to be, `self.done` will be True and
653 If not revision remains to be, `self.done` will be True and
654 `self.current_group` will be None.
654 `self.current_group` will be None.
655 """
655 """
656 if good_delta is not None:
656 if good_delta is not None:
657 self._last_good = good_delta.base
657 self._last_good = good_delta.base
658 self.current_group = self._candidates_iterator.send(self._last_good)
658 self.current_group = self._candidates_iterator.send(self._last_good)
659
659
660 def _candidate_groups(self):
660 def _candidate_groups(self):
661 """Provides group of revision to be tested as delta base
661 """Provides group of revision to be tested as delta base
662
662
663 This top level function focus on emitting groups with unique and
663 This top level function focus on emitting groups with unique and
664 worthwhile content. See _raw_candidate_groups for details about the
664 worthwhile content. See _raw_candidate_groups for details about the
665 group order.
665 group order.
666 """
666 """
667 # should we try to build a delta?
667 # should we try to build a delta?
668 if not (len(self.revlog) and self.revlog._storedeltachains):
668 if not (len(self.revlog) and self.revlog._storedeltachains):
669 yield None
669 yield None
670 return
670 return
671
671
672 if not self.revlog.delta_config.general_delta:
672 if not self.revlog.delta_config.general_delta:
673 # before general delta, there is only one possible delta base
673 # before general delta, there is only one possible delta base
674 yield (self.target_rev - 1,)
674 yield (self.target_rev - 1,)
675 yield None
675 yield None
676 return
676 return
677
677
678 good = None
678 good = None
679
679
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
681
681
682 tested = self.tested # prefetch for speed and code compactness
682 tested = self.tested # prefetch for speed and code compactness
683 candidates = self._refined_groups()
683 candidates = self._refined_groups()
684 while True:
684 while True:
685 temptative = candidates.send(good)
685 temptative = candidates.send(good)
686 if temptative is None:
686 if temptative is None:
687 break
687 break
688 group = self._pre_filter_candidate_revs(temptative)
688 group = self._pre_filter_candidate_revs(temptative)
689 if group:
689 if group:
690 # When the size of the candidate group is big, it can result in
690 # When the size of the candidate group is big, it can result in
691 # a quite significant performance impact. To reduce this, we
691 # a quite significant performance impact. To reduce this, we
692 # can send them in smaller batches until the new batch does not
692 # can send them in smaller batches until the new batch does not
693 # provide any improvements.
693 # provide any improvements.
694 #
694 #
695 # This might reduce the overall efficiency of the compression
695 # This might reduce the overall efficiency of the compression
696 # in some corner cases, but that should also prevent very
696 # in some corner cases, but that should also prevent very
697 # pathological cases from being an issue. (eg. 20 000
697 # pathological cases from being an issue. (eg. 20 000
698 # candidates).
698 # candidates).
699 #
699 #
700 # XXX note that the ordering of the group becomes important as
700 # XXX note that the ordering of the group becomes important as
701 # it now impacts the final result. The current order is
701 # it now impacts the final result. The current order is
702 # unprocessed and can be improved.
702 # unprocessed and can be improved.
703 if group_chunk_size == 0:
703 if group_chunk_size == 0:
704 tested.update(group)
704 tested.update(group)
705 good = yield tuple(group)
705 good = yield tuple(group)
706 else:
706 else:
707 prev_good = good
707 prev_good = good
708 for start in range(0, len(group), group_chunk_size):
708 for start in range(0, len(group), group_chunk_size):
709 sub_group = group[start : start + group_chunk_size]
709 sub_group = group[start : start + group_chunk_size]
710 tested.update(sub_group)
710 tested.update(sub_group)
711 good = yield tuple(sub_group)
711 good = yield tuple(sub_group)
712 if prev_good == good:
712 if prev_good == good:
713 break
713 break
714
714
715 yield None
715 yield None
716
716
717 def _pre_filter_candidate_revs(self, temptative):
717 def _pre_filter_candidate_revs(self, temptative):
718 """filter possible candidate before computing a delta
718 """filter possible candidate before computing a delta
719
719
720 This function use various criteria to pre-filter candidate delta base
720 This function use various criteria to pre-filter candidate delta base
721 before we compute a delta and evaluate its quality.
721 before we compute a delta and evaluate its quality.
722
722
723 Such pre-filter limit the number of computed delta, an expensive operation.
723 Such pre-filter limit the number of computed delta, an expensive operation.
724
724
725 return the updated list of revision to test
725 return the updated list of revision to test
726 """
726 """
727 deltalength = self.revlog.length
727 deltalength = self.revlog.length
728 deltaparent = self.revlog.deltaparent
728 deltaparent = self.revlog.deltaparent
729
729
730 tested = self.tested
730 tested = self.tested
731 group = []
731 group = []
732 for rev in temptative:
732 for rev in temptative:
733 # skip over empty delta (no need to include them in a chain)
733 # skip over empty delta (no need to include them in a chain)
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
735 tested.add(rev)
735 tested.add(rev)
736 rev = deltaparent(rev)
736 rev = deltaparent(rev)
737 if self._pre_filter_rev(rev):
737 if self._pre_filter_rev(rev):
738 group.append(rev)
738 group.append(rev)
739 else:
739 else:
740 self.tested.add(rev)
740 self.tested.add(rev)
741 return group
741 return group
742
742
743 def _pre_filter_rev(self, rev):
743 def _pre_filter_rev(self, rev):
744 """return True if it seems okay to test a rev, False otherwise"""
744 """return True if it seems okay to test a rev, False otherwise"""
745 if True:
745 # no need to try a delta against nullrev, this will be done as
746 # no need to try a delta against nullrev, this will be done as
746 # a last resort.
747 # a last resort.
747 if rev == nullrev:
748 if rev == nullrev:
748 return False
749 return False
749 # filter out revision we tested already
750 # filter out revision we tested already
750 if rev in self.tested:
751 if rev in self.tested:
751 return False
752 return False
753
752
754 # an higher authority deamed the base unworthy (e.g. censored)
753 # an higher authority deamed the base unworthy (e.g. censored)
755 if self.excluded_bases is not None and rev in self.excluded_bases:
754 if self.excluded_bases is not None and rev in self.excluded_bases:
756 return False
755 return False
757 # We are in some recomputation cases and that rev is too high
756 # We are in some recomputation cases and that rev is too high
758 # in the revlog
757 # in the revlog
759 if self.target_rev is not None and rev >= self.target_rev:
758 if self.target_rev is not None and rev >= self.target_rev:
760 return False
759 return False
761
760
762 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
761 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
763 # filter out delta base that will never produce good delta
762 # filter out delta base that will never produce good delta
764 #
763 #
765 # if the delta of that base is already bigger than the limit
764 # if the delta of that base is already bigger than the limit
766 # for the delta chain size, doing a delta is hopeless.
765 # for the delta chain size, doing a delta is hopeless.
767 if deltas_limit < self.revlog.length(rev):
766 if deltas_limit < self.revlog.length(rev):
768 return False
767 return False
769
768
770 sparse = self.revlog.delta_config.sparse_revlog
769 sparse = self.revlog.delta_config.sparse_revlog
771 # if the revision we test again is too small, the resulting delta
770 # if the revision we test again is too small, the resulting delta
772 # will be large anyway as that amount of data to be added is big
771 # will be large anyway as that amount of data to be added is big
773 if sparse and self.revlog.rawsize(rev) < (
772 if sparse and self.revlog.rawsize(rev) < (
774 self.textlen // LIMIT_BASE2TEXT
773 self.textlen // LIMIT_BASE2TEXT
775 ):
774 ):
776 return False
775 return False
777
776
778 # no delta for rawtext-changing revs (see "candelta" for why)
777 # no delta for rawtext-changing revs (see "candelta" for why)
779 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
778 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
780 return False
779 return False
781
780
782 # If we reach here, we are about to build and test a delta.
781 # If we reach here, we are about to build and test a delta.
783 # The delta building process will compute the chaininfo in all
782 # The delta building process will compute the chaininfo in all
784 # case, since that computation is cached, it is fine to access
783 # case, since that computation is cached, it is fine to access
785 # it here too.
784 # it here too.
786 chainlen, chainsize = self.revlog._chaininfo(rev)
785 chainlen, chainsize = self.revlog._chaininfo(rev)
787 # if chain will be too long, skip base
786 # if chain will be too long, skip base
788 if (
787 if (
789 self.revlog.delta_config.max_chain_len
788 self.revlog.delta_config.max_chain_len
790 and chainlen >= self.revlog.delta_config.max_chain_len
789 and chainlen >= self.revlog.delta_config.max_chain_len
791 ):
790 ):
792 return False
791 return False
793 # if chain already have too much data, skip base
792 # if chain already have too much data, skip base
794 if deltas_limit < chainsize:
793 if deltas_limit < chainsize:
795 return False
794 return False
796 if sparse and self.revlog.delta_config.upper_bound_comp is not None:
795 if sparse and self.revlog.delta_config.upper_bound_comp is not None:
797 maxcomp = self.revlog.delta_config.upper_bound_comp
796 maxcomp = self.revlog.delta_config.upper_bound_comp
798 basenotsnap = (self.p1, self.p2, nullrev)
797 basenotsnap = (self.p1, self.p2, nullrev)
799 if rev not in basenotsnap and self.revlog.issnapshot(rev):
798 if rev not in basenotsnap and self.revlog.issnapshot(rev):
800 snapshotdepth = self.revlog.snapshotdepth(rev)
799 snapshotdepth = self.revlog.snapshotdepth(rev)
801 # If text is significantly larger than the base, we can
800 # If text is significantly larger than the base, we can
802 # expect the resulting delta to be proportional to the size
801 # expect the resulting delta to be proportional to the size
803 # difference
802 # difference
804 revsize = self.revlog.rawsize(rev)
803 revsize = self.revlog.rawsize(rev)
805 rawsizedistance = max(self.textlen - revsize, 0)
804 rawsizedistance = max(self.textlen - revsize, 0)
806 # use an estimate of the compression upper bound.
805 # use an estimate of the compression upper bound.
807 lowestrealisticdeltalen = rawsizedistance // maxcomp
806 lowestrealisticdeltalen = rawsizedistance // maxcomp
808
807
809 # check the absolute constraint on the delta size
808 # check the absolute constraint on the delta size
810 snapshotlimit = self.textlen >> snapshotdepth
809 snapshotlimit = self.textlen >> snapshotdepth
811 if snapshotlimit < lowestrealisticdeltalen:
810 if snapshotlimit < lowestrealisticdeltalen:
812 # delta lower bound is larger than accepted upper
811 # delta lower bound is larger than accepted upper
813 # bound
812 # bound
814 return False
813 return False
815
814
816 # check the relative constraint on the delta size
815 # check the relative constraint on the delta size
817 revlength = self.revlog.length(rev)
816 revlength = self.revlog.length(rev)
818 if revlength < lowestrealisticdeltalen:
817 if revlength < lowestrealisticdeltalen:
819 # delta probable lower bound is larger than target
818 # delta probable lower bound is larger than target
820 # base
819 # base
821 return False
820 return False
822 return True
821 return True
823
822
824 def _refined_groups(self):
823 def _refined_groups(self):
825 good = None
824 good = None
826 # First we try to reuse a the delta contained in the bundle. (or from
825 # First we try to reuse a the delta contained in the bundle. (or from
827 # the source revlog)
826 # the source revlog)
828 #
827 #
829 # This logic only applies to general delta repositories and can be
828 # This logic only applies to general delta repositories and can be
830 # disabled through configuration. Disabling reuse source delta is
829 # disabled through configuration. Disabling reuse source delta is
831 # useful when we want to make sure we recomputed "optimal" deltas.
830 # useful when we want to make sure we recomputed "optimal" deltas.
832 debug_info = None
831 debug_info = None
833 if (
832 if (
834 self.cachedelta is not None
833 self.cachedelta is not None
835 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
834 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
836 ):
835 ):
837 # Assume what we received from the server is a good choice
836 # Assume what we received from the server is a good choice
838 # build delta will reuse the cache
837 # build delta will reuse the cache
839 if debug_info is not None:
838 if debug_info is not None:
840 debug_info['cached-delta.tested'] += 1
839 debug_info['cached-delta.tested'] += 1
841 good = yield (self.cachedelta[0],)
840 good = yield (self.cachedelta[0],)
842 if good is not None:
841 if good is not None:
843 if debug_info is not None:
842 if debug_info is not None:
844 debug_info['cached-delta.accepted'] += 1
843 debug_info['cached-delta.accepted'] += 1
845 yield None
844 yield None
846 return
845 return
847 groups = self._raw_groups()
846 groups = self._raw_groups()
848 for candidates in groups:
847 for candidates in groups:
849 good = yield candidates
848 good = yield candidates
850 if good is not None:
849 if good is not None:
851 break
850 break
852
851
853 # If sparse revlog is enabled, we can try to refine the available
852 # If sparse revlog is enabled, we can try to refine the available
854 # deltas
853 # deltas
855 if not self.revlog.delta_config.sparse_revlog:
854 if not self.revlog.delta_config.sparse_revlog:
856 yield None
855 yield None
857 return
856 return
858
857
859 # if we have a refinable value, try to refine it
858 # if we have a refinable value, try to refine it
860 if (
859 if (
861 good is not None
860 good is not None
862 and good not in (self.p1, self.p2)
861 and good not in (self.p1, self.p2)
863 and self.revlog.issnapshot(good)
862 and self.revlog.issnapshot(good)
864 ):
863 ):
865 # refine snapshot down
864 # refine snapshot down
866 previous = None
865 previous = None
867 while previous != good:
866 while previous != good:
868 previous = good
867 previous = good
869 base = self.revlog.deltaparent(good)
868 base = self.revlog.deltaparent(good)
870 if base == nullrev:
869 if base == nullrev:
871 break
870 break
872 good = yield (base,)
871 good = yield (base,)
873 # refine snapshot up
872 # refine snapshot up
874 if not self.snapshot_cache.snapshots:
873 if not self.snapshot_cache.snapshots:
875 self.snapshot_cache.update(self.revlog, good + 1)
874 self.snapshot_cache.update(self.revlog, good + 1)
876 previous = None
875 previous = None
877 while good != previous:
876 while good != previous:
878 previous = good
877 previous = good
879 children = tuple(
878 children = tuple(
880 sorted(c for c in self.snapshot_cache.snapshots[good])
879 sorted(c for c in self.snapshot_cache.snapshots[good])
881 )
880 )
882 good = yield children
881 good = yield children
883
882
884 if debug_info is not None:
883 if debug_info is not None:
885 if good is None:
884 if good is None:
886 debug_info['no-solution'] += 1
885 debug_info['no-solution'] += 1
887
886
888 yield None
887 yield None
889
888
890 def _raw_groups(self):
889 def _raw_groups(self):
891 """Provides group of revision to be tested as delta base
890 """Provides group of revision to be tested as delta base
892
891
893 This lower level function focus on emitting delta theorically
892 This lower level function focus on emitting delta theorically
894 interresting without looking it any practical details.
893 interresting without looking it any practical details.
895
894
896 The group order aims at providing fast or small candidates first.
895 The group order aims at providing fast or small candidates first.
897 """
896 """
898 # Why search for delta base if we cannot use a delta base ?
897 # Why search for delta base if we cannot use a delta base ?
899 assert self.revlog.delta_config.general_delta
898 assert self.revlog.delta_config.general_delta
900 # also see issue6056
899 # also see issue6056
901 sparse = self.revlog.delta_config.sparse_revlog
900 sparse = self.revlog.delta_config.sparse_revlog
902 prev = self.target_rev - 1
901 prev = self.target_rev - 1
903 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
902 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
904
903
905 # exclude already lazy tested base if any
904 # exclude already lazy tested base if any
906 parents = [p for p in (self.p1, self.p2) if p != nullrev]
905 parents = [p for p in (self.p1, self.p2) if p != nullrev]
907
906
908 if (
907 if (
909 not self.revlog.delta_config.delta_both_parents
908 not self.revlog.delta_config.delta_both_parents
910 and len(parents) == 2
909 and len(parents) == 2
911 ):
910 ):
912 parents.sort()
911 parents.sort()
913 # To minimize the chance of having to build a fulltext,
912 # To minimize the chance of having to build a fulltext,
914 # pick first whichever parent is closest to us (max rev)
913 # pick first whichever parent is closest to us (max rev)
915 yield (parents[1],)
914 yield (parents[1],)
916 # then the other one (min rev) if the first did not fit
915 # then the other one (min rev) if the first did not fit
917 yield (parents[0],)
916 yield (parents[0],)
918 elif len(parents) > 0:
917 elif len(parents) > 0:
919 # Test all parents (1 or 2), and keep the best candidate
918 # Test all parents (1 or 2), and keep the best candidate
920 yield parents
919 yield parents
921
920
922 if sparse and parents:
921 if sparse and parents:
923 # See if we can use an existing snapshot in the parent chains to
922 # See if we can use an existing snapshot in the parent chains to
924 # use as a base for a new intermediate-snapshot
923 # use as a base for a new intermediate-snapshot
925 #
924 #
926 # search for snapshot in parents delta chain map: snapshot-level:
925 # search for snapshot in parents delta chain map: snapshot-level:
927 # snapshot-rev
926 # snapshot-rev
928 parents_snaps = collections.defaultdict(set)
927 parents_snaps = collections.defaultdict(set)
929 candidate_chains = [deltachain(p) for p in parents]
928 candidate_chains = [deltachain(p) for p in parents]
930 for chain in candidate_chains:
929 for chain in candidate_chains:
931 for idx, s in enumerate(chain):
930 for idx, s in enumerate(chain):
932 if not self.revlog.issnapshot(s):
931 if not self.revlog.issnapshot(s):
933 break
932 break
934 parents_snaps[idx].add(s)
933 parents_snaps[idx].add(s)
935 snapfloor = min(parents_snaps[0]) + 1
934 snapfloor = min(parents_snaps[0]) + 1
936 self.snapshot_cache.update(self.revlog, snapfloor)
935 self.snapshot_cache.update(self.revlog, snapfloor)
937 # search for the highest "unrelated" revision
936 # search for the highest "unrelated" revision
938 #
937 #
939 # Adding snapshots used by "unrelated" revision increase the odd we
938 # Adding snapshots used by "unrelated" revision increase the odd we
940 # reuse an independant, yet better snapshot chain.
939 # reuse an independant, yet better snapshot chain.
941 #
940 #
942 # XXX instead of building a set of revisions, we could lazily
941 # XXX instead of building a set of revisions, we could lazily
943 # enumerate over the chains. That would be more efficient, however
942 # enumerate over the chains. That would be more efficient, however
944 # we stick to simple code for now.
943 # we stick to simple code for now.
945 all_revs = set()
944 all_revs = set()
946 for chain in candidate_chains:
945 for chain in candidate_chains:
947 all_revs.update(chain)
946 all_revs.update(chain)
948 other = None
947 other = None
949 for r in self.revlog.revs(prev, snapfloor):
948 for r in self.revlog.revs(prev, snapfloor):
950 if r not in all_revs:
949 if r not in all_revs:
951 other = r
950 other = r
952 break
951 break
953 if other is not None:
952 if other is not None:
954 # To avoid unfair competition, we won't use unrelated
953 # To avoid unfair competition, we won't use unrelated
955 # intermediate snapshot that are deeper than the ones from the
954 # intermediate snapshot that are deeper than the ones from the
956 # parent delta chain.
955 # parent delta chain.
957 max_depth = max(parents_snaps.keys())
956 max_depth = max(parents_snaps.keys())
958 chain = deltachain(other)
957 chain = deltachain(other)
959 for depth, s in enumerate(chain):
958 for depth, s in enumerate(chain):
960 if s < snapfloor:
959 if s < snapfloor:
961 continue
960 continue
962 if max_depth < depth:
961 if max_depth < depth:
963 break
962 break
964 if not self.revlog.issnapshot(s):
963 if not self.revlog.issnapshot(s):
965 break
964 break
966 parents_snaps[depth].add(s)
965 parents_snaps[depth].add(s)
967 # Test them as possible intermediate snapshot base We test them
966 # Test them as possible intermediate snapshot base We test them
968 # from highest to lowest level. High level one are more likely to
967 # from highest to lowest level. High level one are more likely to
969 # result in small delta
968 # result in small delta
970 floor = None
969 floor = None
971 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
970 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
972 siblings = set()
971 siblings = set()
973 for s in snaps:
972 for s in snaps:
974 siblings.update(self.snapshot_cache.snapshots[s])
973 siblings.update(self.snapshot_cache.snapshots[s])
975 # Before considering making a new intermediate snapshot, we
974 # Before considering making a new intermediate snapshot, we
976 # check if an existing snapshot, children of base we consider,
975 # check if an existing snapshot, children of base we consider,
977 # would be suitable.
976 # would be suitable.
978 #
977 #
979 # It give a change to reuse a delta chain "unrelated" to the
978 # It give a change to reuse a delta chain "unrelated" to the
980 # current revision instead of starting our own. Without such
979 # current revision instead of starting our own. Without such
981 # re-use, topological branches would keep reopening new chains.
980 # re-use, topological branches would keep reopening new chains.
982 # Creating more and more snapshot as the repository grow.
981 # Creating more and more snapshot as the repository grow.
983
982
984 if floor is not None:
983 if floor is not None:
985 # We only do this for siblings created after the one in our
984 # We only do this for siblings created after the one in our
986 # parent's delta chain. Those created before has less
985 # parent's delta chain. Those created before has less
987 # chances to be valid base since our ancestors had to
986 # chances to be valid base since our ancestors had to
988 # create a new snapshot.
987 # create a new snapshot.
989 siblings = [r for r in siblings if floor < r]
988 siblings = [r for r in siblings if floor < r]
990 yield tuple(sorted(siblings))
989 yield tuple(sorted(siblings))
991 # then test the base from our parent's delta chain.
990 # then test the base from our parent's delta chain.
992 yield tuple(sorted(snaps))
991 yield tuple(sorted(snaps))
993 floor = min(snaps)
992 floor = min(snaps)
994 # No suitable base found in the parent chain, search if any full
993 # No suitable base found in the parent chain, search if any full
995 # snapshots emitted since parent's base would be a suitable base
994 # snapshots emitted since parent's base would be a suitable base
996 # for an intermediate snapshot.
995 # for an intermediate snapshot.
997 #
996 #
998 # It give a chance to reuse a delta chain unrelated to the current
997 # It give a chance to reuse a delta chain unrelated to the current
999 # revisions instead of starting our own. Without such re-use,
998 # revisions instead of starting our own. Without such re-use,
1000 # topological branches would keep reopening new full chains.
999 # topological branches would keep reopening new full chains.
1001 # Creating more and more snapshot as the repository grow.
1000 # Creating more and more snapshot as the repository grow.
1002 full = [
1001 full = [
1003 r
1002 r
1004 for r in self.snapshot_cache.snapshots[nullrev]
1003 for r in self.snapshot_cache.snapshots[nullrev]
1005 if snapfloor <= r
1004 if snapfloor <= r
1006 ]
1005 ]
1007 yield tuple(sorted(full))
1006 yield tuple(sorted(full))
1008
1007
1009 if not sparse:
1008 if not sparse:
1010 # other approach failed try against prev to hopefully save us a
1009 # other approach failed try against prev to hopefully save us a
1011 # fulltext.
1010 # fulltext.
1012 yield (prev,)
1011 yield (prev,)
1013
1012
1014 def is_good_delta_info(self, deltainfo):
1013 def is_good_delta_info(self, deltainfo):
1015 """Returns True if the given delta is good. Good means that it is
1014 """Returns True if the given delta is good. Good means that it is
1016 within the disk span, disk size, and chain length bounds that we know
1015 within the disk span, disk size, and chain length bounds that we know
1017 to be performant."""
1016 to be performant."""
1018 if deltainfo is None:
1017 if deltainfo is None:
1019 return False
1018 return False
1020
1019
1021 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1020 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1022 # so we should never end up asking such question. Adding the assert as
1021 # so we should never end up asking such question. Adding the assert as
1023 # a safe-guard to detect anything that would be fishy in this regard.
1022 # a safe-guard to detect anything that would be fishy in this regard.
1024 assert (
1023 assert (
1025 self.revinfo.cachedelta is None
1024 self.revinfo.cachedelta is None
1026 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1025 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1027 or not self.revlog.delta_config.general_delta
1026 or not self.revlog.delta_config.general_delta
1028 )
1027 )
1029
1028
1030 # - 'deltainfo.distance' is the distance from the base revision --
1029 # - 'deltainfo.distance' is the distance from the base revision --
1031 # bounding it limits the amount of I/O we need to do.
1030 # bounding it limits the amount of I/O we need to do.
1032 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1031 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1033 # deltas we need to apply -- bounding it limits the amount of CPU
1032 # deltas we need to apply -- bounding it limits the amount of CPU
1034 # we consume.
1033 # we consume.
1035
1034
1036 textlen = self.revinfo.textlen
1035 textlen = self.revinfo.textlen
1037 defaultmax = textlen * 4
1036 defaultmax = textlen * 4
1038 maxdist = self.revlog.delta_config.max_deltachain_span
1037 maxdist = self.revlog.delta_config.max_deltachain_span
1039 if not maxdist:
1038 if not maxdist:
1040 maxdist = deltainfo.distance # ensure the conditional pass
1039 maxdist = deltainfo.distance # ensure the conditional pass
1041 maxdist = max(maxdist, defaultmax)
1040 maxdist = max(maxdist, defaultmax)
1042
1041
1043 # Bad delta from read span:
1042 # Bad delta from read span:
1044 #
1043 #
1045 # If the span of data read is larger than the maximum allowed.
1044 # If the span of data read is larger than the maximum allowed.
1046 #
1045 #
1047 # In the sparse-revlog case, we rely on the associated "sparse
1046 # In the sparse-revlog case, we rely on the associated "sparse
1048 # reading" to avoid issue related to the span of data. In theory, it
1047 # reading" to avoid issue related to the span of data. In theory, it
1049 # would be possible to build pathological revlog where delta pattern
1048 # would be possible to build pathological revlog where delta pattern
1050 # would lead to too many reads. However, they do not happen in
1049 # would lead to too many reads. However, they do not happen in
1051 # practice at all. So we skip the span check entirely.
1050 # practice at all. So we skip the span check entirely.
1052 if (
1051 if (
1053 not self.revlog.delta_config.sparse_revlog
1052 not self.revlog.delta_config.sparse_revlog
1054 and maxdist < deltainfo.distance
1053 and maxdist < deltainfo.distance
1055 ):
1054 ):
1056 return False
1055 return False
1057
1056
1058 # Bad delta from new delta size:
1057 # Bad delta from new delta size:
1059 #
1058 #
1060 # If the delta size is larger than the target text, storing the delta
1059 # If the delta size is larger than the target text, storing the delta
1061 # will be inefficient.
1060 # will be inefficient.
1062 if textlen < deltainfo.deltalen:
1061 if textlen < deltainfo.deltalen:
1063 return False
1062 return False
1064
1063
1065 # Bad delta from cumulated payload size:
1064 # Bad delta from cumulated payload size:
1066 #
1065 #
1067 # If the sum of delta get larger than K * target text length.
1066 # If the sum of delta get larger than K * target text length.
1068 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1067 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1069 return False
1068 return False
1070
1069
1071 # Bad delta from chain length:
1070 # Bad delta from chain length:
1072 #
1071 #
1073 # If the number of delta in the chain gets too high.
1072 # If the number of delta in the chain gets too high.
1074 if (
1073 if (
1075 self.revlog.delta_config.max_chain_len
1074 self.revlog.delta_config.max_chain_len
1076 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1075 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1077 ):
1076 ):
1078 return False
1077 return False
1079
1078
1080 # bad delta from intermediate snapshot size limit
1079 # bad delta from intermediate snapshot size limit
1081 #
1080 #
1082 # If an intermediate snapshot size is higher than the limit. The
1081 # If an intermediate snapshot size is higher than the limit. The
1083 # limit exist to prevent endless chain of intermediate delta to be
1082 # limit exist to prevent endless chain of intermediate delta to be
1084 # created.
1083 # created.
1085 if (
1084 if (
1086 deltainfo.snapshotdepth is not None
1085 deltainfo.snapshotdepth is not None
1087 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1086 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1088 ):
1087 ):
1089 return False
1088 return False
1090
1089
1091 # bad delta if new intermediate snapshot is larger than the previous
1090 # bad delta if new intermediate snapshot is larger than the previous
1092 # snapshot
1091 # snapshot
1093 if (
1092 if (
1094 deltainfo.snapshotdepth
1093 deltainfo.snapshotdepth
1095 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1094 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1096 ):
1095 ):
1097 return False
1096 return False
1098
1097
1099 return True
1098 return True
1100
1099
1101
1100
1102 class SnapshotCache:
1101 class SnapshotCache:
1103 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1102 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1104
1103
1105 def __init__(self):
1104 def __init__(self):
1106 self.snapshots = collections.defaultdict(set)
1105 self.snapshots = collections.defaultdict(set)
1107 self._start_rev = None
1106 self._start_rev = None
1108 self._end_rev = None
1107 self._end_rev = None
1109
1108
1110 def update(self, revlog, start_rev=0):
1109 def update(self, revlog, start_rev=0):
1111 """find snapshots from start_rev to tip"""
1110 """find snapshots from start_rev to tip"""
1112 nb_revs = len(revlog)
1111 nb_revs = len(revlog)
1113 end_rev = nb_revs - 1
1112 end_rev = nb_revs - 1
1114 if start_rev > end_rev:
1113 if start_rev > end_rev:
1115 return # range is empty
1114 return # range is empty
1116
1115
1117 if self._start_rev is None:
1116 if self._start_rev is None:
1118 assert self._end_rev is None
1117 assert self._end_rev is None
1119 self._update(revlog, start_rev, end_rev)
1118 self._update(revlog, start_rev, end_rev)
1120 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1119 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1121 if start_rev < self._start_rev:
1120 if start_rev < self._start_rev:
1122 self._update(revlog, start_rev, self._start_rev - 1)
1121 self._update(revlog, start_rev, self._start_rev - 1)
1123 if self._end_rev < end_rev:
1122 if self._end_rev < end_rev:
1124 self._update(revlog, self._end_rev + 1, end_rev)
1123 self._update(revlog, self._end_rev + 1, end_rev)
1125
1124
1126 if self._start_rev is None:
1125 if self._start_rev is None:
1127 assert self._end_rev is None
1126 assert self._end_rev is None
1128 self._end_rev = end_rev
1127 self._end_rev = end_rev
1129 self._start_rev = start_rev
1128 self._start_rev = start_rev
1130 else:
1129 else:
1131 self._start_rev = min(self._start_rev, start_rev)
1130 self._start_rev = min(self._start_rev, start_rev)
1132 self._end_rev = max(self._end_rev, end_rev)
1131 self._end_rev = max(self._end_rev, end_rev)
1133 assert self._start_rev <= self._end_rev, (
1132 assert self._start_rev <= self._end_rev, (
1134 self._start_rev,
1133 self._start_rev,
1135 self._end_rev,
1134 self._end_rev,
1136 )
1135 )
1137
1136
1138 def _update(self, revlog, start_rev, end_rev):
1137 def _update(self, revlog, start_rev, end_rev):
1139 """internal method that actually do update content"""
1138 """internal method that actually do update content"""
1140 assert self._start_rev is None or (
1139 assert self._start_rev is None or (
1141 start_rev < self._start_rev or start_rev > self._end_rev
1140 start_rev < self._start_rev or start_rev > self._end_rev
1142 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1141 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1143 assert self._start_rev is None or (
1142 assert self._start_rev is None or (
1144 end_rev < self._start_rev or end_rev > self._end_rev
1143 end_rev < self._start_rev or end_rev > self._end_rev
1145 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1144 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1146 cache = self.snapshots
1145 cache = self.snapshots
1147 if hasattr(revlog.index, 'findsnapshots'):
1146 if hasattr(revlog.index, 'findsnapshots'):
1148 revlog.index.findsnapshots(cache, start_rev, end_rev)
1147 revlog.index.findsnapshots(cache, start_rev, end_rev)
1149 else:
1148 else:
1150 deltaparent = revlog.deltaparent
1149 deltaparent = revlog.deltaparent
1151 issnapshot = revlog.issnapshot
1150 issnapshot = revlog.issnapshot
1152 for rev in revlog.revs(start_rev, end_rev):
1151 for rev in revlog.revs(start_rev, end_rev):
1153 if issnapshot(rev):
1152 if issnapshot(rev):
1154 cache[deltaparent(rev)].add(rev)
1153 cache[deltaparent(rev)].add(rev)
1155
1154
1156
1155
1157 class deltacomputer:
1156 class deltacomputer:
1158 """object capable of computing delta and finding delta for multiple revision
1157 """object capable of computing delta and finding delta for multiple revision
1159
1158
1160 This object is meant to compute and find multiple delta applied to the same
1159 This object is meant to compute and find multiple delta applied to the same
1161 revlog.
1160 revlog.
1162 """
1161 """
1163
1162
1164 def __init__(
1163 def __init__(
1165 self,
1164 self,
1166 revlog,
1165 revlog,
1167 write_debug=None,
1166 write_debug=None,
1168 debug_search=False,
1167 debug_search=False,
1169 debug_info=None,
1168 debug_info=None,
1170 ):
1169 ):
1171 self.revlog = revlog
1170 self.revlog = revlog
1172 self._write_debug = write_debug
1171 self._write_debug = write_debug
1173 if write_debug is None:
1172 if write_debug is None:
1174 self._debug_search = False
1173 self._debug_search = False
1175 else:
1174 else:
1176 self._debug_search = debug_search
1175 self._debug_search = debug_search
1177 self._debug_info = debug_info
1176 self._debug_info = debug_info
1178 self._snapshot_cache = SnapshotCache()
1177 self._snapshot_cache = SnapshotCache()
1179
1178
1180 @property
1179 @property
1181 def _gather_debug(self):
1180 def _gather_debug(self):
1182 return self._write_debug is not None or self._debug_info is not None
1181 return self._write_debug is not None or self._debug_info is not None
1183
1182
1184 def buildtext(self, revinfo):
1183 def buildtext(self, revinfo):
1185 """Builds a fulltext version of a revision
1184 """Builds a fulltext version of a revision
1186
1185
1187 revinfo: revisioninfo instance that contains all needed info
1186 revinfo: revisioninfo instance that contains all needed info
1188 """
1187 """
1189 btext = revinfo.btext
1188 btext = revinfo.btext
1190 if btext[0] is not None:
1189 if btext[0] is not None:
1191 return btext[0]
1190 return btext[0]
1192
1191
1193 revlog = self.revlog
1192 revlog = self.revlog
1194 cachedelta = revinfo.cachedelta
1193 cachedelta = revinfo.cachedelta
1195 baserev = cachedelta[0]
1194 baserev = cachedelta[0]
1196 delta = cachedelta[1]
1195 delta = cachedelta[1]
1197
1196
1198 fulltext = btext[0] = _textfromdelta(
1197 fulltext = btext[0] = _textfromdelta(
1199 revlog,
1198 revlog,
1200 baserev,
1199 baserev,
1201 delta,
1200 delta,
1202 revinfo.p1,
1201 revinfo.p1,
1203 revinfo.p2,
1202 revinfo.p2,
1204 revinfo.flags,
1203 revinfo.flags,
1205 revinfo.node,
1204 revinfo.node,
1206 )
1205 )
1207 return fulltext
1206 return fulltext
1208
1207
1209 def _builddeltadiff(self, base, revinfo):
1208 def _builddeltadiff(self, base, revinfo):
1210 revlog = self.revlog
1209 revlog = self.revlog
1211 t = self.buildtext(revinfo)
1210 t = self.buildtext(revinfo)
1212 if revlog.iscensored(base):
1211 if revlog.iscensored(base):
1213 # deltas based on a censored revision must replace the
1212 # deltas based on a censored revision must replace the
1214 # full content in one patch, so delta works everywhere
1213 # full content in one patch, so delta works everywhere
1215 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1214 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1216 delta = header + t
1215 delta = header + t
1217 else:
1216 else:
1218 ptext = revlog.rawdata(base)
1217 ptext = revlog.rawdata(base)
1219 delta = mdiff.textdiff(ptext, t)
1218 delta = mdiff.textdiff(ptext, t)
1220
1219
1221 return delta
1220 return delta
1222
1221
1223 def _builddeltainfo(self, revinfo, base, target_rev=None):
1222 def _builddeltainfo(self, revinfo, base, target_rev=None):
1224 # can we use the cached delta?
1223 # can we use the cached delta?
1225 revlog = self.revlog
1224 revlog = self.revlog
1226 chainbase = revlog.chainbase(base)
1225 chainbase = revlog.chainbase(base)
1227 if revlog.delta_config.general_delta:
1226 if revlog.delta_config.general_delta:
1228 deltabase = base
1227 deltabase = base
1229 else:
1228 else:
1230 if target_rev is not None and base != target_rev - 1:
1229 if target_rev is not None and base != target_rev - 1:
1231 msg = (
1230 msg = (
1232 b'general delta cannot use delta for something else '
1231 b'general delta cannot use delta for something else '
1233 b'than `prev`: %d<-%d'
1232 b'than `prev`: %d<-%d'
1234 )
1233 )
1235 msg %= (base, target_rev)
1234 msg %= (base, target_rev)
1236 raise error.ProgrammingError(msg)
1235 raise error.ProgrammingError(msg)
1237 deltabase = chainbase
1236 deltabase = chainbase
1238 snapshotdepth = None
1237 snapshotdepth = None
1239 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1238 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1240 snapshotdepth = 0
1239 snapshotdepth = 0
1241 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1240 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1242 # A delta chain should always be one full snapshot,
1241 # A delta chain should always be one full snapshot,
1243 # zero or more semi-snapshots, and zero or more deltas
1242 # zero or more semi-snapshots, and zero or more deltas
1244 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1243 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1245 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1244 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1246 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1245 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1247 delta = None
1246 delta = None
1248 if revinfo.cachedelta:
1247 if revinfo.cachedelta:
1249 cachebase = revinfo.cachedelta[0]
1248 cachebase = revinfo.cachedelta[0]
1250 # check if the diff still apply
1249 # check if the diff still apply
1251 currentbase = cachebase
1250 currentbase = cachebase
1252 while (
1251 while (
1253 currentbase != nullrev
1252 currentbase != nullrev
1254 and currentbase != base
1253 and currentbase != base
1255 and self.revlog.length(currentbase) == 0
1254 and self.revlog.length(currentbase) == 0
1256 ):
1255 ):
1257 currentbase = self.revlog.deltaparent(currentbase)
1256 currentbase = self.revlog.deltaparent(currentbase)
1258 if self.revlog.delta_config.lazy_delta and currentbase == base:
1257 if self.revlog.delta_config.lazy_delta and currentbase == base:
1259 delta = revinfo.cachedelta[1]
1258 delta = revinfo.cachedelta[1]
1260 if delta is None:
1259 if delta is None:
1261 delta = self._builddeltadiff(base, revinfo)
1260 delta = self._builddeltadiff(base, revinfo)
1262 if self._debug_search:
1261 if self._debug_search:
1263 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1262 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1264 msg %= len(delta)
1263 msg %= len(delta)
1265 self._write_debug(msg)
1264 self._write_debug(msg)
1266 # snapshotdept need to be neither None nor 0 level snapshot
1265 # snapshotdept need to be neither None nor 0 level snapshot
1267 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1266 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1268 lowestrealisticdeltalen = (
1267 lowestrealisticdeltalen = (
1269 len(delta) // revlog.delta_config.upper_bound_comp
1268 len(delta) // revlog.delta_config.upper_bound_comp
1270 )
1269 )
1271 snapshotlimit = revinfo.textlen >> snapshotdepth
1270 snapshotlimit = revinfo.textlen >> snapshotdepth
1272 if self._debug_search:
1271 if self._debug_search:
1273 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1272 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1274 msg %= lowestrealisticdeltalen
1273 msg %= lowestrealisticdeltalen
1275 self._write_debug(msg)
1274 self._write_debug(msg)
1276 if snapshotlimit < lowestrealisticdeltalen:
1275 if snapshotlimit < lowestrealisticdeltalen:
1277 if self._debug_search:
1276 if self._debug_search:
1278 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1277 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1279 self._write_debug(msg)
1278 self._write_debug(msg)
1280 return None
1279 return None
1281 if revlog.length(base) < lowestrealisticdeltalen:
1280 if revlog.length(base) < lowestrealisticdeltalen:
1282 if self._debug_search:
1281 if self._debug_search:
1283 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1282 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1284 self._write_debug(msg)
1283 self._write_debug(msg)
1285 return None
1284 return None
1286 header, data = revlog._inner.compress(delta)
1285 header, data = revlog._inner.compress(delta)
1287 deltalen = len(header) + len(data)
1286 deltalen = len(header) + len(data)
1288 offset = revlog.end(len(revlog) - 1)
1287 offset = revlog.end(len(revlog) - 1)
1289 dist = deltalen + offset - revlog.start(chainbase)
1288 dist = deltalen + offset - revlog.start(chainbase)
1290 chainlen, compresseddeltalen = revlog._chaininfo(base)
1289 chainlen, compresseddeltalen = revlog._chaininfo(base)
1291 chainlen += 1
1290 chainlen += 1
1292 compresseddeltalen += deltalen
1291 compresseddeltalen += deltalen
1293
1292
1294 return _deltainfo(
1293 return _deltainfo(
1295 dist,
1294 dist,
1296 deltalen,
1295 deltalen,
1297 (header, data),
1296 (header, data),
1298 deltabase,
1297 deltabase,
1299 chainbase,
1298 chainbase,
1300 chainlen,
1299 chainlen,
1301 compresseddeltalen,
1300 compresseddeltalen,
1302 snapshotdepth,
1301 snapshotdepth,
1303 )
1302 )
1304
1303
1305 def _fullsnapshotinfo(self, revinfo, curr):
1304 def _fullsnapshotinfo(self, revinfo, curr):
1306 rawtext = self.buildtext(revinfo)
1305 rawtext = self.buildtext(revinfo)
1307 data = self.revlog._inner.compress(rawtext)
1306 data = self.revlog._inner.compress(rawtext)
1308 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1307 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1309 deltabase = chainbase = curr
1308 deltabase = chainbase = curr
1310 snapshotdepth = 0
1309 snapshotdepth = 0
1311 chainlen = 1
1310 chainlen = 1
1312
1311
1313 return _deltainfo(
1312 return _deltainfo(
1314 dist,
1313 dist,
1315 deltalen,
1314 deltalen,
1316 data,
1315 data,
1317 deltabase,
1316 deltabase,
1318 chainbase,
1317 chainbase,
1319 chainlen,
1318 chainlen,
1320 compresseddeltalen,
1319 compresseddeltalen,
1321 snapshotdepth,
1320 snapshotdepth,
1322 )
1321 )
1323
1322
1324 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1323 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1325 """Find an acceptable delta against a candidate revision
1324 """Find an acceptable delta against a candidate revision
1326
1325
1327 revinfo: information about the revision (instance of _revisioninfo)
1326 revinfo: information about the revision (instance of _revisioninfo)
1328
1327
1329 Returns the first acceptable candidate revision, as ordered by
1328 Returns the first acceptable candidate revision, as ordered by
1330 _candidategroups
1329 _candidategroups
1331
1330
1332 If no suitable deltabase is found, we return delta info for a full
1331 If no suitable deltabase is found, we return delta info for a full
1333 snapshot.
1332 snapshot.
1334
1333
1335 `excluded_bases` is an optional set of revision that cannot be used as
1334 `excluded_bases` is an optional set of revision that cannot be used as
1336 a delta base. Use this to recompute delta suitable in censor or strip
1335 a delta base. Use this to recompute delta suitable in censor or strip
1337 context.
1336 context.
1338 """
1337 """
1339 if target_rev is None:
1338 if target_rev is None:
1340 target_rev = len(self.revlog)
1339 target_rev = len(self.revlog)
1341
1340
1342 gather_debug = self._gather_debug
1341 gather_debug = self._gather_debug
1343 cachedelta = revinfo.cachedelta
1342 cachedelta = revinfo.cachedelta
1344 revlog = self.revlog
1343 revlog = self.revlog
1345 p1r = p2r = None
1344 p1r = p2r = None
1346
1345
1347 if excluded_bases is None:
1346 if excluded_bases is None:
1348 excluded_bases = set()
1347 excluded_bases = set()
1349
1348
1350 if gather_debug:
1349 if gather_debug:
1351 start = util.timer()
1350 start = util.timer()
1352 dbg = self._one_dbg_data()
1351 dbg = self._one_dbg_data()
1353 dbg['revision'] = target_rev
1352 dbg['revision'] = target_rev
1354 p1r = revlog.rev(revinfo.p1)
1353 p1r = revlog.rev(revinfo.p1)
1355 p2r = revlog.rev(revinfo.p2)
1354 p2r = revlog.rev(revinfo.p2)
1356 if p1r != nullrev:
1355 if p1r != nullrev:
1357 p1_chain_len = revlog._chaininfo(p1r)[0]
1356 p1_chain_len = revlog._chaininfo(p1r)[0]
1358 else:
1357 else:
1359 p1_chain_len = -1
1358 p1_chain_len = -1
1360 if p2r != nullrev:
1359 if p2r != nullrev:
1361 p2_chain_len = revlog._chaininfo(p2r)[0]
1360 p2_chain_len = revlog._chaininfo(p2r)[0]
1362 else:
1361 else:
1363 p2_chain_len = -1
1362 p2_chain_len = -1
1364 dbg['p1-chain-len'] = p1_chain_len
1363 dbg['p1-chain-len'] = p1_chain_len
1365 dbg['p2-chain-len'] = p2_chain_len
1364 dbg['p2-chain-len'] = p2_chain_len
1366
1365
1367 # 1) if the revision is empty, no amount of delta can beat it
1366 # 1) if the revision is empty, no amount of delta can beat it
1368 #
1367 #
1369 # 2) no delta for flag processor revision (see "candelta" for why)
1368 # 2) no delta for flag processor revision (see "candelta" for why)
1370 # not calling candelta since only one revision needs test, also to
1369 # not calling candelta since only one revision needs test, also to
1371 # avoid overhead fetching flags again.
1370 # avoid overhead fetching flags again.
1372 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1371 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1373 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1372 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1374 if gather_debug:
1373 if gather_debug:
1375 end = util.timer()
1374 end = util.timer()
1376 dbg['duration'] = end - start
1375 dbg['duration'] = end - start
1377 dbg[
1376 dbg[
1378 'delta-base'
1377 'delta-base'
1379 ] = deltainfo.base # pytype: disable=attribute-error
1378 ] = deltainfo.base # pytype: disable=attribute-error
1380 dbg['search_round_count'] = 0
1379 dbg['search_round_count'] = 0
1381 dbg['using-cached-base'] = False
1380 dbg['using-cached-base'] = False
1382 dbg['delta_try_count'] = 0
1381 dbg['delta_try_count'] = 0
1383 dbg['type'] = b"full"
1382 dbg['type'] = b"full"
1384 dbg['snapshot-depth'] = 0
1383 dbg['snapshot-depth'] = 0
1385 self._dbg_process_data(dbg)
1384 self._dbg_process_data(dbg)
1386 return deltainfo
1385 return deltainfo
1387
1386
1388 deltainfo = None
1387 deltainfo = None
1389
1388
1390 # If this source delta are to be forcibly reuse, let us comply early.
1389 # If this source delta are to be forcibly reuse, let us comply early.
1391 if (
1390 if (
1392 revlog.delta_config.general_delta
1391 revlog.delta_config.general_delta
1393 and revinfo.cachedelta is not None
1392 and revinfo.cachedelta is not None
1394 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1393 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1395 ):
1394 ):
1396 base = revinfo.cachedelta[0]
1395 base = revinfo.cachedelta[0]
1397 if base == nullrev:
1396 if base == nullrev:
1398 dbg_type = b"full"
1397 dbg_type = b"full"
1399 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1398 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1400 if gather_debug:
1399 if gather_debug:
1401 snapshotdepth = 0
1400 snapshotdepth = 0
1402 elif base not in excluded_bases:
1401 elif base not in excluded_bases:
1403 delta = revinfo.cachedelta[1]
1402 delta = revinfo.cachedelta[1]
1404 header, data = revlog.compress(delta)
1403 header, data = revlog.compress(delta)
1405 deltalen = len(header) + len(data)
1404 deltalen = len(header) + len(data)
1406 if gather_debug:
1405 if gather_debug:
1407 offset = revlog.end(len(revlog) - 1)
1406 offset = revlog.end(len(revlog) - 1)
1408 chainbase = revlog.chainbase(base)
1407 chainbase = revlog.chainbase(base)
1409 distance = deltalen + offset - revlog.start(chainbase)
1408 distance = deltalen + offset - revlog.start(chainbase)
1410 chainlen, compresseddeltalen = revlog._chaininfo(base)
1409 chainlen, compresseddeltalen = revlog._chaininfo(base)
1411 chainlen += 1
1410 chainlen += 1
1412 compresseddeltalen += deltalen
1411 compresseddeltalen += deltalen
1413 if base == p1r or base == p2r:
1412 if base == p1r or base == p2r:
1414 dbg_type = b"delta"
1413 dbg_type = b"delta"
1415 snapshotdepth = None
1414 snapshotdepth = None
1416 elif not revlog.issnapshot(base):
1415 elif not revlog.issnapshot(base):
1417 snapshotdepth = None
1416 snapshotdepth = None
1418 else:
1417 else:
1419 dbg_type = b"snapshot"
1418 dbg_type = b"snapshot"
1420 snapshotdepth = revlog.snapshotdepth(base) + 1
1419 snapshotdepth = revlog.snapshotdepth(base) + 1
1421 else:
1420 else:
1422 distance = None
1421 distance = None
1423 chainbase = None
1422 chainbase = None
1424 chainlen = None
1423 chainlen = None
1425 compresseddeltalen = None
1424 compresseddeltalen = None
1426 snapshotdepth = None
1425 snapshotdepth = None
1427 deltainfo = _deltainfo(
1426 deltainfo = _deltainfo(
1428 distance=distance,
1427 distance=distance,
1429 deltalen=deltalen,
1428 deltalen=deltalen,
1430 data=(header, data),
1429 data=(header, data),
1431 base=base,
1430 base=base,
1432 chainbase=chainbase,
1431 chainbase=chainbase,
1433 chainlen=chainlen,
1432 chainlen=chainlen,
1434 compresseddeltalen=compresseddeltalen,
1433 compresseddeltalen=compresseddeltalen,
1435 snapshotdepth=snapshotdepth,
1434 snapshotdepth=snapshotdepth,
1436 )
1435 )
1437
1436
1438 if deltainfo is not None:
1437 if deltainfo is not None:
1439 if gather_debug:
1438 if gather_debug:
1440 end = util.timer()
1439 end = util.timer()
1441 dbg['duration'] = end - start
1440 dbg['duration'] = end - start
1442 dbg[
1441 dbg[
1443 'delta-base'
1442 'delta-base'
1444 ] = deltainfo.base # pytype: disable=attribute-error
1443 ] = deltainfo.base # pytype: disable=attribute-error
1445 dbg['search_round_count'] = 0
1444 dbg['search_round_count'] = 0
1446 dbg['using-cached-base'] = True
1445 dbg['using-cached-base'] = True
1447 dbg['delta_try_count'] = 0
1446 dbg['delta_try_count'] = 0
1448 dbg['type'] = b"full"
1447 dbg['type'] = b"full"
1449 if snapshotdepth is None:
1448 if snapshotdepth is None:
1450 dbg['snapshot-depth'] = -1
1449 dbg['snapshot-depth'] = -1
1451 else:
1450 else:
1452 dbg['snapshot-depth'] = snapshotdepth
1451 dbg['snapshot-depth'] = snapshotdepth
1453 self._dbg_process_data(dbg)
1452 self._dbg_process_data(dbg)
1454 return deltainfo
1453 return deltainfo
1455
1454
1456 # count the number of different delta we tried (for debug purpose)
1455 # count the number of different delta we tried (for debug purpose)
1457 dbg_try_count = 0
1456 dbg_try_count = 0
1458 # count the number of "search round" we did. (for debug purpose)
1457 # count the number of "search round" we did. (for debug purpose)
1459 dbg_try_rounds = 0
1458 dbg_try_rounds = 0
1460 dbg_type = b'unknown'
1459 dbg_type = b'unknown'
1461
1460
1462 if p1r is None:
1461 if p1r is None:
1463 p1r = revlog.rev(revinfo.p1)
1462 p1r = revlog.rev(revinfo.p1)
1464 p2r = revlog.rev(revinfo.p2)
1463 p2r = revlog.rev(revinfo.p2)
1465
1464
1466 if self._debug_search:
1465 if self._debug_search:
1467 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1466 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1468 msg %= target_rev
1467 msg %= target_rev
1469 self._write_debug(msg)
1468 self._write_debug(msg)
1470
1469
1471 search = _DeltaSearch(
1470 search = _DeltaSearch(
1472 self.revlog,
1471 self.revlog,
1473 revinfo,
1472 revinfo,
1474 p1r,
1473 p1r,
1475 p2r,
1474 p2r,
1476 cachedelta,
1475 cachedelta,
1477 excluded_bases,
1476 excluded_bases,
1478 target_rev,
1477 target_rev,
1479 snapshot_cache=self._snapshot_cache,
1478 snapshot_cache=self._snapshot_cache,
1480 )
1479 )
1481
1480
1482 while not search.done:
1481 while not search.done:
1483 current_group = search.current_group
1482 current_group = search.current_group
1484 # current_group can be `None`, but not is search.done is False
1483 # current_group can be `None`, but not is search.done is False
1485 # We add this assert to help pytype
1484 # We add this assert to help pytype
1486 assert current_group is not None
1485 assert current_group is not None
1487 candidaterevs = current_group
1486 candidaterevs = current_group
1488 dbg_try_rounds += 1
1487 dbg_try_rounds += 1
1489 if self._debug_search:
1488 if self._debug_search:
1490 prev = None
1489 prev = None
1491 if deltainfo is not None:
1490 if deltainfo is not None:
1492 prev = deltainfo.base
1491 prev = deltainfo.base
1493
1492
1494 if (
1493 if (
1495 cachedelta is not None
1494 cachedelta is not None
1496 and len(candidaterevs) == 1
1495 and len(candidaterevs) == 1
1497 and cachedelta[0] in candidaterevs
1496 and cachedelta[0] in candidaterevs
1498 ):
1497 ):
1499 round_type = b"cached-delta"
1498 round_type = b"cached-delta"
1500 elif p1r in candidaterevs or p2r in candidaterevs:
1499 elif p1r in candidaterevs or p2r in candidaterevs:
1501 round_type = b"parents"
1500 round_type = b"parents"
1502 elif prev is not None and all(c < prev for c in candidaterevs):
1501 elif prev is not None and all(c < prev for c in candidaterevs):
1503 round_type = b"refine-down"
1502 round_type = b"refine-down"
1504 elif prev is not None and all(c > prev for c in candidaterevs):
1503 elif prev is not None and all(c > prev for c in candidaterevs):
1505 round_type = b"refine-up"
1504 round_type = b"refine-up"
1506 else:
1505 else:
1507 round_type = b"search-down"
1506 round_type = b"search-down"
1508 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1507 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1509 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1508 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1510 self._write_debug(msg)
1509 self._write_debug(msg)
1511 nominateddeltas = []
1510 nominateddeltas = []
1512 if deltainfo is not None:
1511 if deltainfo is not None:
1513 if self._debug_search:
1512 if self._debug_search:
1514 msg = (
1513 msg = (
1515 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1514 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1516 )
1515 )
1517 msg %= (deltainfo.base, deltainfo.deltalen)
1516 msg %= (deltainfo.base, deltainfo.deltalen)
1518 self._write_debug(msg)
1517 self._write_debug(msg)
1519 # if we already found a good delta,
1518 # if we already found a good delta,
1520 # challenge it against refined candidates
1519 # challenge it against refined candidates
1521 nominateddeltas.append(deltainfo)
1520 nominateddeltas.append(deltainfo)
1522 for candidaterev in candidaterevs:
1521 for candidaterev in candidaterevs:
1523 if self._debug_search:
1522 if self._debug_search:
1524 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1523 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1525 msg %= candidaterev
1524 msg %= candidaterev
1526 self._write_debug(msg)
1525 self._write_debug(msg)
1527 candidate_type = None
1526 candidate_type = None
1528 if candidaterev == p1r:
1527 if candidaterev == p1r:
1529 candidate_type = b"p1"
1528 candidate_type = b"p1"
1530 elif candidaterev == p2r:
1529 elif candidaterev == p2r:
1531 candidate_type = b"p2"
1530 candidate_type = b"p2"
1532 elif self.revlog.issnapshot(candidaterev):
1531 elif self.revlog.issnapshot(candidaterev):
1533 candidate_type = b"snapshot-%d"
1532 candidate_type = b"snapshot-%d"
1534 candidate_type %= self.revlog.snapshotdepth(
1533 candidate_type %= self.revlog.snapshotdepth(
1535 candidaterev
1534 candidaterev
1536 )
1535 )
1537
1536
1538 if candidate_type is not None:
1537 if candidate_type is not None:
1539 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1538 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1540 msg %= candidate_type
1539 msg %= candidate_type
1541 self._write_debug(msg)
1540 self._write_debug(msg)
1542 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1541 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1543 msg %= self.revlog.length(candidaterev)
1542 msg %= self.revlog.length(candidaterev)
1544 self._write_debug(msg)
1543 self._write_debug(msg)
1545 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1544 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1546 msg %= self.revlog.deltaparent(candidaterev)
1545 msg %= self.revlog.deltaparent(candidaterev)
1547 self._write_debug(msg)
1546 self._write_debug(msg)
1548
1547
1549 dbg_try_count += 1
1548 dbg_try_count += 1
1550
1549
1551 if self._debug_search:
1550 if self._debug_search:
1552 delta_start = util.timer()
1551 delta_start = util.timer()
1553 candidatedelta = self._builddeltainfo(
1552 candidatedelta = self._builddeltainfo(
1554 revinfo,
1553 revinfo,
1555 candidaterev,
1554 candidaterev,
1556 target_rev=target_rev,
1555 target_rev=target_rev,
1557 )
1556 )
1558 if self._debug_search:
1557 if self._debug_search:
1559 delta_end = util.timer()
1558 delta_end = util.timer()
1560 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1559 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1561 msg %= delta_end - delta_start
1560 msg %= delta_end - delta_start
1562 self._write_debug(msg)
1561 self._write_debug(msg)
1563 if candidatedelta is not None:
1562 if candidatedelta is not None:
1564 if search.is_good_delta_info(candidatedelta):
1563 if search.is_good_delta_info(candidatedelta):
1565 if self._debug_search:
1564 if self._debug_search:
1566 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1565 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1567 msg %= candidatedelta.deltalen
1566 msg %= candidatedelta.deltalen
1568 self._write_debug(msg)
1567 self._write_debug(msg)
1569 nominateddeltas.append(candidatedelta)
1568 nominateddeltas.append(candidatedelta)
1570 elif self._debug_search:
1569 elif self._debug_search:
1571 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1570 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1572 msg %= candidatedelta.deltalen
1571 msg %= candidatedelta.deltalen
1573 self._write_debug(msg)
1572 self._write_debug(msg)
1574 elif self._debug_search:
1573 elif self._debug_search:
1575 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1574 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1576 self._write_debug(msg)
1575 self._write_debug(msg)
1577 if nominateddeltas:
1576 if nominateddeltas:
1578 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1577 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1579 search.next_group(deltainfo)
1578 search.next_group(deltainfo)
1580
1579
1581 if deltainfo is None:
1580 if deltainfo is None:
1582 dbg_type = b"full"
1581 dbg_type = b"full"
1583 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1582 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1584 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1583 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1585 dbg_type = b"snapshot"
1584 dbg_type = b"snapshot"
1586 else:
1585 else:
1587 dbg_type = b"delta"
1586 dbg_type = b"delta"
1588
1587
1589 if gather_debug:
1588 if gather_debug:
1590 end = util.timer()
1589 end = util.timer()
1591 if dbg_type == b'full':
1590 if dbg_type == b'full':
1592 used_cached = (
1591 used_cached = (
1593 cachedelta is not None
1592 cachedelta is not None
1594 and dbg_try_rounds == 0
1593 and dbg_try_rounds == 0
1595 and dbg_try_count == 0
1594 and dbg_try_count == 0
1596 and cachedelta[0] == nullrev
1595 and cachedelta[0] == nullrev
1597 )
1596 )
1598 else:
1597 else:
1599 used_cached = (
1598 used_cached = (
1600 cachedelta is not None
1599 cachedelta is not None
1601 and dbg_try_rounds == 1
1600 and dbg_try_rounds == 1
1602 and dbg_try_count == 1
1601 and dbg_try_count == 1
1603 and deltainfo.base == cachedelta[0]
1602 and deltainfo.base == cachedelta[0]
1604 )
1603 )
1605 dbg['duration'] = end - start
1604 dbg['duration'] = end - start
1606 dbg[
1605 dbg[
1607 'delta-base'
1606 'delta-base'
1608 ] = deltainfo.base # pytype: disable=attribute-error
1607 ] = deltainfo.base # pytype: disable=attribute-error
1609 dbg['search_round_count'] = dbg_try_rounds
1608 dbg['search_round_count'] = dbg_try_rounds
1610 dbg['using-cached-base'] = used_cached
1609 dbg['using-cached-base'] = used_cached
1611 dbg['delta_try_count'] = dbg_try_count
1610 dbg['delta_try_count'] = dbg_try_count
1612 dbg['type'] = dbg_type
1611 dbg['type'] = dbg_type
1613 if (
1612 if (
1614 deltainfo.snapshotdepth # pytype: disable=attribute-error
1613 deltainfo.snapshotdepth # pytype: disable=attribute-error
1615 is not None
1614 is not None
1616 ):
1615 ):
1617 dbg[
1616 dbg[
1618 'snapshot-depth'
1617 'snapshot-depth'
1619 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1618 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1620 else:
1619 else:
1621 dbg['snapshot-depth'] = -1
1620 dbg['snapshot-depth'] = -1
1622 self._dbg_process_data(dbg)
1621 self._dbg_process_data(dbg)
1623 return deltainfo
1622 return deltainfo
1624
1623
1625 def _one_dbg_data(self):
1624 def _one_dbg_data(self):
1626 dbg = {
1625 dbg = {
1627 'duration': None,
1626 'duration': None,
1628 'revision': None,
1627 'revision': None,
1629 'delta-base': None,
1628 'delta-base': None,
1630 'search_round_count': None,
1629 'search_round_count': None,
1631 'using-cached-base': None,
1630 'using-cached-base': None,
1632 'delta_try_count': None,
1631 'delta_try_count': None,
1633 'type': None,
1632 'type': None,
1634 'p1-chain-len': None,
1633 'p1-chain-len': None,
1635 'p2-chain-len': None,
1634 'p2-chain-len': None,
1636 'snapshot-depth': None,
1635 'snapshot-depth': None,
1637 'target-revlog': None,
1636 'target-revlog': None,
1638 }
1637 }
1639 target_revlog = b"UNKNOWN"
1638 target_revlog = b"UNKNOWN"
1640 target_type = self.revlog.target[0]
1639 target_type = self.revlog.target[0]
1641 target_key = self.revlog.target[1]
1640 target_key = self.revlog.target[1]
1642 if target_type == KIND_CHANGELOG:
1641 if target_type == KIND_CHANGELOG:
1643 target_revlog = b'CHANGELOG:'
1642 target_revlog = b'CHANGELOG:'
1644 elif target_type == KIND_MANIFESTLOG:
1643 elif target_type == KIND_MANIFESTLOG:
1645 target_revlog = b'MANIFESTLOG:'
1644 target_revlog = b'MANIFESTLOG:'
1646 if target_key:
1645 if target_key:
1647 target_revlog += b'%s:' % target_key
1646 target_revlog += b'%s:' % target_key
1648 elif target_type == KIND_FILELOG:
1647 elif target_type == KIND_FILELOG:
1649 target_revlog = b'FILELOG:'
1648 target_revlog = b'FILELOG:'
1650 if target_key:
1649 if target_key:
1651 target_revlog += b'%s:' % target_key
1650 target_revlog += b'%s:' % target_key
1652 dbg['target-revlog'] = target_revlog
1651 dbg['target-revlog'] = target_revlog
1653 return dbg
1652 return dbg
1654
1653
1655 def _dbg_process_data(self, dbg):
1654 def _dbg_process_data(self, dbg):
1656 if self._debug_info is not None:
1655 if self._debug_info is not None:
1657 self._debug_info.append(dbg)
1656 self._debug_info.append(dbg)
1658
1657
1659 if self._write_debug is not None:
1658 if self._write_debug is not None:
1660 msg = (
1659 msg = (
1661 b"DBG-DELTAS:"
1660 b"DBG-DELTAS:"
1662 b" %-12s"
1661 b" %-12s"
1663 b" rev=%d:"
1662 b" rev=%d:"
1664 b" delta-base=%d"
1663 b" delta-base=%d"
1665 b" is-cached=%d"
1664 b" is-cached=%d"
1666 b" - search-rounds=%d"
1665 b" - search-rounds=%d"
1667 b" try-count=%d"
1666 b" try-count=%d"
1668 b" - delta-type=%-6s"
1667 b" - delta-type=%-6s"
1669 b" snap-depth=%d"
1668 b" snap-depth=%d"
1670 b" - p1-chain-length=%d"
1669 b" - p1-chain-length=%d"
1671 b" p2-chain-length=%d"
1670 b" p2-chain-length=%d"
1672 b" - duration=%f"
1671 b" - duration=%f"
1673 b"\n"
1672 b"\n"
1674 )
1673 )
1675 msg %= (
1674 msg %= (
1676 dbg["target-revlog"],
1675 dbg["target-revlog"],
1677 dbg["revision"],
1676 dbg["revision"],
1678 dbg["delta-base"],
1677 dbg["delta-base"],
1679 dbg["using-cached-base"],
1678 dbg["using-cached-base"],
1680 dbg["search_round_count"],
1679 dbg["search_round_count"],
1681 dbg["delta_try_count"],
1680 dbg["delta_try_count"],
1682 dbg["type"],
1681 dbg["type"],
1683 dbg["snapshot-depth"],
1682 dbg["snapshot-depth"],
1684 dbg["p1-chain-len"],
1683 dbg["p1-chain-len"],
1685 dbg["p2-chain-len"],
1684 dbg["p2-chain-len"],
1686 dbg["duration"],
1685 dbg["duration"],
1687 )
1686 )
1688 self._write_debug(msg)
1687 self._write_debug(msg)
1689
1688
1690
1689
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # no data to store at all: declare it uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # no compression header — sniff the payload's first byte
        first = payload[0:1]
        if first == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if first == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # 'u' marks uncompressed data; the PLAIN mode flag is a more
        # efficient way to record that, so strip the marker
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now