# NOTE(review): the lines below are residue from a web diff viewer, kept as
# comments for provenance — not part of the module source.
#   changeset: delta-find: move tested in the _DeltaSearch.__init__...
#   author: marmoute
#   revision: r52219:01db705b (default)
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""


import collections
import struct

# import stuff from node for others to import from revlog
from ..node import nullrev
from ..i18n import _

from .constants import (
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_FORCE,
    DELTA_BASE_REUSE_NO,
    KIND_CHANGELOG,
    KIND_FILELOG,
    KIND_MANIFESTLOG,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import attr

from .. import (
    error,
    mdiff,
    util,
)

from . import flagutil

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        # local import to avoid an import cycle with the real revlog module
        from .. import revlog

        # cumulative end offsets; rev N spans data[N-1]:data[N]
        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        """byte offset where revision ``rev`` starts"""
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        """byte offset just past the end of revision ``rev``"""
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        """stored byte length of revision ``rev``"""
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        """True if ``rev`` was declared a snapshot (nullrev always is)"""
        if rev == nullrev:
            return True
        return rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the C-index implementation of density slicing when available
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        revlog.data_config.sr_min_gap_size,
    ):
        # density slicing may still produce over-large chunks; re-slice by size
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read guarantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # whole chunk already fits within the target: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is always dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # final chunk: everything after the last selected gap
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment occupies no bytes
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Immutable record describing one candidate delta for a revision.
    # NOTE: attribute order is significant — attr.s generates the
    # positional __init__ from it (see drop_u_compression).

    # distance from the delta base, i.e. amount of revlog data spanned
    distance = attr.ib()
    # length of the (possibly compressed) delta payload
    deltalen = attr.ib()
    # pair (compression-header, delta-bytes)
    data = attr.ib()
    # revision number used as delta base
    base = attr.ib()
    # first revision of the delta chain this delta extends
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta forms an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
567
567
568
568
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    # the b'u' header byte marks "stored uncompressed"; we only strip it
    assert delta.data[0] == b'u', delta.data[0]
    # rebuild the record with the header removed: the payload shrinks by
    # one byte and the compression-header slot becomes empty
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', delta.data[1]),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
585
585
586
586
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # no delta at all is never a good delta
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
    # so we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    config = revlog.delta_config
    textlen = revinfo.textlen

    # resolve the maximum allowed on-disk span for the chain; an unset
    # max_deltachain_span disables the check by matching the distance
    maxdist = config.max_deltachain_span
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, textlen * 4)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if maxdist < deltainfo.distance and not config.sparse_revlog:
        return False

    # Bad delta from new delta size:
    #
    # storing a delta larger than the target text itself is inefficient
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # the sum of all deltas to apply may not exceed K * target text length
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # the number of deltas in the chain may not grow past the limit
    if config.max_chain_len and config.max_chain_len < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # the limit exists to prevent endless chains of intermediate deltas
    # from being created
    if deltainfo.snapshotdepth is not None:
        if (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen:
            return False

    # bad delta if the new intermediate snapshot would be larger than the
    # snapshot it builds upon
    if deltainfo.snapshotdepth:
        if revlog.length(deltainfo.base) < deltainfo.deltalen:
            return False

    return True
670
670
671
671
672 # If a revision's full text is that much bigger than a base candidate full
672 # If a revision's full text is that much bigger than a base candidate full
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
674 # consider these candidates.
674 # consider these candidates.
675 LIMIT_BASE2TEXT = 500
675 LIMIT_BASE2TEXT = 500
676
676
677
677
678 class _DeltaSearch:
678 class _DeltaSearch:
679 """perform the search of a good delta for a single revlog revision
679 """perform the search of a good delta for a single revlog revision
680
680
681 note: some of the deltacomputer.finddeltainfo logic should probably move
681 note: some of the deltacomputer.finddeltainfo logic should probably move
682 here.
682 here.
683 """
683 """
684
684
685 def __init__(
685 def __init__(
686 self,
686 self,
687 revlog,
687 revlog,
688 textlen,
688 textlen,
689 p1,
689 p1,
690 p2,
690 p2,
691 cachedelta,
691 cachedelta,
692 excluded_bases=None,
692 excluded_bases=None,
693 target_rev=None,
693 target_rev=None,
694 snapshot_cache=None,
694 snapshot_cache=None,
695 ):
695 ):
696 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
696 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
697 # so we should never end up asking such question. Adding the assert as
697 # so we should never end up asking such question. Adding the assert as
698 # a safe-guard to detect anything that would be fishy in this regard.
698 # a safe-guard to detect anything that would be fishy in this regard.
699 assert (
699 assert (
700 cachedelta is None
700 cachedelta is None
701 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
701 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
702 or not revlog.delta_config.general_delta
702 or not revlog.delta_config.general_delta
703 )
703 )
704 self.revlog = revlog
704 self.revlog = revlog
705 self.textlen = textlen
705 self.textlen = textlen
706 self.p1 = p1
706 self.p1 = p1
707 self.p2 = p2
707 self.p2 = p2
708 self.cachedelta = cachedelta
708 self.cachedelta = cachedelta
709 self.excluded_bases = excluded_bases
709 self.excluded_bases = excluded_bases
710 if target_rev is None:
710 if target_rev is None:
711 self.target_rev = len(self.revlog)
711 self.target_rev = len(self.revlog)
712 self.target_rev = target_rev
712 self.target_rev = target_rev
713 if snapshot_cache is None:
713 if snapshot_cache is None:
714 # map: base-rev: [snapshot-revs]
714 # map: base-rev: [snapshot-revs]
715 snapshot_cache = SnapshotCache()
715 snapshot_cache = SnapshotCache()
716 self.snapshot_cache = snapshot_cache
716 self.snapshot_cache = snapshot_cache
717
717
718 self.tested = {nullrev}
719
    def candidate_groups(self):
        """Provides group of revision to be tested as delta base

        This top level function focus on emitting groups with unique and
        worthwhile content. See _raw_candidate_groups for details about the
        group order.

        Generator protocol: the caller iterates with ``send(good)`` where
        ``good`` is the base accepted from the previous group (or None);
        the generator emits ``None`` when it is exhausted.
        """
        # should we try to build a delta?
        if not (len(self.revlog) and self.revlog._storedeltachains):
            yield None
            return

        if not self.revlog.delta_config.general_delta:
            # before general delta, there is only one possible delta base
            yield (self.target_rev - 1,)
            yield None
            return

        # bind frequently used methods locally (hot loop below)
        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent
        sparse = self.revlog.delta_config.sparse_revlog
        good = None

        deltas_limit = self.textlen * LIMIT_DELTA2TEXT
        group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size

        tested = self.tested  # prefetch for speed and code compactness
        candidates = self._refined_groups()
        while True:
            # forward the caller's verdict on the previous group to the
            # refinement generator so it can react to it
            temptative = candidates.send(good)
            if temptative is None:
                break
            group = []
            for rev in temptative:
                # skip over empty delta (no need to include them in a chain)
                while not (rev == nullrev or rev in tested or deltalength(rev)):
                    tested.add(rev)
                    rev = deltaparent(rev)
                # no need to try a delta against nullrev, this will be done as
                # a last resort.
                if rev == nullrev:
                    continue
                # filter out revision we tested already
                if rev in tested:
                    continue

                # an higher authority deamed the base unworthy (e.g. censored)
                if (
                    self.excluded_bases is not None
                    and rev in self.excluded_bases
                ):
                    tested.add(rev)
                    continue
                # We are in some recomputation cases and that rev is too high
                # in the revlog
                if self.target_rev is not None and rev >= self.target_rev:
                    tested.add(rev)
                    continue
                # filter out delta base that will never produce good delta
                if deltas_limit < self.revlog.length(rev):
                    tested.add(rev)
                    continue
                # base fulltext much smaller than the target text cannot
                # produce a usable delta (see LIMIT_BASE2TEXT)
                if sparse and self.revlog.rawsize(rev) < (
                    self.textlen // LIMIT_BASE2TEXT
                ):
                    tested.add(rev)
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    tested.add(rev)
                    continue

                # If we reach here, we are about to build and test a delta.
                # The delta building process will compute the chaininfo in all
                # case, since that computation is cached, it is fine to access
                # it here too.
                chainlen, chainsize = self.revlog._chaininfo(rev)
                # if chain will be too long, skip base
                if (
                    self.revlog.delta_config.max_chain_len
                    and chainlen >= self.revlog.delta_config.max_chain_len
                ):
                    tested.add(rev)
                    continue
                # if chain already have too much data, skip base
                if deltas_limit < chainsize:
                    tested.add(rev)
                    continue
                if (
                    sparse
                    and self.revlog.delta_config.upper_bound_comp is not None
                ):
                    maxcomp = self.revlog.delta_config.upper_bound_comp
                    basenotsnap = (self.p1, self.p2, nullrev)
                    if rev not in basenotsnap and self.revlog.issnapshot(rev):
                        snapshotdepth = self.revlog.snapshotdepth(rev)
                        # If text is significantly larger than the base, we can
                        # expect the resulting delta to be proportional to the size
                        # difference
                        revsize = self.revlog.rawsize(rev)
                        rawsizedistance = max(self.textlen - revsize, 0)
                        # use an estimate of the compression upper bound.
                        lowestrealisticdeltalen = rawsizedistance // maxcomp

                        # check the absolute constraint on the delta size
                        snapshotlimit = self.textlen >> snapshotdepth
                        if snapshotlimit < lowestrealisticdeltalen:
                            # delta lower bound is larger than accepted upper
                            # bound
                            tested.add(rev)
                            continue

                        # check the relative constraint on the delta size
                        revlength = self.revlog.length(rev)
                        if revlength < lowestrealisticdeltalen:
                            # delta probable lower bound is larger than target
                            # base
                            tested.add(rev)
                            continue

                group.append(rev)
            if group:
                # When the size of the candidate group is big, it can result in
                # a quite significant performance impact. To reduce this, we
                # can send them in smaller batches until the new batch does not
                # provide any improvements.
                #
                # This might reduce the overall efficiency of the compression
                # in some corner cases, but that should also prevent very
                # pathological cases from being an issue. (eg. 20 000
                # candidates).
                #
                # XXX note that the ordering of the group becomes important as
                # it now impacts the final result. The current order is
                # unprocessed and can be improved.
                if group_chunk_size == 0:
                    tested.update(group)
                    good = yield tuple(group)
                else:
                    prev_good = good
                    for start in range(0, len(group), group_chunk_size):
                        sub_group = group[start : start + group_chunk_size]
                        tested.update(sub_group)
                        good = yield tuple(sub_group)
                        # stop chunking as soon as a batch brings no
                        # improvement over the previous verdict
                        if prev_good == good:
                            break

        yield None
    def _refined_groups(self):
        """Yield candidate-base groups, refining around accepted bases.

        Same ``send(good)`` protocol as ``candidate_groups``: the caller
        sends back the accepted base (or None) after each yielded group,
        and a final ``None`` signals exhaustion.
        """
        good = None
        # First we try to reuse a the delta contained in the bundle. (or from
        # the source revlog)
        #
        # This logic only applies to general delta repositories and can be
        # disabled through configuration. Disabling reuse source delta is
        # useful when we want to make sure we recomputed "optimal" deltas.
        debug_info = None
        if (
            self.cachedelta is not None
            and self.cachedelta[2] > DELTA_BASE_REUSE_NO
        ):
            # Assume what we received from the server is a good choice
            # build delta will reuse the cache
            if debug_info is not None:
                debug_info['cached-delta.tested'] += 1
            good = yield (self.cachedelta[0],)
            if good is not None:
                if debug_info is not None:
                    debug_info['cached-delta.accepted'] += 1
                yield None
                return
        # fall back to the theoretical candidates, stopping at the first
        # group that produced an accepted base
        groups = self._raw_groups()
        for candidates in groups:
            good = yield candidates
            if good is not None:
                break

        # If sparse revlog is enabled, we can try to refine the available
        # deltas
        if not self.revlog.delta_config.sparse_revlog:
            yield None
            return

        # if we have a refinable value, try to refine it
        if (
            good is not None
            and good not in (self.p1, self.p2)
            and self.revlog.issnapshot(good)
        ):
            # refine snapshot down
            previous = None
            while previous != good:
                previous = good
                base = self.revlog.deltaparent(good)
                if base == nullrev:
                    break
                good = yield (base,)
            # refine snapshot up
            if not self.snapshot_cache.snapshots:
                self.snapshot_cache.update(self.revlog, good + 1)
            previous = None
            while good != previous:
                previous = good
                # try the children snapshots of the accepted base, until
                # the caller stops accepting a new one
                children = tuple(
                    sorted(c for c in self.snapshot_cache.snapshots[good])
                )
                good = yield children

        if debug_info is not None:
            if good is None:
                debug_info['no-solution'] += 1

        yield None
933 def _raw_groups(self):
935 def _raw_groups(self):
934 """Provides group of revision to be tested as delta base
936 """Provides group of revision to be tested as delta base
935
937
936 This lower level function focus on emitting delta theorically
938 This lower level function focus on emitting delta theorically
937 interresting without looking it any practical details.
939 interresting without looking it any practical details.
938
940
939 The group order aims at providing fast or small candidates first.
941 The group order aims at providing fast or small candidates first.
940 """
942 """
941 # Why search for delta base if we cannot use a delta base ?
943 # Why search for delta base if we cannot use a delta base ?
942 assert self.revlog.delta_config.general_delta
944 assert self.revlog.delta_config.general_delta
943 # also see issue6056
945 # also see issue6056
944 sparse = self.revlog.delta_config.sparse_revlog
946 sparse = self.revlog.delta_config.sparse_revlog
945 curr = len(self.revlog)
947 curr = len(self.revlog)
946 prev = curr - 1
948 prev = curr - 1
947 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
949 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
948
950
949 # exclude already lazy tested base if any
951 # exclude already lazy tested base if any
950 parents = [p for p in (self.p1, self.p2) if p != nullrev]
952 parents = [p for p in (self.p1, self.p2) if p != nullrev]
951
953
952 if (
954 if (
953 not self.revlog.delta_config.delta_both_parents
955 not self.revlog.delta_config.delta_both_parents
954 and len(parents) == 2
956 and len(parents) == 2
955 ):
957 ):
956 parents.sort()
958 parents.sort()
957 # To minimize the chance of having to build a fulltext,
959 # To minimize the chance of having to build a fulltext,
958 # pick first whichever parent is closest to us (max rev)
960 # pick first whichever parent is closest to us (max rev)
959 yield (parents[1],)
961 yield (parents[1],)
960 # then the other one (min rev) if the first did not fit
962 # then the other one (min rev) if the first did not fit
961 yield (parents[0],)
963 yield (parents[0],)
962 elif len(parents) > 0:
964 elif len(parents) > 0:
963 # Test all parents (1 or 2), and keep the best candidate
965 # Test all parents (1 or 2), and keep the best candidate
964 yield parents
966 yield parents
965
967
966 if sparse and parents:
968 if sparse and parents:
967 # See if we can use an existing snapshot in the parent chains to
969 # See if we can use an existing snapshot in the parent chains to
968 # use as a base for a new intermediate-snapshot
970 # use as a base for a new intermediate-snapshot
969 #
971 #
970 # search for snapshot in parents delta chain map: snapshot-level:
972 # search for snapshot in parents delta chain map: snapshot-level:
971 # snapshot-rev
973 # snapshot-rev
972 parents_snaps = collections.defaultdict(set)
974 parents_snaps = collections.defaultdict(set)
973 candidate_chains = [deltachain(p) for p in parents]
975 candidate_chains = [deltachain(p) for p in parents]
974 for chain in candidate_chains:
976 for chain in candidate_chains:
975 for idx, s in enumerate(chain):
977 for idx, s in enumerate(chain):
976 if not self.revlog.issnapshot(s):
978 if not self.revlog.issnapshot(s):
977 break
979 break
978 parents_snaps[idx].add(s)
980 parents_snaps[idx].add(s)
979 snapfloor = min(parents_snaps[0]) + 1
981 snapfloor = min(parents_snaps[0]) + 1
980 self.snapshot_cache.update(self.revlog, snapfloor)
982 self.snapshot_cache.update(self.revlog, snapfloor)
981 # search for the highest "unrelated" revision
983 # search for the highest "unrelated" revision
982 #
984 #
983 # Adding snapshots used by "unrelated" revision increase the odd we
985 # Adding snapshots used by "unrelated" revision increase the odd we
984 # reuse an independant, yet better snapshot chain.
986 # reuse an independant, yet better snapshot chain.
985 #
987 #
986 # XXX instead of building a set of revisions, we could lazily
988 # XXX instead of building a set of revisions, we could lazily
987 # enumerate over the chains. That would be more efficient, however
989 # enumerate over the chains. That would be more efficient, however
988 # we stick to simple code for now.
990 # we stick to simple code for now.
989 all_revs = set()
991 all_revs = set()
990 for chain in candidate_chains:
992 for chain in candidate_chains:
991 all_revs.update(chain)
993 all_revs.update(chain)
992 other = None
994 other = None
993 for r in self.revlog.revs(prev, snapfloor):
995 for r in self.revlog.revs(prev, snapfloor):
994 if r not in all_revs:
996 if r not in all_revs:
995 other = r
997 other = r
996 break
998 break
997 if other is not None:
999 if other is not None:
998 # To avoid unfair competition, we won't use unrelated
1000 # To avoid unfair competition, we won't use unrelated
999 # intermediate snapshot that are deeper than the ones from the
1001 # intermediate snapshot that are deeper than the ones from the
1000 # parent delta chain.
1002 # parent delta chain.
1001 max_depth = max(parents_snaps.keys())
1003 max_depth = max(parents_snaps.keys())
1002 chain = deltachain(other)
1004 chain = deltachain(other)
1003 for depth, s in enumerate(chain):
1005 for depth, s in enumerate(chain):
1004 if s < snapfloor:
1006 if s < snapfloor:
1005 continue
1007 continue
1006 if max_depth < depth:
1008 if max_depth < depth:
1007 break
1009 break
1008 if not self.revlog.issnapshot(s):
1010 if not self.revlog.issnapshot(s):
1009 break
1011 break
1010 parents_snaps[depth].add(s)
1012 parents_snaps[depth].add(s)
1011 # Test them as possible intermediate snapshot base We test them
1013 # Test them as possible intermediate snapshot base We test them
1012 # from highest to lowest level. High level one are more likely to
1014 # from highest to lowest level. High level one are more likely to
1013 # result in small delta
1015 # result in small delta
1014 floor = None
1016 floor = None
1015 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1017 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1016 siblings = set()
1018 siblings = set()
1017 for s in snaps:
1019 for s in snaps:
1018 siblings.update(self.snapshot_cache.snapshots[s])
1020 siblings.update(self.snapshot_cache.snapshots[s])
1019 # Before considering making a new intermediate snapshot, we
1021 # Before considering making a new intermediate snapshot, we
1020 # check if an existing snapshot, children of base we consider,
1022 # check if an existing snapshot, children of base we consider,
1021 # would be suitable.
1023 # would be suitable.
1022 #
1024 #
1023 # It give a change to reuse a delta chain "unrelated" to the
1025 # It give a change to reuse a delta chain "unrelated" to the
1024 # current revision instead of starting our own. Without such
1026 # current revision instead of starting our own. Without such
1025 # re-use, topological branches would keep reopening new chains.
1027 # re-use, topological branches would keep reopening new chains.
1026 # Creating more and more snapshot as the repository grow.
1028 # Creating more and more snapshot as the repository grow.
1027
1029
1028 if floor is not None:
1030 if floor is not None:
1029 # We only do this for siblings created after the one in our
1031 # We only do this for siblings created after the one in our
1030 # parent's delta chain. Those created before has less
1032 # parent's delta chain. Those created before has less
1031 # chances to be valid base since our ancestors had to
1033 # chances to be valid base since our ancestors had to
1032 # create a new snapshot.
1034 # create a new snapshot.
1033 siblings = [r for r in siblings if floor < r]
1035 siblings = [r for r in siblings if floor < r]
1034 yield tuple(sorted(siblings))
1036 yield tuple(sorted(siblings))
1035 # then test the base from our parent's delta chain.
1037 # then test the base from our parent's delta chain.
1036 yield tuple(sorted(snaps))
1038 yield tuple(sorted(snaps))
1037 floor = min(snaps)
1039 floor = min(snaps)
1038 # No suitable base found in the parent chain, search if any full
1040 # No suitable base found in the parent chain, search if any full
1039 # snapshots emitted since parent's base would be a suitable base
1041 # snapshots emitted since parent's base would be a suitable base
1040 # for an intermediate snapshot.
1042 # for an intermediate snapshot.
1041 #
1043 #
1042 # It give a chance to reuse a delta chain unrelated to the current
1044 # It give a chance to reuse a delta chain unrelated to the current
1043 # revisions instead of starting our own. Without such re-use,
1045 # revisions instead of starting our own. Without such re-use,
1044 # topological branches would keep reopening new full chains.
1046 # topological branches would keep reopening new full chains.
1045 # Creating more and more snapshot as the repository grow.
1047 # Creating more and more snapshot as the repository grow.
1046 full = [
1048 full = [
1047 r
1049 r
1048 for r in self.snapshot_cache.snapshots[nullrev]
1050 for r in self.snapshot_cache.snapshots[nullrev]
1049 if snapfloor <= r
1051 if snapfloor <= r
1050 ]
1052 ]
1051 yield tuple(sorted(full))
1053 yield tuple(sorted(full))
1052
1054
1053 if not sparse:
1055 if not sparse:
1054 # other approach failed try against prev to hopefully save us a
1056 # other approach failed try against prev to hopefully save us a
1055 # fulltext.
1057 # fulltext.
1056 yield (prev,)
1058 yield (prev,)
1057
1059
1058
1060
class SnapshotCache:
    """Cache of the snapshot revisions of a revlog, keyed by delta parent.

    ``snapshots`` maps a base revision to the set of snapshot revisions whose
    delta parent is that base (full snapshots are keyed under ``nullrev``).

    ``_start_rev``/``_end_rev`` record the inclusive revision range that has
    already been scanned, so repeated ``update`` calls only scan revisions
    not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        self.snapshots = collections.defaultdict(set)
        # bounds of the already-scanned range; both None until first update
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # the requested range is not fully covered: scan only the
            # missing portion(s) on either side of the known range
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the caller must only hand us ranges disjoint from the known one
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        if hasattr(revlog.index, 'findsnapshots'):
            # fast path: let the (native) index gather snapshots itself
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1112
1114
1113
1115
class deltacomputer:
    """object capable of computing delta and finding delta for multiple revision

    This object is meant to compute and find multiple delta applied to the same
    revlog.
    """
1120
1122
1121 def __init__(
1123 def __init__(
1122 self,
1124 self,
1123 revlog,
1125 revlog,
1124 write_debug=None,
1126 write_debug=None,
1125 debug_search=False,
1127 debug_search=False,
1126 debug_info=None,
1128 debug_info=None,
1127 ):
1129 ):
1128 self.revlog = revlog
1130 self.revlog = revlog
1129 self._write_debug = write_debug
1131 self._write_debug = write_debug
1130 if write_debug is None:
1132 if write_debug is None:
1131 self._debug_search = False
1133 self._debug_search = False
1132 else:
1134 else:
1133 self._debug_search = debug_search
1135 self._debug_search = debug_search
1134 self._debug_info = debug_info
1136 self._debug_info = debug_info
1135 self._snapshot_cache = SnapshotCache()
1137 self._snapshot_cache = SnapshotCache()
1136
1138
1137 @property
1139 @property
1138 def _gather_debug(self):
1140 def _gather_debug(self):
1139 return self._write_debug is not None or self._debug_info is not None
1141 return self._write_debug is not None or self._debug_info is not None
1140
1142
1141 def buildtext(self, revinfo):
1143 def buildtext(self, revinfo):
1142 """Builds a fulltext version of a revision
1144 """Builds a fulltext version of a revision
1143
1145
1144 revinfo: revisioninfo instance that contains all needed info
1146 revinfo: revisioninfo instance that contains all needed info
1145 """
1147 """
1146 btext = revinfo.btext
1148 btext = revinfo.btext
1147 if btext[0] is not None:
1149 if btext[0] is not None:
1148 return btext[0]
1150 return btext[0]
1149
1151
1150 revlog = self.revlog
1152 revlog = self.revlog
1151 cachedelta = revinfo.cachedelta
1153 cachedelta = revinfo.cachedelta
1152 baserev = cachedelta[0]
1154 baserev = cachedelta[0]
1153 delta = cachedelta[1]
1155 delta = cachedelta[1]
1154
1156
1155 fulltext = btext[0] = _textfromdelta(
1157 fulltext = btext[0] = _textfromdelta(
1156 revlog,
1158 revlog,
1157 baserev,
1159 baserev,
1158 delta,
1160 delta,
1159 revinfo.p1,
1161 revinfo.p1,
1160 revinfo.p2,
1162 revinfo.p2,
1161 revinfo.flags,
1163 revinfo.flags,
1162 revinfo.node,
1164 revinfo.node,
1163 )
1165 )
1164 return fulltext
1166 return fulltext
1165
1167
1166 def _builddeltadiff(self, base, revinfo):
1168 def _builddeltadiff(self, base, revinfo):
1167 revlog = self.revlog
1169 revlog = self.revlog
1168 t = self.buildtext(revinfo)
1170 t = self.buildtext(revinfo)
1169 if revlog.iscensored(base):
1171 if revlog.iscensored(base):
1170 # deltas based on a censored revision must replace the
1172 # deltas based on a censored revision must replace the
1171 # full content in one patch, so delta works everywhere
1173 # full content in one patch, so delta works everywhere
1172 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1174 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1173 delta = header + t
1175 delta = header + t
1174 else:
1176 else:
1175 ptext = revlog.rawdata(base)
1177 ptext = revlog.rawdata(base)
1176 delta = mdiff.textdiff(ptext, t)
1178 delta = mdiff.textdiff(ptext, t)
1177
1179
1178 return delta
1180 return delta
1179
1181
1180 def _builddeltainfo(self, revinfo, base, target_rev=None):
1182 def _builddeltainfo(self, revinfo, base, target_rev=None):
1181 # can we use the cached delta?
1183 # can we use the cached delta?
1182 revlog = self.revlog
1184 revlog = self.revlog
1183 chainbase = revlog.chainbase(base)
1185 chainbase = revlog.chainbase(base)
1184 if revlog.delta_config.general_delta:
1186 if revlog.delta_config.general_delta:
1185 deltabase = base
1187 deltabase = base
1186 else:
1188 else:
1187 if target_rev is not None and base != target_rev - 1:
1189 if target_rev is not None and base != target_rev - 1:
1188 msg = (
1190 msg = (
1189 b'general delta cannot use delta for something else '
1191 b'general delta cannot use delta for something else '
1190 b'than `prev`: %d<-%d'
1192 b'than `prev`: %d<-%d'
1191 )
1193 )
1192 msg %= (base, target_rev)
1194 msg %= (base, target_rev)
1193 raise error.ProgrammingError(msg)
1195 raise error.ProgrammingError(msg)
1194 deltabase = chainbase
1196 deltabase = chainbase
1195 snapshotdepth = None
1197 snapshotdepth = None
1196 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1198 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1197 snapshotdepth = 0
1199 snapshotdepth = 0
1198 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1200 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1199 # A delta chain should always be one full snapshot,
1201 # A delta chain should always be one full snapshot,
1200 # zero or more semi-snapshots, and zero or more deltas
1202 # zero or more semi-snapshots, and zero or more deltas
1201 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1203 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1202 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1204 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1203 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1205 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1204 delta = None
1206 delta = None
1205 if revinfo.cachedelta:
1207 if revinfo.cachedelta:
1206 cachebase = revinfo.cachedelta[0]
1208 cachebase = revinfo.cachedelta[0]
1207 # check if the diff still apply
1209 # check if the diff still apply
1208 currentbase = cachebase
1210 currentbase = cachebase
1209 while (
1211 while (
1210 currentbase != nullrev
1212 currentbase != nullrev
1211 and currentbase != base
1213 and currentbase != base
1212 and self.revlog.length(currentbase) == 0
1214 and self.revlog.length(currentbase) == 0
1213 ):
1215 ):
1214 currentbase = self.revlog.deltaparent(currentbase)
1216 currentbase = self.revlog.deltaparent(currentbase)
1215 if self.revlog.delta_config.lazy_delta and currentbase == base:
1217 if self.revlog.delta_config.lazy_delta and currentbase == base:
1216 delta = revinfo.cachedelta[1]
1218 delta = revinfo.cachedelta[1]
1217 if delta is None:
1219 if delta is None:
1218 delta = self._builddeltadiff(base, revinfo)
1220 delta = self._builddeltadiff(base, revinfo)
1219 if self._debug_search:
1221 if self._debug_search:
1220 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1222 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1221 msg %= len(delta)
1223 msg %= len(delta)
1222 self._write_debug(msg)
1224 self._write_debug(msg)
1223 # snapshotdept need to be neither None nor 0 level snapshot
1225 # snapshotdept need to be neither None nor 0 level snapshot
1224 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1226 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1225 lowestrealisticdeltalen = (
1227 lowestrealisticdeltalen = (
1226 len(delta) // revlog.delta_config.upper_bound_comp
1228 len(delta) // revlog.delta_config.upper_bound_comp
1227 )
1229 )
1228 snapshotlimit = revinfo.textlen >> snapshotdepth
1230 snapshotlimit = revinfo.textlen >> snapshotdepth
1229 if self._debug_search:
1231 if self._debug_search:
1230 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1232 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1231 msg %= lowestrealisticdeltalen
1233 msg %= lowestrealisticdeltalen
1232 self._write_debug(msg)
1234 self._write_debug(msg)
1233 if snapshotlimit < lowestrealisticdeltalen:
1235 if snapshotlimit < lowestrealisticdeltalen:
1234 if self._debug_search:
1236 if self._debug_search:
1235 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1237 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1236 self._write_debug(msg)
1238 self._write_debug(msg)
1237 return None
1239 return None
1238 if revlog.length(base) < lowestrealisticdeltalen:
1240 if revlog.length(base) < lowestrealisticdeltalen:
1239 if self._debug_search:
1241 if self._debug_search:
1240 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1242 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1241 self._write_debug(msg)
1243 self._write_debug(msg)
1242 return None
1244 return None
1243 header, data = revlog._inner.compress(delta)
1245 header, data = revlog._inner.compress(delta)
1244 deltalen = len(header) + len(data)
1246 deltalen = len(header) + len(data)
1245 offset = revlog.end(len(revlog) - 1)
1247 offset = revlog.end(len(revlog) - 1)
1246 dist = deltalen + offset - revlog.start(chainbase)
1248 dist = deltalen + offset - revlog.start(chainbase)
1247 chainlen, compresseddeltalen = revlog._chaininfo(base)
1249 chainlen, compresseddeltalen = revlog._chaininfo(base)
1248 chainlen += 1
1250 chainlen += 1
1249 compresseddeltalen += deltalen
1251 compresseddeltalen += deltalen
1250
1252
1251 return _deltainfo(
1253 return _deltainfo(
1252 dist,
1254 dist,
1253 deltalen,
1255 deltalen,
1254 (header, data),
1256 (header, data),
1255 deltabase,
1257 deltabase,
1256 chainbase,
1258 chainbase,
1257 chainlen,
1259 chainlen,
1258 compresseddeltalen,
1260 compresseddeltalen,
1259 snapshotdepth,
1261 snapshotdepth,
1260 )
1262 )
1261
1263
1262 def _fullsnapshotinfo(self, revinfo, curr):
1264 def _fullsnapshotinfo(self, revinfo, curr):
1263 rawtext = self.buildtext(revinfo)
1265 rawtext = self.buildtext(revinfo)
1264 data = self.revlog._inner.compress(rawtext)
1266 data = self.revlog._inner.compress(rawtext)
1265 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1267 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1266 deltabase = chainbase = curr
1268 deltabase = chainbase = curr
1267 snapshotdepth = 0
1269 snapshotdepth = 0
1268 chainlen = 1
1270 chainlen = 1
1269
1271
1270 return _deltainfo(
1272 return _deltainfo(
1271 dist,
1273 dist,
1272 deltalen,
1274 deltalen,
1273 data,
1275 data,
1274 deltabase,
1276 deltabase,
1275 chainbase,
1277 chainbase,
1276 chainlen,
1278 chainlen,
1277 compresseddeltalen,
1279 compresseddeltalen,
1278 snapshotdepth,
1280 snapshotdepth,
1279 )
1281 )
1280
1282
1281 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1283 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1282 """Find an acceptable delta against a candidate revision
1284 """Find an acceptable delta against a candidate revision
1283
1285
1284 revinfo: information about the revision (instance of _revisioninfo)
1286 revinfo: information about the revision (instance of _revisioninfo)
1285
1287
1286 Returns the first acceptable candidate revision, as ordered by
1288 Returns the first acceptable candidate revision, as ordered by
1287 _candidategroups
1289 _candidategroups
1288
1290
1289 If no suitable deltabase is found, we return delta info for a full
1291 If no suitable deltabase is found, we return delta info for a full
1290 snapshot.
1292 snapshot.
1291
1293
1292 `excluded_bases` is an optional set of revision that cannot be used as
1294 `excluded_bases` is an optional set of revision that cannot be used as
1293 a delta base. Use this to recompute delta suitable in censor or strip
1295 a delta base. Use this to recompute delta suitable in censor or strip
1294 context.
1296 context.
1295 """
1297 """
1296 if target_rev is None:
1298 if target_rev is None:
1297 target_rev = len(self.revlog)
1299 target_rev = len(self.revlog)
1298
1300
1299 gather_debug = self._gather_debug
1301 gather_debug = self._gather_debug
1300 cachedelta = revinfo.cachedelta
1302 cachedelta = revinfo.cachedelta
1301 revlog = self.revlog
1303 revlog = self.revlog
1302 p1r = p2r = None
1304 p1r = p2r = None
1303
1305
1304 if excluded_bases is None:
1306 if excluded_bases is None:
1305 excluded_bases = set()
1307 excluded_bases = set()
1306
1308
1307 if gather_debug:
1309 if gather_debug:
1308 start = util.timer()
1310 start = util.timer()
1309 dbg = self._one_dbg_data()
1311 dbg = self._one_dbg_data()
1310 dbg['revision'] = target_rev
1312 dbg['revision'] = target_rev
1311 target_revlog = b"UNKNOWN"
1313 target_revlog = b"UNKNOWN"
1312 target_type = self.revlog.target[0]
1314 target_type = self.revlog.target[0]
1313 target_key = self.revlog.target[1]
1315 target_key = self.revlog.target[1]
1314 if target_type == KIND_CHANGELOG:
1316 if target_type == KIND_CHANGELOG:
1315 target_revlog = b'CHANGELOG:'
1317 target_revlog = b'CHANGELOG:'
1316 elif target_type == KIND_MANIFESTLOG:
1318 elif target_type == KIND_MANIFESTLOG:
1317 target_revlog = b'MANIFESTLOG:'
1319 target_revlog = b'MANIFESTLOG:'
1318 if target_key:
1320 if target_key:
1319 target_revlog += b'%s:' % target_key
1321 target_revlog += b'%s:' % target_key
1320 elif target_type == KIND_FILELOG:
1322 elif target_type == KIND_FILELOG:
1321 target_revlog = b'FILELOG:'
1323 target_revlog = b'FILELOG:'
1322 if target_key:
1324 if target_key:
1323 target_revlog += b'%s:' % target_key
1325 target_revlog += b'%s:' % target_key
1324 dbg['target-revlog'] = target_revlog
1326 dbg['target-revlog'] = target_revlog
1325 p1r = revlog.rev(revinfo.p1)
1327 p1r = revlog.rev(revinfo.p1)
1326 p2r = revlog.rev(revinfo.p2)
1328 p2r = revlog.rev(revinfo.p2)
1327 if p1r != nullrev:
1329 if p1r != nullrev:
1328 p1_chain_len = revlog._chaininfo(p1r)[0]
1330 p1_chain_len = revlog._chaininfo(p1r)[0]
1329 else:
1331 else:
1330 p1_chain_len = -1
1332 p1_chain_len = -1
1331 if p2r != nullrev:
1333 if p2r != nullrev:
1332 p2_chain_len = revlog._chaininfo(p2r)[0]
1334 p2_chain_len = revlog._chaininfo(p2r)[0]
1333 else:
1335 else:
1334 p2_chain_len = -1
1336 p2_chain_len = -1
1335 dbg['p1-chain-len'] = p1_chain_len
1337 dbg['p1-chain-len'] = p1_chain_len
1336 dbg['p2-chain-len'] = p2_chain_len
1338 dbg['p2-chain-len'] = p2_chain_len
1337
1339
1338 # 1) if the revision is empty, no amount of delta can beat it
1340 # 1) if the revision is empty, no amount of delta can beat it
1339 #
1341 #
1340 # 2) no delta for flag processor revision (see "candelta" for why)
1342 # 2) no delta for flag processor revision (see "candelta" for why)
1341 # not calling candelta since only one revision needs test, also to
1343 # not calling candelta since only one revision needs test, also to
1342 # avoid overhead fetching flags again.
1344 # avoid overhead fetching flags again.
1343 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1345 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1344 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1346 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1345 if gather_debug:
1347 if gather_debug:
1346 end = util.timer()
1348 end = util.timer()
1347 dbg['duration'] = end - start
1349 dbg['duration'] = end - start
1348 dbg[
1350 dbg[
1349 'delta-base'
1351 'delta-base'
1350 ] = deltainfo.base # pytype: disable=attribute-error
1352 ] = deltainfo.base # pytype: disable=attribute-error
1351 dbg['search_round_count'] = 0
1353 dbg['search_round_count'] = 0
1352 dbg['using-cached-base'] = False
1354 dbg['using-cached-base'] = False
1353 dbg['delta_try_count'] = 0
1355 dbg['delta_try_count'] = 0
1354 dbg['type'] = b"full"
1356 dbg['type'] = b"full"
1355 dbg['snapshot-depth'] = 0
1357 dbg['snapshot-depth'] = 0
1356 self._dbg_process_data(dbg)
1358 self._dbg_process_data(dbg)
1357 return deltainfo
1359 return deltainfo
1358
1360
1359 deltainfo = None
1361 deltainfo = None
1360
1362
1361 # If this source delta are to be forcibly reuse, let us comply early.
1363 # If this source delta are to be forcibly reuse, let us comply early.
1362 if (
1364 if (
1363 revlog.delta_config.general_delta
1365 revlog.delta_config.general_delta
1364 and revinfo.cachedelta is not None
1366 and revinfo.cachedelta is not None
1365 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1367 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1366 ):
1368 ):
1367 base = revinfo.cachedelta[0]
1369 base = revinfo.cachedelta[0]
1368 if base == nullrev:
1370 if base == nullrev:
1369 dbg_type = b"full"
1371 dbg_type = b"full"
1370 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1372 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1371 if gather_debug:
1373 if gather_debug:
1372 snapshotdepth = 0
1374 snapshotdepth = 0
1373 elif base not in excluded_bases:
1375 elif base not in excluded_bases:
1374 delta = revinfo.cachedelta[1]
1376 delta = revinfo.cachedelta[1]
1375 header, data = revlog.compress(delta)
1377 header, data = revlog.compress(delta)
1376 deltalen = len(header) + len(data)
1378 deltalen = len(header) + len(data)
1377 if gather_debug:
1379 if gather_debug:
1378 offset = revlog.end(len(revlog) - 1)
1380 offset = revlog.end(len(revlog) - 1)
1379 chainbase = revlog.chainbase(base)
1381 chainbase = revlog.chainbase(base)
1380 distance = deltalen + offset - revlog.start(chainbase)
1382 distance = deltalen + offset - revlog.start(chainbase)
1381 chainlen, compresseddeltalen = revlog._chaininfo(base)
1383 chainlen, compresseddeltalen = revlog._chaininfo(base)
1382 chainlen += 1
1384 chainlen += 1
1383 compresseddeltalen += deltalen
1385 compresseddeltalen += deltalen
1384 if base == p1r or base == p2r:
1386 if base == p1r or base == p2r:
1385 dbg_type = b"delta"
1387 dbg_type = b"delta"
1386 snapshotdepth = None
1388 snapshotdepth = None
1387 elif not revlog.issnapshot(base):
1389 elif not revlog.issnapshot(base):
1388 snapshotdepth = None
1390 snapshotdepth = None
1389 else:
1391 else:
1390 dbg_type = b"snapshot"
1392 dbg_type = b"snapshot"
1391 snapshotdepth = revlog.snapshotdepth(base) + 1
1393 snapshotdepth = revlog.snapshotdepth(base) + 1
1392 else:
1394 else:
1393 distance = None
1395 distance = None
1394 chainbase = None
1396 chainbase = None
1395 chainlen = None
1397 chainlen = None
1396 compresseddeltalen = None
1398 compresseddeltalen = None
1397 snapshotdepth = None
1399 snapshotdepth = None
1398 deltainfo = _deltainfo(
1400 deltainfo = _deltainfo(
1399 distance=distance,
1401 distance=distance,
1400 deltalen=deltalen,
1402 deltalen=deltalen,
1401 data=(header, data),
1403 data=(header, data),
1402 base=base,
1404 base=base,
1403 chainbase=chainbase,
1405 chainbase=chainbase,
1404 chainlen=chainlen,
1406 chainlen=chainlen,
1405 compresseddeltalen=compresseddeltalen,
1407 compresseddeltalen=compresseddeltalen,
1406 snapshotdepth=snapshotdepth,
1408 snapshotdepth=snapshotdepth,
1407 )
1409 )
1408
1410
1409 if deltainfo is not None:
1411 if deltainfo is not None:
1410 if gather_debug:
1412 if gather_debug:
1411 end = util.timer()
1413 end = util.timer()
1412 dbg['duration'] = end - start
1414 dbg['duration'] = end - start
1413 dbg[
1415 dbg[
1414 'delta-base'
1416 'delta-base'
1415 ] = deltainfo.base # pytype: disable=attribute-error
1417 ] = deltainfo.base # pytype: disable=attribute-error
1416 dbg['search_round_count'] = 0
1418 dbg['search_round_count'] = 0
1417 dbg['using-cached-base'] = True
1419 dbg['using-cached-base'] = True
1418 dbg['delta_try_count'] = 0
1420 dbg['delta_try_count'] = 0
1419 dbg['type'] = b"full"
1421 dbg['type'] = b"full"
1420 if snapshotdepth is None:
1422 if snapshotdepth is None:
1421 dbg['snapshot-depth'] = 0
1423 dbg['snapshot-depth'] = 0
1422 else:
1424 else:
1423 dbg['snapshot-depth'] = snapshotdepth
1425 dbg['snapshot-depth'] = snapshotdepth
1424 self._dbg_process_data(dbg)
1426 self._dbg_process_data(dbg)
1425 return deltainfo
1427 return deltainfo
1426
1428
1427 # count the number of different delta we tried (for debug purpose)
1429 # count the number of different delta we tried (for debug purpose)
1428 dbg_try_count = 0
1430 dbg_try_count = 0
1429 # count the number of "search round" we did. (for debug purpose)
1431 # count the number of "search round" we did. (for debug purpose)
1430 dbg_try_rounds = 0
1432 dbg_try_rounds = 0
1431 dbg_type = b'unknown'
1433 dbg_type = b'unknown'
1432
1434
1433 if p1r is None:
1435 if p1r is None:
1434 p1r = revlog.rev(revinfo.p1)
1436 p1r = revlog.rev(revinfo.p1)
1435 p2r = revlog.rev(revinfo.p2)
1437 p2r = revlog.rev(revinfo.p2)
1436
1438
1437 if self._debug_search:
1439 if self._debug_search:
1438 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1440 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1439 msg %= target_rev
1441 msg %= target_rev
1440 self._write_debug(msg)
1442 self._write_debug(msg)
1441
1443
1442 search = _DeltaSearch(
1444 search = _DeltaSearch(
1443 self.revlog,
1445 self.revlog,
1444 revinfo.textlen,
1446 revinfo.textlen,
1445 p1r,
1447 p1r,
1446 p2r,
1448 p2r,
1447 cachedelta,
1449 cachedelta,
1448 excluded_bases,
1450 excluded_bases,
1449 target_rev,
1451 target_rev,
1450 snapshot_cache=self._snapshot_cache,
1452 snapshot_cache=self._snapshot_cache,
1451 )
1453 )
1452
1454
1453 groups = search.candidate_groups()
1455 groups = search.candidate_groups()
1454 candidaterevs = next(groups)
1456 candidaterevs = next(groups)
1455 while candidaterevs is not None:
1457 while candidaterevs is not None:
1456 dbg_try_rounds += 1
1458 dbg_try_rounds += 1
1457 if self._debug_search:
1459 if self._debug_search:
1458 prev = None
1460 prev = None
1459 if deltainfo is not None:
1461 if deltainfo is not None:
1460 prev = deltainfo.base
1462 prev = deltainfo.base
1461
1463
1462 if (
1464 if (
1463 cachedelta is not None
1465 cachedelta is not None
1464 and len(candidaterevs) == 1
1466 and len(candidaterevs) == 1
1465 and cachedelta[0] in candidaterevs
1467 and cachedelta[0] in candidaterevs
1466 ):
1468 ):
1467 round_type = b"cached-delta"
1469 round_type = b"cached-delta"
1468 elif p1r in candidaterevs or p2r in candidaterevs:
1470 elif p1r in candidaterevs or p2r in candidaterevs:
1469 round_type = b"parents"
1471 round_type = b"parents"
1470 elif prev is not None and all(c < prev for c in candidaterevs):
1472 elif prev is not None and all(c < prev for c in candidaterevs):
1471 round_type = b"refine-down"
1473 round_type = b"refine-down"
1472 elif prev is not None and all(c > prev for c in candidaterevs):
1474 elif prev is not None and all(c > prev for c in candidaterevs):
1473 round_type = b"refine-up"
1475 round_type = b"refine-up"
1474 else:
1476 else:
1475 round_type = b"search-down"
1477 round_type = b"search-down"
1476 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1478 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1477 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1479 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1478 self._write_debug(msg)
1480 self._write_debug(msg)
1479 nominateddeltas = []
1481 nominateddeltas = []
1480 if deltainfo is not None:
1482 if deltainfo is not None:
1481 if self._debug_search:
1483 if self._debug_search:
1482 msg = (
1484 msg = (
1483 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1485 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1484 )
1486 )
1485 msg %= (deltainfo.base, deltainfo.deltalen)
1487 msg %= (deltainfo.base, deltainfo.deltalen)
1486 self._write_debug(msg)
1488 self._write_debug(msg)
1487 # if we already found a good delta,
1489 # if we already found a good delta,
1488 # challenge it against refined candidates
1490 # challenge it against refined candidates
1489 nominateddeltas.append(deltainfo)
1491 nominateddeltas.append(deltainfo)
1490 for candidaterev in candidaterevs:
1492 for candidaterev in candidaterevs:
1491 if self._debug_search:
1493 if self._debug_search:
1492 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1494 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1493 msg %= candidaterev
1495 msg %= candidaterev
1494 self._write_debug(msg)
1496 self._write_debug(msg)
1495 candidate_type = None
1497 candidate_type = None
1496 if candidaterev == p1r:
1498 if candidaterev == p1r:
1497 candidate_type = b"p1"
1499 candidate_type = b"p1"
1498 elif candidaterev == p2r:
1500 elif candidaterev == p2r:
1499 candidate_type = b"p2"
1501 candidate_type = b"p2"
1500 elif self.revlog.issnapshot(candidaterev):
1502 elif self.revlog.issnapshot(candidaterev):
1501 candidate_type = b"snapshot-%d"
1503 candidate_type = b"snapshot-%d"
1502 candidate_type %= self.revlog.snapshotdepth(
1504 candidate_type %= self.revlog.snapshotdepth(
1503 candidaterev
1505 candidaterev
1504 )
1506 )
1505
1507
1506 if candidate_type is not None:
1508 if candidate_type is not None:
1507 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1509 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1508 msg %= candidate_type
1510 msg %= candidate_type
1509 self._write_debug(msg)
1511 self._write_debug(msg)
1510 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1512 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1511 msg %= self.revlog.length(candidaterev)
1513 msg %= self.revlog.length(candidaterev)
1512 self._write_debug(msg)
1514 self._write_debug(msg)
1513 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1515 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1514 msg %= self.revlog.deltaparent(candidaterev)
1516 msg %= self.revlog.deltaparent(candidaterev)
1515 self._write_debug(msg)
1517 self._write_debug(msg)
1516
1518
1517 dbg_try_count += 1
1519 dbg_try_count += 1
1518
1520
1519 if self._debug_search:
1521 if self._debug_search:
1520 delta_start = util.timer()
1522 delta_start = util.timer()
1521 candidatedelta = self._builddeltainfo(
1523 candidatedelta = self._builddeltainfo(
1522 revinfo,
1524 revinfo,
1523 candidaterev,
1525 candidaterev,
1524 target_rev=target_rev,
1526 target_rev=target_rev,
1525 )
1527 )
1526 if self._debug_search:
1528 if self._debug_search:
1527 delta_end = util.timer()
1529 delta_end = util.timer()
1528 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1530 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1529 msg %= delta_end - delta_start
1531 msg %= delta_end - delta_start
1530 self._write_debug(msg)
1532 self._write_debug(msg)
1531 if candidatedelta is not None:
1533 if candidatedelta is not None:
1532 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1534 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1533 if self._debug_search:
1535 if self._debug_search:
1534 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1536 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1535 msg %= candidatedelta.deltalen
1537 msg %= candidatedelta.deltalen
1536 self._write_debug(msg)
1538 self._write_debug(msg)
1537 nominateddeltas.append(candidatedelta)
1539 nominateddeltas.append(candidatedelta)
1538 elif self._debug_search:
1540 elif self._debug_search:
1539 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1541 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1540 msg %= candidatedelta.deltalen
1542 msg %= candidatedelta.deltalen
1541 self._write_debug(msg)
1543 self._write_debug(msg)
1542 elif self._debug_search:
1544 elif self._debug_search:
1543 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1545 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1544 self._write_debug(msg)
1546 self._write_debug(msg)
1545 if nominateddeltas:
1547 if nominateddeltas:
1546 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1548 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1547 if deltainfo is not None:
1549 if deltainfo is not None:
1548 candidaterevs = groups.send(deltainfo.base)
1550 candidaterevs = groups.send(deltainfo.base)
1549 else:
1551 else:
1550 candidaterevs = next(groups)
1552 candidaterevs = next(groups)
1551
1553
1552 if deltainfo is None:
1554 if deltainfo is None:
1553 dbg_type = b"full"
1555 dbg_type = b"full"
1554 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1556 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1555 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1557 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1556 dbg_type = b"snapshot"
1558 dbg_type = b"snapshot"
1557 else:
1559 else:
1558 dbg_type = b"delta"
1560 dbg_type = b"delta"
1559
1561
1560 if gather_debug:
1562 if gather_debug:
1561 end = util.timer()
1563 end = util.timer()
1562 if dbg_type == b'full':
1564 if dbg_type == b'full':
1563 used_cached = (
1565 used_cached = (
1564 cachedelta is not None
1566 cachedelta is not None
1565 and dbg_try_rounds == 0
1567 and dbg_try_rounds == 0
1566 and dbg_try_count == 0
1568 and dbg_try_count == 0
1567 and cachedelta[0] == nullrev
1569 and cachedelta[0] == nullrev
1568 )
1570 )
1569 else:
1571 else:
1570 used_cached = (
1572 used_cached = (
1571 cachedelta is not None
1573 cachedelta is not None
1572 and dbg_try_rounds == 1
1574 and dbg_try_rounds == 1
1573 and dbg_try_count == 1
1575 and dbg_try_count == 1
1574 and deltainfo.base == cachedelta[0]
1576 and deltainfo.base == cachedelta[0]
1575 )
1577 )
1576 dbg['duration'] = end - start
1578 dbg['duration'] = end - start
1577 dbg[
1579 dbg[
1578 'delta-base'
1580 'delta-base'
1579 ] = deltainfo.base # pytype: disable=attribute-error
1581 ] = deltainfo.base # pytype: disable=attribute-error
1580 dbg['search_round_count'] = dbg_try_rounds
1582 dbg['search_round_count'] = dbg_try_rounds
1581 dbg['using-cached-base'] = used_cached
1583 dbg['using-cached-base'] = used_cached
1582 dbg['delta_try_count'] = dbg_try_count
1584 dbg['delta_try_count'] = dbg_try_count
1583 dbg['type'] = dbg_type
1585 dbg['type'] = dbg_type
1584 if (
1586 if (
1585 deltainfo.snapshotdepth # pytype: disable=attribute-error
1587 deltainfo.snapshotdepth # pytype: disable=attribute-error
1586 is not None
1588 is not None
1587 ):
1589 ):
1588 dbg[
1590 dbg[
1589 'snapshot-depth'
1591 'snapshot-depth'
1590 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1592 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1591 else:
1593 else:
1592 dbg['snapshot-depth'] = 0
1594 dbg['snapshot-depth'] = 0
1593 self._dbg_process_data(dbg)
1595 self._dbg_process_data(dbg)
1594 return deltainfo
1596 return deltainfo
1595
1597
1596 def _one_dbg_data(self):
1598 def _one_dbg_data(self):
1597 return {
1599 return {
1598 'duration': None,
1600 'duration': None,
1599 'revision': None,
1601 'revision': None,
1600 'delta-base': None,
1602 'delta-base': None,
1601 'search_round_count': None,
1603 'search_round_count': None,
1602 'using-cached-base': None,
1604 'using-cached-base': None,
1603 'delta_try_count': None,
1605 'delta_try_count': None,
1604 'type': None,
1606 'type': None,
1605 'p1-chain-len': None,
1607 'p1-chain-len': None,
1606 'p2-chain-len': None,
1608 'p2-chain-len': None,
1607 'snapshot-depth': None,
1609 'snapshot-depth': None,
1608 'target-revlog': None,
1610 'target-revlog': None,
1609 }
1611 }
1610
1612
1611 def _dbg_process_data(self, dbg):
1613 def _dbg_process_data(self, dbg):
1612 if self._debug_info is not None:
1614 if self._debug_info is not None:
1613 self._debug_info.append(dbg)
1615 self._debug_info.append(dbg)
1614
1616
1615 if self._write_debug is not None:
1617 if self._write_debug is not None:
1616 msg = (
1618 msg = (
1617 b"DBG-DELTAS:"
1619 b"DBG-DELTAS:"
1618 b" %-12s"
1620 b" %-12s"
1619 b" rev=%d:"
1621 b" rev=%d:"
1620 b" delta-base=%d"
1622 b" delta-base=%d"
1621 b" is-cached=%d"
1623 b" is-cached=%d"
1622 b" - search-rounds=%d"
1624 b" - search-rounds=%d"
1623 b" try-count=%d"
1625 b" try-count=%d"
1624 b" - delta-type=%-6s"
1626 b" - delta-type=%-6s"
1625 b" snap-depth=%d"
1627 b" snap-depth=%d"
1626 b" - p1-chain-length=%d"
1628 b" - p1-chain-length=%d"
1627 b" p2-chain-length=%d"
1629 b" p2-chain-length=%d"
1628 b" - duration=%f"
1630 b" - duration=%f"
1629 b"\n"
1631 b"\n"
1630 )
1632 )
1631 msg %= (
1633 msg %= (
1632 dbg["target-revlog"],
1634 dbg["target-revlog"],
1633 dbg["revision"],
1635 dbg["revision"],
1634 dbg["delta-base"],
1636 dbg["delta-base"],
1635 dbg["using-cached-base"],
1637 dbg["using-cached-base"],
1636 dbg["search_round_count"],
1638 dbg["search_round_count"],
1637 dbg["delta_try_count"],
1639 dbg["delta_try_count"],
1638 dbg["type"],
1640 dbg["type"],
1639 dbg["snapshot-depth"],
1641 dbg["snapshot-depth"],
1640 dbg["p1-chain-len"],
1642 dbg["p1-chain-len"],
1641 dbg["p2-chain-len"],
1643 dbg["p2-chain-len"],
1642 dbg["duration"],
1644 dbg["duration"],
1643 )
1645 )
1644 self._write_debug(msg)
1646 self._write_debug(msg)
1645
1647
1646
1648
1647 def delta_compression(default_compression_header, deltainfo):
1649 def delta_compression(default_compression_header, deltainfo):
1648 """return (COMPRESSION_MODE, deltainfo)
1650 """return (COMPRESSION_MODE, deltainfo)
1649
1651
1650 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1652 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1651 compression.
1653 compression.
1652 """
1654 """
1653 h, d = deltainfo.data
1655 h, d = deltainfo.data
1654 compression_mode = COMP_MODE_INLINE
1656 compression_mode = COMP_MODE_INLINE
1655 if not h and not d:
1657 if not h and not d:
1656 # not data to store at all... declare them uncompressed
1658 # not data to store at all... declare them uncompressed
1657 compression_mode = COMP_MODE_PLAIN
1659 compression_mode = COMP_MODE_PLAIN
1658 elif not h:
1660 elif not h:
1659 t = d[0:1]
1661 t = d[0:1]
1660 if t == b'\0':
1662 if t == b'\0':
1661 compression_mode = COMP_MODE_PLAIN
1663 compression_mode = COMP_MODE_PLAIN
1662 elif t == default_compression_header:
1664 elif t == default_compression_header:
1663 compression_mode = COMP_MODE_DEFAULT
1665 compression_mode = COMP_MODE_DEFAULT
1664 elif h == b'u':
1666 elif h == b'u':
1665 # we have a more efficient way to declare uncompressed
1667 # we have a more efficient way to declare uncompressed
1666 h = b''
1668 h = b''
1667 compression_mode = COMP_MODE_PLAIN
1669 compression_mode = COMP_MODE_PLAIN
1668 deltainfo = drop_u_compression(deltainfo)
1670 deltainfo = drop_u_compression(deltainfo)
1669 return compression_mode, deltainfo
1671 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now