##// END OF EJS Templates
revlog: remove legacy usage of `_srmingapsize`...
marmoute -
r51956:533d6943 default
parent child Browse files
Show More
@@ -1,1629 +1,1630 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        # mirror the real revlog's config objects so the slicing helpers can
        # read ``data_config.sr_density_threshold`` / ``sr_min_gap_size``
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and revision 0 start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # the null revision counts as a snapshot
        if rev == nullrev:
            return True
        return rev in self._snapshot
84
83
85
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never let the read-size cap drop below the minimum gap size,
        # for consistency with the density slicing below
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native (C/Rust) implementation available; fall back to Python
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        revlog.data_config.sr_min_gap_size,
    ):
        yield from _slicechunktosize(revlog, chunk, targetsize)
158
159
159
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # fast path: the whole chunk already fits within the target size
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # snapshot still fits in the current chunk; extend it
            endrevidx = idx + 1
        else:
            # flush the accumulated chunk and start a new one at this rev
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # first non-snapshot rev reached; switch to binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate span until it fits under targetsize
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains past the last cut point
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
313
314
314
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    # bind methods locally; they are called once per revision below
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # a span below the minimum gap size is never worth splitting
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        # list is sorted ascending, so pop() yields the largest remaining gap
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read;
        # recompute the resulting density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
439
440
440
441
441 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
443
444
444 Doctest Setup
445 Doctest Setup
445 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
446 ... 5, #0
447 ... 5, #0
447 ... 10, #1
448 ... 10, #1
448 ... 12, #2
449 ... 12, #2
449 ... 12, #3 (empty)
450 ... 12, #3 (empty)
450 ... 17, #4
451 ... 17, #4
451 ... 21, #5
452 ... 21, #5
452 ... 21, #6 (empty)
453 ... 21, #6 (empty)
453 ... ])
454 ... ])
454
455
455 Contiguous cases:
456 Contiguous cases:
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 [0, 1, 2]
462 [0, 1, 2]
462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 [2]
464 [2]
464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 [3, 4, 5]
466 [3, 4, 5]
466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 [3, 4]
468 [3, 4]
468
469
469 Discontiguous cases:
470 Discontiguous cases:
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 [1, 3, 5]
472 [1, 3, 5]
472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 [1]
474 [1]
474 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 [3, 5]
476 [3, 5]
476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 [3, 5]
478 [3, 5]
478 """
479 """
479 length = revlog.length
480 length = revlog.length
480
481
481 if endidx is None:
482 if endidx is None:
482 endidx = len(revs)
483 endidx = len(revs)
483
484
484 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
486 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
487 while (
488 while (
488 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 ):
490 ):
490 endidx -= 1
491 endidx -= 1
491
492
492 return revs[startidx:endidx]
493 return revs[startidx:endidx]
493
494
494
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # span = end of the last revision minus start of the first one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
523
524
524
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    Patches ``delta`` onto revision ``baserev`` of ``revlog``, runs the raw
    flag processors, and (when requested by those processors) validates the
    result against ``expectednode`` using parents ``p1``/``p2``.  Returns the
    reconstructed full text.  Raises ``error.StorageError`` when
    REVIDX_ISCENSORED is set but the node is not actually censored.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: the payload after the header IS the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # checkhash succeeded, so the text is NOT censored even though
            # the censored flag was passed in: reject the inconsistency
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
554
555
555
556
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Immutable record describing one candidate delta for a revision.
    #
    # distance: distance from the base revision -- bounding it limits the
    # amount of I/O needed to restore the text (see is_good_delta_info).
    distance = attr.ib()
    # deltalen: size of the stored delta, including any compression-marker
    # header byte (see drop_u_compression, which subtracts 1 when removing
    # the b'u' header).
    deltalen = attr.ib()
    # data: 2-tuple (header, payload) -- header is the compression marker
    # (b'u' for uncompressed, b'' for none).
    data = attr.ib()
    # base: revision number used as the delta base.
    base = attr.ib()
    # chainbase: first revision of the delta chain -- presumably the chain's
    # full-text anchor; confirm against revlog._chaininfo usage.
    chainbase = attr.ib()
    # chainlen: number of deltas in the chain.
    chainlen = attr.ib()
    # compresseddeltalen: sum of the sizes of all deltas that must be applied
    # -- bounding it limits CPU usage (see is_good_delta_info).
    compresseddeltalen = attr.ib()
    # snapshotdepth: depth when the delta is an intermediate snapshot, or
    # None when it is a regular delta (is_good_delta_info tests for None).
    snapshotdepth = attr.ib()
566
567
567
568
def drop_u_compression(delta):
    """Turn a "u" (stored-uncompressed) delta into one without the marker byte.

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    assert header == b'u', header
    # Rebuild the (frozen) info record: same delta, one byte shorter, with an
    # empty compression header instead of the explicit b'u' marker.
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
584
585
585
586
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    ``deltainfo`` is a ``_deltainfo`` candidate (or None, which is never
    good); ``revinfo`` carries the target revision's metadata -- here we use
    ``revinfo.textlen`` and ``revinfo.cachedelta``.
    """
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog.delta_config.max_deltachain_span
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (
        revlog.delta_config.max_chain_len
        and revlog.delta_config.max_chain_len < deltainfo.chainlen
    ):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
669
670
670
671
671 # If a revision's full text is that much bigger than a base candidate full
672 # If a revision's full text is that much bigger than a base candidate full
672 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 # consider these candidates.
674 # consider these candidates.
674 LIMIT_BASE2TEXT = 500
675 LIMIT_BASE2TEXT = 500
675
676
676
677
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a coroutine-style generator: it yields tuples of candidate base
    revisions, and the caller ``send()``s back the revision it found good (or
    None) to drive refinement.  A final ``None`` is always yielded when the
    search is exhausted.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog.delta_config.general_delta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        cachedelta is None
        or cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog.delta_config.sparse_revlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog.delta_config.candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if (
                revlog.delta_config.max_chain_len
                and chainlen >= revlog.delta_config.max_chain_len
            ):
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    # stop chunking early once a batch brings no improvement
                    if prev_good == good:
                        break

    yield None
840
841
841
842
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield candidate-base groups, refining around the caller's feedback.

    Coroutine-style generator: the caller ``send()``s back the base it found
    good (or None).  First tries the cached delta base (when reuse is
    allowed), then the raw groups from ``_rawgroups``; on sparse revlogs a
    good snapshot base is further refined down its delta chain and up through
    its snapshot children.  Always terminates by yielding None.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog.delta_config.sparse_revlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the root of the snapshot chain as
        # long as the caller keeps accepting a new base.
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the children snapshots of the accepted
        # base as potentially better candidates.
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
905
906
906
907
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for delta base if we cannot use a delta base ?
    assert revlog.delta_config.general_delta
    # also see issue6056
    sparse = revlog.delta_config.sparse_revlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazy tested base if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog.delta_config.delta_both_parents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1027
1028
1028
1029
class SnapshotCache:
    """Cache mapping each base revision to the set of snapshots built on it.

    The cache remembers which contiguous revision range has already been
    scanned so that repeated `update` calls only walk new revisions.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base revision -> set of snapshot revisions using that base
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the range already scanned (None until first scan)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """Scan `revlog` for snapshots from `start_rev` to the tip."""
        last_rev = len(revlog) - 1
        if start_rev > last_rev:
            return  # nothing to scan

        if self._start_rev is None:
            # first scan ever
            assert self._end_rev is None
            self._update(revlog, start_rev, last_rev)
        elif not (self._start_rev <= start_rev and last_rev <= self._end_rev):
            # only walk the parts not already covered by a previous scan
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < last_rev:
                self._update(revlog, self._end_rev + 1, last_rev)

        # widen the recorded coverage to include this scan
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = last_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, last_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """Actually scan [start_rev, end_rev] (must not overlap coverage)."""
        bounds = (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), bounds
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), bounds
        cache = self.snapshots
        if hasattr(revlog.index, 'findsnapshots'):
            # fast path provided by the native index implementation
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            dp = revlog.deltaparent
            is_snap = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if is_snap(rev):
                    cache[dp(rev)].add(rev)
1083
1083
1084
1084 class deltacomputer:
1085 class deltacomputer:
1085 def __init__(
1086 def __init__(
1086 self,
1087 self,
1087 revlog,
1088 revlog,
1088 write_debug=None,
1089 write_debug=None,
1089 debug_search=False,
1090 debug_search=False,
1090 debug_info=None,
1091 debug_info=None,
1091 ):
1092 ):
1092 self.revlog = revlog
1093 self.revlog = revlog
1093 self._write_debug = write_debug
1094 self._write_debug = write_debug
1094 if write_debug is None:
1095 if write_debug is None:
1095 self._debug_search = False
1096 self._debug_search = False
1096 else:
1097 else:
1097 self._debug_search = debug_search
1098 self._debug_search = debug_search
1098 self._debug_info = debug_info
1099 self._debug_info = debug_info
1099 self._snapshot_cache = SnapshotCache()
1100 self._snapshot_cache = SnapshotCache()
1100
1101
1101 @property
1102 @property
1102 def _gather_debug(self):
1103 def _gather_debug(self):
1103 return self._write_debug is not None or self._debug_info is not None
1104 return self._write_debug is not None or self._debug_info is not None
1104
1105
1105 def buildtext(self, revinfo):
1106 def buildtext(self, revinfo):
1106 """Builds a fulltext version of a revision
1107 """Builds a fulltext version of a revision
1107
1108
1108 revinfo: revisioninfo instance that contains all needed info
1109 revinfo: revisioninfo instance that contains all needed info
1109 """
1110 """
1110 btext = revinfo.btext
1111 btext = revinfo.btext
1111 if btext[0] is not None:
1112 if btext[0] is not None:
1112 return btext[0]
1113 return btext[0]
1113
1114
1114 revlog = self.revlog
1115 revlog = self.revlog
1115 cachedelta = revinfo.cachedelta
1116 cachedelta = revinfo.cachedelta
1116 baserev = cachedelta[0]
1117 baserev = cachedelta[0]
1117 delta = cachedelta[1]
1118 delta = cachedelta[1]
1118
1119
1119 fulltext = btext[0] = _textfromdelta(
1120 fulltext = btext[0] = _textfromdelta(
1120 revlog,
1121 revlog,
1121 baserev,
1122 baserev,
1122 delta,
1123 delta,
1123 revinfo.p1,
1124 revinfo.p1,
1124 revinfo.p2,
1125 revinfo.p2,
1125 revinfo.flags,
1126 revinfo.flags,
1126 revinfo.node,
1127 revinfo.node,
1127 )
1128 )
1128 return fulltext
1129 return fulltext
1129
1130
1130 def _builddeltadiff(self, base, revinfo):
1131 def _builddeltadiff(self, base, revinfo):
1131 revlog = self.revlog
1132 revlog = self.revlog
1132 t = self.buildtext(revinfo)
1133 t = self.buildtext(revinfo)
1133 if revlog.iscensored(base):
1134 if revlog.iscensored(base):
1134 # deltas based on a censored revision must replace the
1135 # deltas based on a censored revision must replace the
1135 # full content in one patch, so delta works everywhere
1136 # full content in one patch, so delta works everywhere
1136 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1137 delta = header + t
1138 delta = header + t
1138 else:
1139 else:
1139 ptext = revlog.rawdata(base)
1140 ptext = revlog.rawdata(base)
1140 delta = mdiff.textdiff(ptext, t)
1141 delta = mdiff.textdiff(ptext, t)
1141
1142
1142 return delta
1143 return delta
1143
1144
    def _builddeltainfo(self, revinfo, base, target_rev=None):
        """Build a _deltainfo for `revinfo` stored as a delta against `base`.

        Returns None when the delta is provably too large to be a useful
        intermediate snapshot (see the `upperboundcomp` bailouts below).
        """
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog.delta_config.general_delta:
            deltabase = base
        else:
            # without general-delta, deltas can only be stored against the
            # previous revision, and the stored base is the chain base
            if target_rev is not None and base != target_rev - 1:
                msg = (
                    b'general delta cannot use delta for something else '
                    b'than `prev`: %d<-%d'
                )
                msg %= (base, target_rev)
                raise error.ProgrammingError(msg)
            deltabase = chainbase
        # figure out the snapshot depth (None means "plain delta")
        snapshotdepth = None
        if revlog.delta_config.sparse_revlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        # can we reuse the cached delta instead of recomputing a diff?
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check whether the cached diff still applies: walk up through
            # empty (zero-length) revisions, which are transparent for deltas
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo)
        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdepth needs to be neither None nor a level-0 snapshot for
        # the following early-bailout estimate to apply
        if revlog.upperboundcomp is not None and snapshotdepth:
            # best compressed size we can realistically hope for
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            # size budget allowed for a snapshot at this depth
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if self._debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                # even optimally compressed, this snapshot would be too big
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                # bigger than the base it refines: not a useful snapshot
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        # distance from the start of the delta chain's base to the end of
        # this (about to be appended) delta
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )
1223
1224
1224 def _fullsnapshotinfo(self, revinfo, curr):
1225 def _fullsnapshotinfo(self, revinfo, curr):
1225 rawtext = self.buildtext(revinfo)
1226 rawtext = self.buildtext(revinfo)
1226 data = self.revlog.compress(rawtext)
1227 data = self.revlog.compress(rawtext)
1227 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1228 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1228 deltabase = chainbase = curr
1229 deltabase = chainbase = curr
1229 snapshotdepth = 0
1230 snapshotdepth = 0
1230 chainlen = 1
1231 chainlen = 1
1231
1232
1232 return _deltainfo(
1233 return _deltainfo(
1233 dist,
1234 dist,
1234 deltalen,
1235 deltalen,
1235 data,
1236 data,
1236 deltabase,
1237 deltabase,
1237 chainbase,
1238 chainbase,
1238 chainlen,
1239 chainlen,
1239 compresseddeltalen,
1240 compresseddeltalen,
1240 snapshotdepth,
1241 snapshotdepth,
1241 )
1242 )
1242
1243
    def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            # default to appending at the end of the revlog
            target_rev = len(self.revlog)

        gather_debug = self._gather_debug
        cachedelta = revinfo.cachedelta
        revlog = self.revlog
        p1r = p2r = None

        if excluded_bases is None:
            excluded_bases = set()

        if gather_debug:
            # collect timing and context information for the debug record
            start = util.timer()
            dbg = self._one_dbg_data()
            dbg['revision'] = target_rev
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
            dbg['p1-chain-len'] = p1_chain_len
            dbg['p2-chain-len'] = p2_chain_len

        # 1) if the revision is empty, no amount of delta can beat it
        #
        # 2) no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = False
                dbg['delta_try_count'] = 0
                dbg['type'] = b"full"
                dbg['snapshot-depth'] = 0
                self._dbg_process_data(dbg)
            return deltainfo

        deltainfo = None

        # If the incoming (cached) delta is to be forcibly reused, comply
        # early without running the candidate search.
        if (
            revlog.delta_config.general_delta
            and revinfo.cachedelta is not None
            and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
        ):
            base = revinfo.cachedelta[0]
            if base == nullrev:
                # forced base is "no base": store a full snapshot
                dbg_type = b"full"
                deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                if gather_debug:
                    snapshotdepth = 0
            elif base not in excluded_bases:
                delta = revinfo.cachedelta[1]
                header, data = revlog.compress(delta)
                deltalen = len(header) + len(data)
                if gather_debug:
                    # compute chain statistics only when someone will see them
                    offset = revlog.end(len(revlog) - 1)
                    chainbase = revlog.chainbase(base)
                    distance = deltalen + offset - revlog.start(chainbase)
                    chainlen, compresseddeltalen = revlog._chaininfo(base)
                    chainlen += 1
                    compresseddeltalen += deltalen
                    if base == p1r or base == p2r:
                        dbg_type = b"delta"
                        snapshotdepth = None
                    elif not revlog.issnapshot(base):
                        snapshotdepth = None
                    else:
                        dbg_type = b"snapshot"
                        snapshotdepth = revlog.snapshotdepth(base) + 1
                else:
                    distance = None
                    chainbase = None
                    chainlen = None
                    compresseddeltalen = None
                    snapshotdepth = None
                deltainfo = _deltainfo(
                    distance=distance,
                    deltalen=deltalen,
                    data=(header, data),
                    base=base,
                    chainbase=chainbase,
                    chainlen=chainlen,
                    compresseddeltalen=compresseddeltalen,
                    snapshotdepth=snapshotdepth,
                )

            if deltainfo is not None:
                if gather_debug:
                    end = util.timer()
                    dbg['duration'] = end - start
                    dbg[
                        'delta-base'
                    ] = deltainfo.base  # pytype: disable=attribute-error
                    dbg['search_round_count'] = 0
                    dbg['using-cached-base'] = True
                    dbg['delta_try_count'] = 0
                    # NOTE(review): 'type' is reported as b"full" here even
                    # when the reused base produced a delta/snapshot — looks
                    # suspect, confirm intent before relying on this field.
                    dbg['type'] = b"full"
                    if snapshotdepth is None:
                        dbg['snapshot-depth'] = 0
                    else:
                        dbg['snapshot-depth'] = snapshotdepth
                    self._dbg_process_data(dbg)
                return deltainfo

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        if p1r is None:
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)

        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # iterate over groups of candidate bases, refining as we find deltas
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if self._debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                # classify this round for the debug trace
                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1r in candidaterevs or p2r in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if self._debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1r:
                        candidate_type = b"p1"
                    elif candidaterev == p2r:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if self._debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    target_rev=target_rev,
                )
                if self._debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if self._debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif self._debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest delta found so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # tell the group generator which base won, to refine the search
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            # no suitable base found anywhere: fall back to a full snapshot
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            # determine whether the incoming cached delta was what we used
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            self._dbg_process_data(dbg)
        return deltainfo
1555
1556
1556 def _one_dbg_data(self):
1557 def _one_dbg_data(self):
1557 return {
1558 return {
1558 'duration': None,
1559 'duration': None,
1559 'revision': None,
1560 'revision': None,
1560 'delta-base': None,
1561 'delta-base': None,
1561 'search_round_count': None,
1562 'search_round_count': None,
1562 'using-cached-base': None,
1563 'using-cached-base': None,
1563 'delta_try_count': None,
1564 'delta_try_count': None,
1564 'type': None,
1565 'type': None,
1565 'p1-chain-len': None,
1566 'p1-chain-len': None,
1566 'p2-chain-len': None,
1567 'p2-chain-len': None,
1567 'snapshot-depth': None,
1568 'snapshot-depth': None,
1568 'target-revlog': None,
1569 'target-revlog': None,
1569 }
1570 }
1570
1571
1571 def _dbg_process_data(self, dbg):
1572 def _dbg_process_data(self, dbg):
1572 if self._debug_info is not None:
1573 if self._debug_info is not None:
1573 self._debug_info.append(dbg)
1574 self._debug_info.append(dbg)
1574
1575
1575 if self._write_debug is not None:
1576 if self._write_debug is not None:
1576 msg = (
1577 msg = (
1577 b"DBG-DELTAS:"
1578 b"DBG-DELTAS:"
1578 b" %-12s"
1579 b" %-12s"
1579 b" rev=%d:"
1580 b" rev=%d:"
1580 b" delta-base=%d"
1581 b" delta-base=%d"
1581 b" is-cached=%d"
1582 b" is-cached=%d"
1582 b" - search-rounds=%d"
1583 b" - search-rounds=%d"
1583 b" try-count=%d"
1584 b" try-count=%d"
1584 b" - delta-type=%-6s"
1585 b" - delta-type=%-6s"
1585 b" snap-depth=%d"
1586 b" snap-depth=%d"
1586 b" - p1-chain-length=%d"
1587 b" - p1-chain-length=%d"
1587 b" p2-chain-length=%d"
1588 b" p2-chain-length=%d"
1588 b" - duration=%f"
1589 b" - duration=%f"
1589 b"\n"
1590 b"\n"
1590 )
1591 )
1591 msg %= (
1592 msg %= (
1592 dbg["target-revlog"],
1593 dbg["target-revlog"],
1593 dbg["revision"],
1594 dbg["revision"],
1594 dbg["delta-base"],
1595 dbg["delta-base"],
1595 dbg["using-cached-base"],
1596 dbg["using-cached-base"],
1596 dbg["search_round_count"],
1597 dbg["search_round_count"],
1597 dbg["delta_try_count"],
1598 dbg["delta_try_count"],
1598 dbg["type"],
1599 dbg["type"],
1599 dbg["snapshot-depth"],
1600 dbg["snapshot-depth"],
1600 dbg["p1-chain-len"],
1601 dbg["p1-chain-len"],
1601 dbg["p2-chain-len"],
1602 dbg["p2-chain-len"],
1602 dbg["duration"],
1603 dbg["duration"],
1603 )
1604 )
1604 self._write_debug(msg)
1605 self._write_debug(msg)
1605
1606
1606
1607
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # no data to store at all: declare it uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # no separate compression header: sniff the first payload byte
        first = payload[0:1]
        if first == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if first == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # the b'u' header marks uncompressed data; dropping it and
        # recording PLAIN is a more efficient way to say "uncompressed"
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
1630 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now